diff --git a/backends/tofino/CMakeLists.txt b/backends/tofino/CMakeLists.txt index 8ceaf2e1eb2..64d52bf83f1 100644 --- a/backends/tofino/CMakeLists.txt +++ b/backends/tofino/CMakeLists.txt @@ -23,9 +23,6 @@ if (CMAKE_BUILD_TYPE STREQUAL Release OR CMAKE_BUILD_TYPE STREQUAL RelWithDebInf add_definitions("-DRELEASE_BUILD=1") endif() -# JBay is always enabled, the preprocessor guard is deprecated -add_definitions("-DHAVE_JBAY=1") - list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") if (ENABLE_STATIC_LIBS) @@ -85,8 +82,7 @@ else() set (BFN_P4C_GIT_SHA $ENV{BFN_P4C_GIT_SHA}) endif() endif() -set (ENV{P4C_VERSION} "${BFN_P4C_VERSION} (SHA: ${BFN_P4C_GIT_SHA})") -MESSAGE(STATUS "p4c-barefoot version: $ENV{P4C_VERSION}") +MESSAGE(STATUS "p4c-barefoot version: ${BFN_P4C_VERSION}") # Generate the sha specific version file. It includes the GIT SHA. # Because this version changes frequently, we include it separately from the normal version files. @@ -229,6 +225,7 @@ set (BF_P4C_IR_SRCS bf-p4c/parde/match_register.cpp bf-p4c/parde/clot/clot.cpp bf-p4c/phv/phv.cpp + # FIXME: This should be a library. bf-utils/dynamic_hash/dynamic_hash.cpp bf-utils/dynamic_hash/bfn_hash_algorithm.cpp ) @@ -240,3 +237,6 @@ endforeach() set(EXTENSION_IR_SOURCES ${EXTENSION_IR_SOURCES} ${QUAL_BF_P4C_IR_SRCS} PARENT_SCOPE) add_subdirectory(bf-p4c) + +# Initialize bf-asm after bf-p4c. +add_subdirectory(bf-asm) diff --git a/backends/tofino/LICENSE b/backends/tofino/LICENSE index a24a1c32224..bc47beb02fe 100644 --- a/backends/tofino/LICENSE +++ b/backends/tofino/LICENSE @@ -1,4 +1,4 @@ -Copyright (C) 2024 Intel Corporation +Copyright (C) 2025 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy diff --git a/backends/tofino/bf-asm/.gdbinit b/backends/tofino/bf-asm/.gdbinit new file mode 100644 index 00000000000..90702882411 --- /dev/null +++ b/backends/tofino/bf-asm/.gdbinit @@ -0,0 +1,402 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 + +# vim: ft=python +set print object +set unwindonsignal on +set unwind-on-terminating-exception on + +if $_isvoid($bpnum) + break __assert_fail + break error + break bug +end + +define d + call ::dump($arg0) +end + + +python +def template_split(s): + parts = [] + bracket_level = 0 + current = [] + for c in (s): + if c == "," and bracket_level == 1: + parts.append("".join(current)) + current = [] + else: + if c == '>': + bracket_level -= 1 + if bracket_level > 0: + current.append(c) + if c == '<': + bracket_level += 1 + parts.append("".join(current)) + return parts + +def vec_begin(vec): + return vec['_M_impl']['_M_start'] +def vec_end(vec): + return vec['_M_impl']['_M_finish'] +def vec_size(vec): + return int(vec_end(vec) - vec_begin(vec)) +def vec_at(vec, i): + return (vec_begin(vec) + i).dereference() + +class bitvecPrinter(object): + "Print a bitvec" + def __init__(self, val): + self.val = val + def to_string(self): + data = self.val['data'] + rv = "" + size = self.val['size'] + ptr = self.val['ptr'] + unitsize = ptr.type.target().sizeof * 8 + while size > 1: + data = ptr.dereference() + i = 0 + while i < unitsize: 
+ if (rv.__len__() % 120 == 119): rv += ':' + elif (rv.__len__() % 30 == 29): rv += ' ' + elif (rv.__len__() % 6 == 5): rv += '_' + if (data & 1) == 0: + rv += "0" + else: + rv += "1" + data >>= 1 + i += 1 + ptr += 1 + size -= 1 + data = ptr.dereference() + while rv == "" or data > 0: + if (rv.__len__() % 120 == 119): rv += ':' + elif (rv.__len__() % 30 == 29): rv += ' ' + elif (rv.__len__() % 6 == 5): rv += '_' + if (data & 1) == 0: + rv += "0" + else: + rv += "1" + data >>= 1 + return rv +class value_t_Printer(object): + "Print a value_t" + def __init__(self, val): + self.val = val + def to_string(self): + typ = self.val['type'] + if typ == 0: # tINT + return str(self.val['i']) + elif typ == 1: # tBIGINT + v = self.val['bigi'] + data = v['data'] + size = v['size'] + val = 0 + while size > 0: + val <<= 64 + val += data.dereference() + size -= 1 + data += 1 + return str(val) + elif typ == 2: # tRANGE + return str(self.val['lo']) + '..' + str(self.val['hi']) + elif typ == 3: # tSTR + return self.val['s'] + elif typ == 4: # tMATCH + return self.val['m'] + elif typ == 5: # tBIGMATCH + return self.val['bigm'] + elif typ == 6: # tVEC + return "vector of %d elements" % self.val['vec']['size'] + elif typ == 7: # tMAP + return "map of %d elements" % self.val['map']['size'] + elif typ == 8: # tCMD + cmd = self.val['vec']['data'] + count = self.val['vec']['size'] + rv = str(cmd.dereference()) + rv += "(" + while count > 1: + count -= 1 + cmd += 1 + rv += str(cmd.dereference()) + if count > 1: + rv += ", " + rv += ")" + return rv; + else: + return "" + class _vec_iter: + def __init__(self, data, size): + self.data = data + self.size = size + self.counter = -1 + def __iter__(self): + return self + def __next__(self): + self.counter += 1 + if self.counter >= self.size: + raise StopIteration + item = self.data.dereference() + self.data += 1 + return ("[%d]" % self.counter, item) + def next(self): return self.__next__() + class _map_iter: + def __init__(self, data, size): + 
self.data = data + self.size = size + def __iter__(self): + return self + def __next__(self): + self.size -= 1 + if self.size < 0: + raise StopIteration + item = self.data.dereference() + self.data += 1 + return ("[" + str(item['key']) + "]", item['value']) + def next(self): return self.__next__() + + class _not_iter: + def __init__(self): + pass + def __iter__(self): + return self + def __next__(self): + raise StopIteration + def next(self): return self.__next__() + def children(self): + typ = self.val['type'] + if typ == 6: + vec = self.val['vec'] + return self._vec_iter(vec['data'], vec['size']) + elif typ == 7: + map = self.val['map'] + return self._map_iter(map['data'], map['size']) + else: + return self._not_iter() +class value_t_VECTOR_Printer(object): + "Print a VECTOR(value_t)" + def __init__(self, val): + self.val = val + def to_string(self): + return "vector of %d elements" % self.val['size'] + class _iter: + def __init__(self, data, size): + self.data = data + self.size = size + self.counter = -1 + def __iter__(self): + return self + def __next__(self): + self.counter += 1 + if self.counter >= self.size: + raise StopIteration + item = self.data.dereference() + self.data += 1 + return ("[%d]" % self.counter, item) + def next(self): return self.__next__() + def children(self): + return self._iter(self.val['data'], self.val['size']) +class pair_t_VECTOR_Printer(object): + "Print a VECTOR(pair_t)" + def __init__(self, val): + self.val = val + def to_string(self): + return "map of %d elements" % self.val['size'] + class _iter: + def __init__(self, data, size): + self.data = data + self.size = size + def __iter__(self): + return self + def __next__(self): + self.size -= 1 + if self.size < 0: + raise StopIteration + item = self.data.dereference() + self.data += 1 + return ("[" + str(item['key']) + "]", item['value']) + def next(self): return self.__next__() + def children(self): + return self._iter(self.val['data'], self.val['size']) +class 
ordered_map_Printer: + "Print an ordered_map<>" + def __init__(self, val): + self.val = val + self.args = template_split(val.type.tag) + self.eltype = gdb.lookup_type('std::pair<' + self.args[0] + ' const,' + self.args[1] + '>') + def to_string(self): + it = self.val['data']['_M_impl']['_M_node']['_M_next'] + e = self.val['data']['_M_impl']['_M_node'].address + if it == e: # empty map + return "{}" + else: + return None + class _iter: + def __init__(self, eltype, it, e): + self.eltype = eltype + self.it = it + self.e = e + def __iter__(self): + return self + def __next__(self): + if self.it == self.e: + raise StopIteration + el = (self.it + 1).cast(self.eltype.pointer()).dereference() + self.it = self.it.dereference()['_M_next'] + return ("[" + str(el['first']) + "]", el['second']); + def next(self): return self.__next__() + def children(self): + return self._iter(self.eltype, self.val['data']['_M_impl']['_M_node']['_M_next'], + self.val['data']['_M_impl']['_M_node'].address) +class InputXbar_Group_Printer: + "Print an InputXbar::Group" + def __init__(self, val): + self.val = val + def to_string(self): + types = [ 'invalid', 'exact', 'ternary', 'byte', 'gateway', 'xcmp' ] + t = int(self.val['type']) + if t >= 0 and t < len(types): + rv = types[t] + else: + rv = '' % int(self.val['type']) + rv += ' group ' + str(self.val['index']) + return rv +class ActionBusSource_Printer: + "Print an ActionBusSource" + def __init__(self, val): + self.val = val + def to_string(self): + try: + types = [ "None", "Field", "HashDist", "HashDistPair", "RandomGen", + "TableOutput", "TableColor", "TableAddress", "Ealu", "XCmp", + "NameRef", "ColorRef", "AddressRef" ] + t = int(self.val['type']) + if t >= 0 and t < len(types): + rv = types[t] + else: + rv = '' % int(self.val['type']) + if t == 9: # XCMP on one line without children + rv += "[" + str(self.val['xcmp_group']) + ":" + str(self.val['xcmp_byte']) + "]" + except Exception as e: + rv += "{crash: "+str(e)+"}" + return rv + class 
_iter: + def __init__(self, val, type): + self.val = val + self.type = type + self.count = 0 + def __iter__(self): + return self + def __next__(self): + self.count = self.count + 1 + if self.type == 3: + if self.count == 1: + return ("hd1", self.val['hd1']) + elif self.count == 2: + return ("hd2", self.val['hd2']) + else: + raise StopIteration + #elif self.type == 9: + # XCmp on one line without children + # if self.count == 1: + # return ("group", self.val['xcmp_group']) + # elif self.count == 2: + # return ("byte", self.val['xcmp_byte']) + elif self.count > 1: + raise StopIteration + elif self.type == 1: + return ("field", self.val['field'].dereference()) + elif self.type == 2: + return ("hd", self.val['hd']) + elif self.type == 4: + return ("rng", self.val['rng']) + elif self.type == 5 or self.type == 6 or self.type == 7: + return ("table", self.val['table']) + elif self.type == 10 or self.type == 11 or self.type == 12: + return ("name_ref", self.val['name_ref']) + raise StopIteration + def next(self): return self.__next__() + def children(self): + return self._iter(self.val, int(self.val['type'])) + +class PhvRef_Printer: + "Print a Phv::Ref" + def __init__(self, val): + self.val = val + def to_string(self): + threads = [ "ig::", "eg::", "gh::" ] + rv = threads[self.val['gress_']] + str(self.val['name_']) + if self.val['lo'] >= 0: + rv += '(' + str(self.val['lo']) + if self.val['hi'] >= 0: + rv += '..' 
+ str(self.val['hi']) + rv += ')' + return rv + +class Mem_Printer: + "Print a MemUnit or subclass" + def __init__(self, val, big, small): + self.val = val + self.big = big + self.small = small + def to_string(self): + if self.val['stage'] > -32768: + return "%s(%d,%d,%d)" % (self.big, self.val['stage'], self.val['row'], self.val['col']) + if self.val['row'] >= 0: + return "%s(%d,%d)" % (self.big, self.val['row'], self.val['col']) + return "%s(%d)" % (self.small, self.val['col']) + +def bfas_pp(val): + if val.type.tag == 'bitvec': + return bitvecPrinter(val) + if val.type.tag == 'value_t': + return value_t_Printer(val) + if val.type.tag == 'value_t_VECTOR': + return value_t_VECTOR_Printer(val) + if val.type.tag == 'pair_t_VECTOR': + return pair_t_VECTOR_Printer(val) + if str(val.type.tag).startswith('ordered_map<'): + return ordered_map_Printer(val) + if val.type.tag == 'InputXbar::Group': + return InputXbar_Group_Printer(val) + if val.type.tag == 'ActionBusSource': + return ActionBusSource_Printer(val) + if val.type.tag == 'Phv::Ref': + return PhvRef_Printer(val) + if val.type.tag == 'SRamMatchTable::Ram': + return Mem_Printer(val, 'Ram', 'Lamb') + if val.type.tag == 'MemUnit': + return Mem_Printer(val, 'Mem', 'Mem') + return None + +try: + found = False + for i in range(len(gdb.pretty_printers)): + try: + if gdb.pretty_printers[i].__name__ == "bfas_pp": + gdb.pretty_printers[i] = bfas_pp + found = True + except: + pass + if not found: + gdb.pretty_printers.append(bfas_pp) +except: + pass + +end diff --git a/backends/tofino/bf-asm/.gitignore b/backends/tofino/bf-asm/.gitignore new file mode 100644 index 00000000000..0651a09e2dc --- /dev/null +++ b/backends/tofino/bf-asm/.gitignore @@ -0,0 +1,27 @@ +Makefile.in +aclocal.m4 +autom4te.cache +build +compile +configure +depcomp +install-sh +missing +ylwrap +*.o +*.d +*.out +*.tofino +*.pyc +gen +templates +asm-parse.c +lex-yaml.c +json2cpp +json_diff +mksizes +reflow +tags +tfas +y.output +faillog.txt diff --git 
a/backends/tofino/bf-asm/CMakeLists.txt b/backends/tofino/bf-asm/CMakeLists.txt new file mode 100644 index 00000000000..5afc518db18 --- /dev/null +++ b/backends/tofino/bf-asm/CMakeLists.txt @@ -0,0 +1,310 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 + +# # # #### Tofino assembler +project(BFASM) + +MESSAGE("-- Adding bf-asm") + +OPTION(ASAN_ENABLED "Enable ASAN checks" OFF) + +set (BFASM_LIB_DEPS p4ctoolkit ${P4C_LIB_DEPS}) +set (BFASM_GEN_DIR ${BFASM_BINARY_DIR}/gen) + +# other required libraries +include (CheckLibraryExists) +# check includes +include (CheckIncludeFile) +check_include_file (execinfo.h HAVE_EXECINFO_H) +check_include_file (ucontext.h HAVE_UCONTEXT_H) + +set(CMAKE_CXX_FLAGS "") # clear CXX_FLAGS +# TODO: Fix build warnings with -Wall and enable it. 
+# add_cxx_compiler_option ("-Wall") +# add_cxx_compiler_option ("-Wextra") +# add_cxx_compiler_option ("-Wno-unused") +# add_cxx_compiler_option ("-Wno-unused-parameter") +# add_cxx_compiler_option ("-Wno-pragmas") +# add_cxx_compiler_option ("-Wno-unknown-pragmas") +add_cxx_compiler_option ("-Wno-overloaded-virtual") +add_cxx_compiler_option ("-Wno-deprecated") +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES i386|i586|i686) + # on 32-bit platforms we get a lot of warnings when using the error macros + add_cxx_compiler_option("-Wno-write-strings") +endif() +if (ENABLE_BAREFOOT_INTERNAL) + add_definitions("-DBAREFOOT_INTERNAL=1") +endif() + +message(STATUS "P4C ${P4C_SOURCE_DIR}") +macro(get_schema_version schema_file schema_var) + execute_process( + COMMAND python3 -c "from ${schema_file} import get_schema_version;print(get_schema_version(), end='', flush=True)" + OUTPUT_VARIABLE __schema_version + RESULT_VARIABLE __schema_errcode + ERROR_VARIABLE __schema_errstr + WORKING_DIRECTORY ${BFN_P4C_SOURCE_DIR}/compiler_interfaces/schemas) + if (${__schema_errcode}) + MESSAGE(FATAL_ERROR "Error retrieving ${schema_file} version ${__schema_errstr}") + endif() + set(${schema_var} ${__schema_version}) +endmacro(get_schema_version) +# Now force cmake to rerun if any of the files that we depend on versions for +# change: context and manifest for now +# We generate a pair of dummy dependency files will be ignored +set(SCHEMA_FILES + ${BFN_P4C_SOURCE_DIR}/compiler_interfaces/schemas/context_schema.py + ${BFN_P4C_SOURCE_DIR}/compiler_interfaces/schemas/manifest_schema.py + ${BFN_P4C_SOURCE_DIR}/compiler_interfaces/schemas/phv_schema.py + ${BFN_P4C_SOURCE_DIR}/compiler_interfaces/schemas/power_schema.py + ${BFN_P4C_SOURCE_DIR}/compiler_interfaces/schemas/resources_schema.py + ) +foreach (f ${SCHEMA_FILES}) + configure_file(${f} ${CMAKE_BINARY_DIR}/${f}.dep) +endforeach() + +get_schema_version(context_schema CONTEXT_SCHEMA_VERSION) +MESSAGE(STATUS "Found context schema version 
${CONTEXT_SCHEMA_VERSION}") +add_definitions("-DCONTEXT_SCHEMA_VERSION=\"${CONTEXT_SCHEMA_VERSION}\"") + +# ASAN CHECKS +if (ASAN_ENABLED) + # force this set of flags only + set (CMAKE_CXX_FLAGS "-fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer -fno-optimize-sibling-calls -g -O1") +endif() + +# json_diff +set (JSONDIFF_SOURCES json_diff.cpp json.cpp fdstream.cpp) + +# bfdumpbin +set (BFDUMPBIN_SOURCES bfdumpbin.cpp fdstream.cpp json.cpp bson.cpp) + +# bfdis +set (BFDIS_SOURCES bfdis.cpp disasm.cpp fdstream.cpp) + +# reflow +set (REFLOW_SOURCES reflow.cpp) + +# b2j +set (B2J_SOURCES b2j.cpp json.cpp bson.cpp) + +# j2b +set (J2B_SOURCES j2b.cpp json.cpp bson.cpp) + +# mksizes +set (MKSIZES_SOURCES mksizes.cpp) + +# json_diff +add_executable (json_diff ${JSONDIFF_SOURCES}) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(json_diff PUBLIC "-Wno-error") + +# bfdumpbin +add_executable (bfdumpbin ${BFDUMPBIN_SOURCES}) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(bfdumpbin PUBLIC "-Wno-error") + +# reflow +add_executable (reflow ${REFLOW_SOURCES}) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(reflow PUBLIC "-Wno-error") + +# b2j +add_executable (b2j ${B2J_SOURCES}) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(b2j PUBLIC "-Wno-error") + +# j2b +add_executable (j2b ${J2B_SOURCES}) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(j2b PUBLIC "-Wno-error") + +# mksizes +add_executable (mksizes ${MKSIZES_SOURCES}) +# Disable errors for warnings. FIXME: Get rid of this. 
+target_compile_options(mksizes PUBLIC "-Wno-error") + +set (BFAS_COMMON_SOURCES + action_bus.cpp + action_table.cpp + asm-types.cpp + atcam_match.cpp + attached_table.cpp + bfas.cpp + bson.cpp + counter.cpp + crash.cpp + deparser.cpp + depositfield.cpp + dynhash.cpp + error_mode.cpp + exact_match.cpp + exename.cpp + flexible_headers.cpp + gateway.cpp + hash_action.cpp + hash_dist.cpp + hashexpr.cpp + idletime.cpp + input_xbar.cpp + instruction.cpp + json.cpp + match_table.cpp + meter.cpp + misc.cpp + p4_table.cpp + parser-tofino-jbay.cpp + phase0.cpp + phv.cpp + primitives.cpp + proxy_hash.cpp + salu_inst.cpp + selection.cpp + sram_match.cpp + stage.cpp + stateful.cpp + synth2port.cpp + tables.cpp + target.cpp + ternary_match.cpp + top_level.cpp + ubits.cpp + vector.c + widereg.cpp + # FIXME: This should be a library. + ${BFN_P4C_SOURCE_DIR}/bf-utils/dynamic_hash/dynamic_hash.cpp + ${BFN_P4C_SOURCE_DIR}/bf-utils/dynamic_hash/bfn_hash_algorithm.cpp + ) + + +BISON_TARGET (asm-parse asm-parse.ypp ${BFASM_GEN_DIR}/asm-parse.cpp VERBOSE) + +add_custom_command(OUTPUT ${BFASM_GEN_DIR}/uptr_sizes.h + COMMAND ${CMAKE_COMMAND} -E make_directory ${BFASM_GEN_DIR} + COMMAND ${BFASM_BINARY_DIR}/mksizes > ${BFASM_GEN_DIR}/uptr_sizes.h) +add_custom_target(bfasm_uptr DEPENDS mksizes ${BFASM_GEN_DIR}/uptr_sizes.h) + +add_custom_command(OUTPUT ${BFASM_GEN_DIR}/lex-yaml.c + COMMAND ${FLEX_EXECUTABLE} -t ${BFASM_SOURCE_DIR}/lex-yaml.l > ${BFASM_GEN_DIR}/lex-yaml.c + DEPENDS ${BFASM_SOURCE_DIR}/lex-yaml.l + COMMENT "Generating lex-yaml.cpp") +add_custom_target(bfasm_yaml DEPENDS ${BFASM_GEN_DIR}/lex-yaml.c) +add_dependencies(bfasm_yaml bfasm_uptr) + +set (BFAS_GEN_SOURCES + ${BFASM_GEN_DIR}/asm-parse.cpp + ${BFASM_GEN_DIR}/uptr_sizes.h +) + +set (BFASM_WALLE ${BFASM_SOURCE_DIR}/walle/walle.py) +set (WALLE_SOURCES + ${BFASM_SOURCE_DIR}/walle/chip.py + ${BFASM_SOURCE_DIR}/walle/csr.py + ${BFASM_SOURCE_DIR}/walle/walle.py) + +add_subdirectory (tofino) +add_subdirectory (jbay) +set 
(BFASM_LIBS ${BFASM_LIBS} regs_jbay regs_tofino) + +# Other configuration files that need to be generated +configure_file ("${BFASM_SOURCE_DIR}/cmake/config.h.cmake" "${BFASM_BINARY_DIR}/config.h") + +set_source_files_properties (${BFAS_GEN_SOURCES} ${BFASM_GEN_DIR}/lex-yaml.c PROPERTIES GENERATED TRUE) + +set (BFAS_SOURCES ${BFAS_COMMON_SOURCES} ${BFAS_GEN_SOURCES} + ${BFAS_TOFINO_SRCS} + ${BFAS_JBAY_SRCS} +) + +# bfdis +if (ENABLE_GTESTS) + # FIXME -- bfdis depends on bfas_lib which is only built if GTESTS are enabled. So for + # now we only enable bfdis with ENABLE_GTESTS. Should fix to use bfas_lib for bfas + # rather than building separately, so it will always be enabled. + add_executable (bfdis ${BFDIS_SOURCES}) + target_link_libraries (bfdis bfas_lib ${BFASM_LIBS} ${BFASM_LIB_DEPS}) +endif() + + +set_source_files_properties(${BFAS_SOURCES} PROPERTIES COMPILE_FLAGS ${BFASM_CXX_FLAGS}) +# Remove compiler flag that is C++ only for vector.c +string(REPLACE "-Wno-overloaded-virtual" "" vector_c_flags ${BFASM_CXX_FLAGS}) +set_source_files_properties(vector.c PROPERTIES COMPILE_FLAGS ${vector_c_flags}) +add_executable (bfas ${BFAS_SOURCES}) +# Enable extensions for bfas. FIXME: Do we need this? +target_compile_options(bfas PRIVATE -std=gnu++17) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(bfas PUBLIC "-Wno-error") +target_link_libraries (bfas ${BFASM_LIBS} ${BFASM_LIB_DEPS}) +add_dependencies(bfas bfasm_yaml) + +install (TARGETS bfas + RUNTIME DESTINATION bin) +# Link bfas into the p4c binary folder. 
+add_custom_target(linkbfas + COMMAND ${CMAKE_COMMAND} -E create_symlink ${BFASM_BINARY_DIR}/bfas ${P4C_BINARY_DIR}/bfas + ) +add_dependencies(linkbfas bfas) +add_dependencies(p4c_driver linkbfas) + + +string(CONFIGURE "/^DECLARE_(ABSTRACT_)?TABLE_TYPE\(([a-zA-Z0-9_]+)/2/c/" CTAGS_CXX_REGEXP @ONLY) +add_custom_target(ctags-asm + COMMAND ctags -R -I VECTOR "--regex-C++=${CTAGS_CXX_REGEXP}" + COMMENT "Generating ctags") + + +if (ENABLE_GTESTS) + # TODO Components need to be built, once, into intermediate libraries. + # These lib would then be linked to unit-tests and also linked into larger components/executable. + # The exact shape, size, hierarchy of components is to be decided. + # For now we will allow the source to be built twice, once for 'bfas', and once for + # gtest/CMakeList.txt as a single monolithic component 'bfas_lib'. + # TODO ASAP refactor bfas.cpp, moving main() into its own file. + # add_executable (bfas asm_main.cpp) + # target_link_libraries (bfas bfas_lib ${BFASM_LIBS} ${BFASM_LIB_DEPS}) + + add_library (bfas_lib ${BFAS_SOURCES}) + target_compile_definitions(bfas_lib PRIVATE BUILDING_FOR_GTEST) # removes main() + target_link_libraries (bfas_lib PRIVATE ${BFASM_LIBS} ${BFASM_LIB_DEPS}) + + set(BFAS_GTEST_SOURCES + gtest/gtestasm.cpp + gtest/asm-types.cpp + gtest/depositfield.cpp + gtest/gateway.cpp + gtest/hashexpr.cpp + gtest/mirror.cpp + gtest/parser-test.cpp + gtest/register-matcher.h + gtest/register-matcher.cpp + ) + + # Do not use a unity build for gtestasm (for now). + set_source_files_properties (${BFAS_GTEST_SOURCES} PROPERTIES SKIP_UNITY_BUILD_INCLUSION TRUE) + + add_executable (gtestasm ${BFAS_GTEST_SOURCES} ${BFP4C_SOURCES}) + target_link_libraries (gtestasm PRIVATE bfas_lib gtest ${BFASM_LIB_DEPS}) + target_compile_options (gtestasm PRIVATE -Wall -Wextra -ggdb -O3 + -Wno-unused-parameter -Wno-sign-compare) + # Disable errors for warnings. FIXME: Get rid of this. 
+ target_compile_options(gtestasm PUBLIC "-Wno-error") + + # Add to CTests - but this is in the BFASM project viz build/bf-asm, not build/p4c + add_test (NAME gtestasm COMMAND gtestasm WORKING_DIRECTORY ${P4C_BINARY_DIR}) + set_tests_properties (gtestasm PROPERTIES LABELS "gtest") +endif () diff --git a/backends/tofino/bf-asm/Options.md b/backends/tofino/bf-asm/Options.md new file mode 100644 index 00000000000..05dba981d2a --- /dev/null +++ b/backends/tofino/bf-asm/Options.md @@ -0,0 +1,98 @@ +# bfas command line options + +usage: bfas [ options ] file.bfa + +### general options + +* -h + help + +* --target *target* + + specify the target (obsolete as target is generally specified in the .bfa file) + +* -Werror + + treat warnings as errors + +### options for controlling output + +* -a +* --allpipes + + Generate a binary that has explicit writes for all pipes, rather than just one + +* -s +* --singlepipe +* --pipe*N* + +* -G +* --gen\_json + + Generate .cfg.json files instead of binary + +* --no-bin +* --num-stages-override*N* + +* -M + + Attempt to match glass bit-for-bit + +* -o *directory* + + Generate output in the specified directory rather than in the current working dir + +### options for controlling cfg details + +* -C + condense json by stripping out unset subtrees (default) + +* --disable-egress-latency-padding + + Disable the padding of egress latency to avoid tofino1 TM overrun bus + +* --disable-longbranch +* --enable-longbranch + + Disable or enable support for long branches + +* --disable-tof2lab44-workaround + +* --high\_availability\_disabled +* --multi-parsers +* --no-condense +* --noop-fill-instruction *opcode* + + Insert instructions (of the form *opcode* R, R, R) for noop slots in VLIW instructions + where the slot is not used by any action in the stage. 
*opcode* must be one that is an + identity function when applied to two copies of the same value (and, or, alu\_a, alu\_b, + mins, maxs, minu, or maxu) + +* -p + Disable power gating + +* --singlewrite +* --stage\_dependency\_pattern *pattern* +* --table-handle-offset*N* + +### options for logging/debugging + +* -l *file* + + redirect logging output to file + +* --log-hashes + +* -q + + disable all logging output + +* --no-warn + +* -T *debug spec* + + enable logging of specific source files and specific levels + +* -v + + increase logging verbosity diff --git a/backends/tofino/bf-asm/README.md b/backends/tofino/bf-asm/README.md new file mode 100644 index 00000000000..ed0f50f1ae0 --- /dev/null +++ b/backends/tofino/bf-asm/README.md @@ -0,0 +1,375 @@ +# Tofino Assembler + +## Documentation + +## Setup + +The repository contains code for the Barefoot assembler (bfas) and linker (walle). +More info on walle can be found in walle/README.md. + +Assembler takes assembly files (.bfa or .tfa) as input to generate output json which is +then fed to walle to produce binary for tofino. + +## Dependencies + +- GNU make +- A C++ compiler supporting C++11 (the Makefile uses g++ by default) +- bison +- flex + +Running the test suite requires access to the Glass p4c\_tofino compiler. +Running stf tests requires access to the simple test harness. The +`tests/runtests` script will look in various places for these tools (see the top +of the script) + +## Building Assembler + +The assembler is built automatically as part of the full bf-p4c-tofino build; there +is currently no supported standalone method of building the assembler by itself. + +## Address Sanitizer checks + +(obsolete) +To enable address sanitizer checks in the assembler use, + +``` +user@box$ ./bootstrap.sh --enable-asan-checks +``` + +Or alternatively, + +``` +user@box$ ./configure --enable-asan-checks +``` + +This configures the Makefile to add -fsanitizer=address & -fsanitizer=undefined. 
+By default the leak sanitizer is also enabled along with the address sanitizer. +You can disable it by setting environment variable ASAN\_OPTIONS with +"detect\_leaks=0". + +## Testing + +### Make Targets + +``` +user@box$ make check +``` + +Runs tests/runtests script on all .p4 files in the tests and tests/mau +directories and .bfa files in tests/asm directory. This script can run one or +more tests specified on the command line, or will run all .p4 files in the +current directory if run with no arguments. Stf tests can be run if specified +explicitly on the command line; they will not run by default. + +``` +user@box$ make check-sanity +``` + +This is similar to `make check` but will only run on .p4 files in the tests +directory which is a small subset for a quick sanity check. + +### Runtests Script + +The ./tests/runtests script will first run glass compiler (p4c-tofino) on +input .p4 file and then run the assembler (bfas) on generated assembly (.tfa) +file. Glass also generates output json which is then compared (by the script) +to the json generated from assembler. + +To skip running glass use -f option on the runtests script + +Use -j to run parallel threads. If invoking through Make targets set +MAKEFLAGS to "-j " + +### Expected Failures + +expected\_failures.txt files are under tests & tests/mau directory which outline +failing tests with cause (compile, bfas, mismatch). These files must be updated +to reflect any new or fixed fails. + +| FAIL | TYPE | CAUSE | |----------|--------------|---------------------------------------------------------| | compile | Glass | Glass cannot compile input .p4 file | | bfas | Assembler | Assembler error while running input assembly file (.bfa)| | mismatch | Json output | Difference in json outputs for glass and assembler | + +### Context Json Ignore +Context Json output from Glass compiler is verbose and may or may not be +consumed entirely by the drivers unlike the assembler Json output. 
The +tests/runtests script ignores the keys placed in the tests/ctxt\_json\_ignore file +while creating json diff to only display relevant mismatches + +### Json Diff +Each test after running will have its own .out dir with following +items: +E.g. TEST = exact\_match0.p4 +exact\_match0.p4.out +##### Glass Json output +``` +├── cfg +│   ├── memories.all.parser.egress.cfg.json.gz +│   ├── memories.all.parser.ingress.cfg.json.gz +│   ├── memories.pipe.cfg.json.gz +│   ├── memories.top.cfg.json.gz +│   ├── regs.all.deparser.header_phase.cfg.json.gz +│   ├── regs.all.deparser.input_phase.cfg.json.gz +│   ├── regs.all.parse_merge.cfg.json.gz +│   ├── regs.all.parser.egress.cfg.json.gz +│   ├── regs.all.parser.ingress.cfg.json.gz +│   ├── regs.match_action_stage.00.cfg.json.gz +│   ├── regs.match_action_stage.01.cfg.json.gz +│   ├── regs.match_action_stage.02.cfg.json.gz +│   ├── regs.match_action_stage.03.cfg.json.gz +│   ├── regs.match_action_stage.04.cfg.json.gz +│   ├── regs.match_action_stage.05.cfg.json.gz +│   ├── regs.match_action_stage.06.cfg.json.gz +│   ├── regs.match_action_stage.07.cfg.json.gz +│   ├── regs.match_action_stage.08.cfg.json.gz +│   ├── regs.match_action_stage.09.cfg.json.gz +│   ├── regs.match_action_stage.0a.cfg.json.gz +│   ├── regs.match_action_stage.0b.cfg.json.gz +│   ├── regs.pipe.cfg.json.gz +│   └── regs.top.cfg.json.gz +├── context +│   ├── deparser.context.json +│   ├── mau.context.json +│   ├── parser.context.json +│   └── phv.context.json +``` +##### Assembler Output Directory +``` +├── exact_match0.out +``` +##### Assembler Json Output +``` +│   ├── memories.all.parser.egress.cfg.json.gz +│   ├── memories.all.parser.ingress.cfg.json.gz +│   ├── memories.pipe.cfg.json.gz +│   ├── memories.top.cfg.json.gz +│   ├── regs.all.deparser.header_phase.cfg.json.gz +│   ├── regs.all.deparser.input_phase.cfg.json.gz +│   ├── regs.all.parse_merge.cfg.json.gz +│   ├── regs.all.parser.egress.cfg.json.gz +│   ├── regs.all.parser.ingress.cfg.json.gz 
+│   ├── regs.match_action_stage.00.cfg.json.gz +│   ├── regs.match_action_stage.01.cfg.json.gz +│   ├── regs.match_action_stage.02.cfg.json.gz +│   ├── regs.match_action_stage.03.cfg.json.gz +│   ├── regs.match_action_stage.04.cfg.json.gz +│   ├── regs.match_action_stage.05.cfg.json.gz +│   ├── regs.match_action_stage.06.cfg.json.gz +│   ├── regs.match_action_stage.07.cfg.json.gz +│   ├── regs.match_action_stage.08.cfg.json.gz +│   ├── regs.match_action_stage.09.cfg.json.gz +│   ├── regs.match_action_stage.0a.cfg.json.gz +│   ├── regs.match_action_stage.0b.cfg.json.gz +│   ├── regs.pipe.cfg.json.gz +│   ├── regs.top.cfg.json.gz +``` +##### Context Json +``` +│   └── context.json +``` +##### Symlink to Glass Assembly File +``` +├── exact_match0.tfa -> out.tfa +``` +##### Glass Run Log +``` +├── glsc.log +``` +##### Json Diff File +``` +├── json_diff.txt +``` +##### Glass Output Logs +``` +├── logs +│   ├── asm.log +│   ├── mau.characterize.log +│   ├── mau.config.log +│   ├── mau.gateway.log +│   ├── mau.gw.log +│   ├── mau.log +│   ├── mau.power.log +│   ├── mau.resources.log +│   ├── mau.rf.log +│   ├── mau.sram.log +│   ├── mau.tcam.log +│   ├── mau.tp.log +│   ├── pa.characterize.log +│   ├── pa.liveness.log +│   ├── pa.log +│   ├── parde.calcfields.log +│   ├── parde.config.log +│   ├── parde.error.log +│   ├── parde.log +│   ├── pa.results.log +│   ├── parser.characterize.log +│   └── transform.log +├── name_lookup.c +``` +##### Glass output assembly file +``` +├── out.tfa +``` +##### Assembler Run Log +``` +├── bfas.config.log +├── bfas.log +``` +##### Test visualization htmls +``` +└── visualization + ├── deparser.html + ├── jquery.js + ├── mau.html + ├── parser.egress.html + ├── parser.ingress.html + ├── phv_allocation.html + └── table_placement.html +``` + +## Backends (Tofino/JBay) +Assembler currently supports Tofino backend but code is generic enough to be +ported to a different backend like JBay. 
Architecture specific constants must be
+parameterized and placed in the constants.h file
+
+"tofino" and "jbay" directories hold the chip schema to be used by the
+assembler. The chip schema contains register information and is a binary
+(python pickle file) generated from csv file in bfnregs repository.
+
+### Extracting information from hardware bfnregs info
+
+To the greatest extent possible, we automatically generate assembler support code
+directly from the information provided to us by the hardware team. The main 'source'
+we get from hardware are the Semafore .csr files and the associated .csv files generated
+by Semafore from the .csr files. We use walle (walle subdirectory) to read the .csv
+files and distill them into a chip.schema -- a python pickle file containing the
+datastructures defined in walle/csr.py that encapsulate the information and structure
+of all the hardware registers.
+
+We then use walle to generate C++ code embodying the register structure, defining C++
+classes containing the structure of all the registers. The template.yaml file defines
+various options for the structure of the resulting C++ code -- which registers to use
+as the 'roots' of class hierarchies, what files to write the code in, which methods to
+define in each class. Within the templates.yaml file, there's a `global:` section giving
+global options for all files, a `generate:` section listing the files to generate, and
+an `ignore:` section listing register subtrees to ignore (no code will be generated for
+them -- it's as if they don't exist).
+
+Options that can be used include:
+
+| option | description |
+| ----------- | ---------------- |
+| decl | generate just declarations (suitable for a header file) |
+| defn | generate definitions for those declarations. 
With neither `decl` nor `defn` will generate complete classes with inline methods |
+| checked\_array | Use the checked\_array class (`checked_array.h`) for arrays (default) |
+| delete\_copy | Delete copy constructors for generated classes |
+| dump\_unread | generate a `dump_unread` method which dumps all unread registers to an ostream (default False) |
+| emit\_binary | generate an `emit_binary` method that outputs binary code for the driver/model |
+| emit\_fieldname | generate `emit_fieldname` method used to print logging messages |
+| emit\_json | generate `emit_json` method to generate config json |
+| enable\_disable | generate `enable`, `disable`, and `modified` methods |
+| global | generate the specified register types once as global names rather than as nested in the containing object(s) |
+| include | generate a `#include` of the specified file |
+| name | Change the name of the top-level object |
+| namespace | Put all declarations in the specified namespace |
+| unpack\_json | generate `unpack_json` method |
+| widereg | Use `widereg` for registers wider than 64 bits |
+| write\_dma | Generate `'B'` block writes for the specified registers instead of `'R'` single register writes in `emit_binary` methods |
+
+This results in C++ code that can either generate .cfg.json files or binary files for use by
+the driver/model. When cfg.json files are produced, walle can be used to link them into a
+binary file. There are also options for generating C++ code to read .cfg.json files for
+future support of binary disassembly.
+
+### Config JSON
+The config json files (with .cfg.json extension) are generated by the
+assembler which are fed into walle to generate the binary
+(also called `tofino.bin`)
+
+The config json is nothing but json files with a map of all the registers for
+a backend. In order to limit the json file size assembler disables registers
+which are not set (with the -C or condense json flag). 
Some registers are also
+explicitly disabled or enabled based on what the driver expects to see in the
+tofino.bin. Below is the status of regs and whether they will appear in the
+config json.
+```
+---------------------------------
+Disabled - (unconditionally)
+---------------------------------
+mem_pipe.mau
+regs.input.icr.inp_cfg
+regs.input.icr.intr
+regs.header.hem.he_edf_cfg
+regs.header.him.hi_edf_cfg
+regs.glb_group
+regs.chan0_group.chnl_drop
+regs.chan0_group.chnl_metadata_fix
+regs.chan1_group.chnl_drop
+regs.chan1_group.chnl_metadata_fix
+regs.chan2_group.chnl_drop
+regs.chan2_group.chnl_metadata_fix
+regs.chan3_group.chnl_drop
+regs.chan3_group.chnl_metadata_fix
+---------------------------------
+Disabled - (if Zero)
+---------------------------------
+regs (In all regs)
+mem_top (mau)
+mem_pipe (mau/dummy_reg)
+reg_top (ethgpiobr, ethgpiotl, pipes)
+reg_pipe (mau, pmarb, deparser)
+---------------------------------
+Enabled - (always)
+---------------------------------
+regs.dp.imem.imem_subword8
+regs.dp.imem.imem_subword16
+regs.dp.imem.imem_subword32
+regs.rams.map_alu.row[row].adrmux.mapram_config[col]
+```
+Once JBay support is added for all regs, above will be different for both
+backends.
+
+Driver dictates which regs are disabled or enabled unconditionally. Other
+regs which are disabled if zero are to limit file size and driver should
+automatically fill in the zero values.
+
+#### Generating and using chip.schema
+
+chip.schema files are generated by walle from the csv files in the
+bfnregs repo. To generate a new chip.schema file, use
+
+ walle/walle.py --generate-schema ${BFNREGS_REPO}/modules/${CHIP}_regs
+
+where `${BFNREGS_REPO}` is the root of the bfnregs repo, and `${CHIP}`
+is the chip to target (`tofino`, `trestles`, or `jbay` at the moment).
+The newly created chip.schema file should then be moved into the jbay
+or tofino subdirectory where the build system expects to find it. 
+
+chip.schema is a binary (python pickle) file; you can use
+`walle.py --dump-schema` to dump it as (vaguely human readable)
+yaml. It is basically a DAG of python objects (csr.address\_map,
+csr.address\_map\_instance, and csr.reg) describing the register tree.
+The build uses walle to turn this into json files describing various
+subtrees of the dag. The `template_objects.yaml` file describes which
+subtrees to generate json files for as well as a list of subtrees to
+ignore (elide from the json files). Names in this file are the names of
+csr.address\_map objects (NOT instances), and where the generated files
+are nested, the containing json will contain a reference to the contained
+json rather than a copy of the tree. In this way, the generated json
+files as a group describe the DAG even though json can only describe
+trees, not DAGs.
+
+If, when running make, you get a KeyError from walle, that generally
+means that the template\_objects.yaml file contains a reference to
+some csr.address\_map that does not exist in the chip.schema file --
+the register tree has changed in a way that invalidates the json files
+it is trying to generate. If you have your python set up to drop into pydb
+automatically on an uncaught exception (highly recommended), at that point
+you can use `pp section` to list all the csr.address\_map objects that
+*are* in the chip.schema. Generally you'll find that it is the 'ignore'
+names that have changed, so fixing them is trivial.
+
+## Assembly Syntax
+The assembly syntax is documented in the `SYNTAX.md` file
diff --git a/backends/tofino/bf-asm/SYNTAX.yaml b/backends/tofino/bf-asm/SYNTAX.yaml
new file mode 100644
index 00000000000..e11a476f63a
--- /dev/null
+++ b/backends/tofino/bf-asm/SYNTAX.yaml
@@ -0,0 +1,1294 @@
+# Copyright (C) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 + +# yaml format tags for tofino assembler +# +# symbols used below: +# ::= a string of 1 or more letters, digits, '_', '-', '.', +# '$', or '@' not starting with a digit. Cannot start or +# end with '-' or '.' or have two consecutive '-' or '.' +# ::= "ingress" | "egress" | "ghost" +# ghost is only availble in jbay mau and phv sections +# ghost only has 'ghost_md' and 'pipe_mask' sections used to +# configure 'tm_status_phv' register +# ::= that matches a predefined register name +# ::= (..) | () +# no spaces between parts of the +# ::= unsigned integer constant +# 0x/0b/0o prefix for hex/binary/octal +# ::= constant where one or more digits may be replaced +# by '*' to denote don't-care for ternary matches. +# ::= .. +# no spaces between parts of the +# ::= | | '[' | , ... ']' +# can be a single constant or range or multiple constants +# or ranges in a yaml list +# ::= | +# Must be a register name or a name defined in the phv section +# ::= +# Denotes a single bit +# ::= half | byte0 | byte1 +# denotes one of the parser match units +# ::= { , } +# ::= | | +# | hash_dist [ .. ] +# | rng [ .. ] +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +phv []: + # Defines PHV alias names for registers. is 'ingress', 'ghost' or + # 'egress' and is optional. If present, aliases are defined only for + # that thread. If not present, aliases are for all threads + : | + # Makes an alias for a register or piece of a register. 
+ # register B0 through B63 for byte registers, H0-H95 for half + # (16-bit) registers, or W0-W63 for word (32-bit) registers. + # May also use R0-R224 for all registers in W/B/H order. + # TW0-TW31, TB0-TB21, TH0-TH47, R256-R367 for tagalong + : + stage ..: | + stage : | + # makes name an alias that varies over stages. In the first form, the map + # applies just for those stages; in the second form it applies from that stage + # up until the next stage specified by another stage key. Stage numbers + # apply to iPHV (input), so uses for VLIW destinations use the mapping for the + # next stage. Parser uses stage 0 and deparser uses the end-of-pipe stage + context_json: + : + # any keys/vector/values here are converted to json and output into + # the context.json phv_allocation.records info for field + # verbatim +hdr: + # Information related to hdr_ids (header names and compressed + # header sequence and length encoding in bridge metadata) + map: + # Mapping of hdr_ids to header names + : + # constant is hdr_id, name can be used as substitution for hdr_id + # e.g. md32: 0, ethernet: 1, ipv4: 2, ... + seq: + # Compressed header sequence encoding in bridge metadata + : [ , ... ] + # constant is 8b sequence number; 255 is reserved for + # escape value to specify explicit list of all headers + # e.g. 0: ethernet, ipv4 + len: + # Compressed header length encoding in bridge metadata + : { base_len: , num_comp_bits: , scale: } + # base_len is 8b, num_comp_bits 3b, scale 2b + # e.g. ipv4: { base_len: 20, num_comp_bits: 4, scale: 2 } # (20B + N * 4B; N < 10) + # e.g. ipv6: { base_len: 40, num_comp_bits: 7, scale: 3 } # (40B + N * 8B) +parser [] [, ...]: + # Defines a parser. must be 'ingress' or 'egress' + # can have numerical values, such as range, int, vector of range and int. + # can also have symbolic values, such as "ALL", "CPU", "PCIE", "UNUSED" + # "CPU" and "PCIE" are symbolic names that the corresponding numeric values are device-specific. 
+ # "UNUSED" represents the parser program that is not configured to any physical + # parser during device initialization, but may be used by driver during runtime. + name: # parser name defined in the arch file, used by bfrt to find the parser + # config in context.json + start: + start: '[' , ... ']' + # define up to 4 distinct initial start states for the four channels + priority: | '[' , ... ']' + priority_threshold: | '[' , ... ']' + # define initial parser priority and threshold for the 4 channels + states: + # Parser states can be defined with or without the 'states' key but this + # is preferred as it avoids name collisions with other assembly + # directives, + []: + # Defines a parser state. The state 'start' is the implicit initial state + # if there is no explicit initial state defined by a separate 'start' entry. + # The state 'end' cannot exist (used for exit) + # The optional constant is the 8-bit value used to denote the state; + # overlapping state values will be flagged as an error + match: | { : } + # specifies up to 4 bytes to match against in the input buffer + # may also specify 'ctr_zero' and 'ctr_neg' to match those + # special bits, or the specific matchers 'byte0', 'byte1' or 'half' + # to match against values explicitly loaded by a 'save' in a previous + # state. May additionally specify specific matchers to use. + option: + # enable an optional feature for this state. Currently the only option + # is "ignore_max_depth" which means the state (and subsequent states) are + # ignore for calculating the max parser bytes. 
Used for min parser padding + # states + : + # actions to perform when the match matches this match constant + # this is a tcam priority match, so only the first match triggers + buf_req: + # number bytes that must be in the input buffer to not stall + counter: inc | dec | load | + # modification of the counter + src: # matcher to load counter from + max: # max value of the counter + rotate: # number of bits to circular right rotate + mask: # mask of rotated value + add: # immediate to add to masked value + checksum : + # modification of checksum unit + type: VERIFY | RESIDUAL | CLOT + mask: # vector of byte ranges of input buffer + swap: + dest: | clot # write destination + start: 0 | 1 + end: 0 | 1 + next: | + # next state -- match-constant takes don't care bits + # from current state + offset: [set] | inc + # modificate to the offset + priority: | @ [ >> ] [ & ] + # update the packet priority + load: { : , ... } | + # specifies one or more values from the input stream to be + # loaded into specific matchers or specific matchers to have + # their values preserved for use by later states + save: [ , ... ] + # specifies on more more values from the matchers to be + # saved into the scratch pad registers + shift: + # number of bytes to shift out + intr_md: + # number of bits of intrinsic metadata being shifted out + [rotate] : [offset] + [rotate] : [offset] + # write the specifed byte (or range) to named phv slot + [offset] : [rotate] + # write the specified constant to the phv location + clot : + # output a CLOT (jbay only) + start: + length: | + # expression is generally '@' [ '>>' ] [ '&' ] + # with variations (unary is highest precedence, followed by shift, + # mask(&), +/- lowest) + max_length: + stack_depth: + stack_inc: + hdr_len_inc_stop: + # stop the header length counter, and use value as final increment amount (jbay only) + disable_partial_hdr_err: + # specifies whether the partial header error is enabled or not for + # the current state. 
Specific to: + # - JBay + partial_hdr_err_proc: + # specifies whether or not the best effort extraction (a.k.a. greedy extract) + # is performed when insufficient data is available in the input buffer. + value_set : + handle: # pvs handle + field_mapping: + (range) : (range) + # actions to perform for a value set of the given and integer + default: + # actions to perform regardless of the match + # if there is no 'default' tag in a state, anything that is not + # recognized as a valid state tag is treated as part of an implicit + # default + hdr_len_adj: + # value for the hdr_len_adj register + init_zero: [ ,... ] + # list of phv slots that should be initialized to (valid) zero + meta_opt: + # value for the meta_opt register + multi_write: [ ,... ] + # list of phv slots that the parser may write multiple times + # values OR'd with previous values + parser_error: + # define a phv location to receive parser error codes + bubble: + # configure rate limit registers for pipe + inc: + dec: + max: + interval: # tofino2+ only + ghost_md: + # container(s) allocated to store ghost intrinsic metadata + # tofino2+ only (set tm_status_phv reg) + pipe_mask: + # pipe_mask to be set for ghost packets + # (tm_status_phv.pipe_mask in tofino2) + parse_depth_checks_disabled: true | false + # Parse depth checks disabled + states: + : + # specifies match values for states (64b) + # e.g. parse_ipv4: 0x********_******02 + # e.g. 
parse_tcp: 0x********_******04 + port_metadata: + # specifies port metadata for each logical port number + : + # constant is logical port number, vector is port metadata (14B) + profile : + # specifies a parser profile, constant represents the TCAM&SRAM index + match_port: + match_inband_metadata: + # if the logical port number (2'b0 ++ 6b) or inband metadata (8B) match is omitted, + # it is treated as * + initial_pktlen: + # specifies the value to adjust the length for AdjustedPacketLength (6b) + initial_seglen: + # specifies the value to adjust the length for AdjustedSegLength (6b) + initial_state: | + # specifies initial state (80b) + # if a state name is used, all-match bits are initialized to 0 + # and upper-most two bytes are set to 0 + initial_flags: + # specifies initial flags (64b) + initial_ptr: + # specifies initial pointer (8b) + initial_w0_offset: + # specifies initial W0 offset (8b) + initial_w1_offset: + # specifies initial W1 offset (8b) + initial_w2_offset: + # specifies initial W2 offset (8b) + initial_alu0_instruction: + # specifies initial instruction for ALU0 (15b) + initial_alu1_instruction: + # specifies initial instruction for ALU1 (19b) + metadata_select: '[' , ... ']' + # specifies source of each of 32B of MD32 metadata + analyzer_stage []: + # constant is stage number + # if name is present, it is looked up in state_map and all rules match the state + # only one of stage name and rule state match can be present + # e.g. stage 0 parse_ipv4 + rule : + # constant is explicit rule index; it also specifies the rule priority + # when more rules match (higher value is higher priority) + # each rule supports up to 4 instructions for modifying flags, one for up to 16 bits + # (modify_flags16), one for up to 4 bits (modify_flags4), and two for 1 bit + # (modify_flag0 and modify_flag1). All of these can be set simultaneously. 
If the + # affected flags overlap, the instructions take effect in the following order of + # precedence: + # modify_flags16, modify_flag4, modify_flag0, modify_flag1. + match_state: | + # if state match constant is ommited, it is derived from the state name at stage + # if it is missing, it is treated as * + match_w0: + match_w1: + # if w0 or w1 match constant is ommited, it is treated as * + # e.g. rule 0 w0 17 # state is matched based on `stage 0 parse_ipv4' above + # e.g. rule 0 w0 17 state 0x************02 # 17 is for TCP; 02 is for parse_ipv4 (see state_map) + next_state: | + # specifies state for the next stage (80b) + # e.g. next_state: 0x**************04 + # e.g. next_state: parse_tcp + next_skip_extractions: + # skips Wx extractions in the next stage (bool) + next_w0_offset: + # specifies W0 offset for the next stage (8b) + # e.g. next_w0_offset: 2 # TCP destination port + next_w1_offset: + # specifies W1 offset for the next stage (8b) + next_w2_offset: + # specifies W2 offset for the next stage (8b) + next_alu0_instruction: + # specifies instruction for ALU0 for the next stage (15b) + next_alu1_instruction: + # specifies instruction for ALU1 for the next stage (19b) + push_hdr_id: { hdr: , offset: } + # specifies header ID (8b) or name, and offset (8b) to be pushed to ana_hdr_ptrs + # if name is specified, it is looked up in hdr -> map + # 0xff for header ID is reserved for invalid + # offset is relative to pointer + # e.g. push_hdr_id: ipv4 0 + modify_flags16: + # specifies src (2b), imm (16b), mask (16b), and shift (6b) to set/clear + # multiple flags at once. 
+ src: + imm: + # whether to set or clear the corresponding flags, only used if src == 3 + mask: + # modify a flag if the corresponding mask bit is set + shift: + # index into flags at which to start the operation + modify_flags4: + # same as modify_flag16, but with 4b imm and mask + src: + imm: + mask: + shift: + modify_flag0: { set: | clear: }, + # set or clear the flag at the index given by the 6-bit + modify_flag1: { set: | clear: }, + # same as modify_flag0 + modify_checksum: { idx: , enabled: }, + # changes the enabled state of the checksum unit at the 1-bit index idx + phv_builder_group : + # specifies extract groups + pov_select: + # specifies which POV bytes are used to address the TCAM & SRAM + extract : + # specifies extracts + match: + # match constant is 4B + source: + # specifies a PHE source pair + initial_predication_vector: + # specifies the initial predication information + pov_select: + # specifies which POV bytes are used to address the TCAM + next_tbl_config: + # a mapping for the IPV TCAM, from match constants to next table identifiers + : + # match constant is 4B, constant is 1B + ghost_initial_predication_vector: + # specifies the initial predication information for the ghost thread + pov_select: + # specifies which POV bytes are used to address the TCAM + next_tbl_config: + # a mapping for the IPV TCAM, from match constants to next table identifiers + : + # match constant is 4B, constant is 1B + checksum_checkers: + mask : + # There can be up to 4 masks. + # Each mask is specified as an up to 224b wide constant. + # A mask specifies which bytes of the header are used for the checksum computation. + # 1 -> used, 0 -> not used + unit : + # Each unit is able to verify 1 checksum. + # The checksum is computed according to the selected 'config' + # which specifies a header ('hdr') and a mask ('mask_sel'). + # A config is selected using the 'match_pov' key. + # There are 2 units. 
+ # Both units operate independently and allow verification of overlapping bytes. + pov_select: + # Specifies which POV bytes are used to address the TCAM + config : + # There are up to 16 checksum configurations for each unit. + match_pov: + # 32b match key + mask_sel: + # Selects one of the 4 masks (2b). + # Both csum units can select the same mask. + hdr: + # Specifies which header is used for the checksum computation. + # The concrete header bytes which are used for the checksum + # are specified using a mask. + # One of the 4 masks is selected using the 'mask_sel' field. + pov_flags_pos: + # specifies start position of POV flags in bridge metadata (6b) + pov_state_pos: + # specifies start position of POV state in bridge metadata (6b) +stage : + # Defines a single stage of the MAU. The order of the tables within the + # stage is the logical table ordering, so order matters + []: + # common keys available in (almost) all tables types + row: + # one or more ram rows the table uses + # whether these are physical or logical rows depends on the table type + column: | '[' , ... ']' + # May be a single vector or a list of vectors. If a list, length + # must match the number of rows specified + # Denotes the rams used on each row. RAM type (sram, tcam or mapram) + # depends on the table type + stage : | '[' , ... ']' + - stage : + stages: | '[' , ... ']' + bus: 0 | 1 | '[' 0 | 1, ... ']' + # bus(es) to use. If a list, must match the number of rows + lhbus: 0 | 1 | + rhbus: 0 | 1 | + word: + # for wide tables, specify which word of the wide word is in each row of the table + vpns: + # vpn values to use for rams + dyanamic_config: + : + : { : } + # defines the match for one specific dconfig bit. Match may be a single + # match for the entire width or matches against specific + # named slices of those PHVs. Other bits are implicitly don't-care + input_xbar [ | ]: + # Input xbar config for this table + : | '[' , ... ']' | + { : | ..: ,... } + [] group : ... 
# tofino 1/2 + # One or more registers to be mapped into the specified ixbar group + # in order or at the locations specified. Locations are bit offsets in + # the group (even for groups that are not bit-addressable + hash [table] : + # hash table config + : + # specify one column of the table -- hash is a 64-bit constant + | : + # specify one or more columns according to expression. Phv refs + # must be in the corresponding input group of this input_xbar + + # identity copy of phv (must match width of range) + random(, ...) + # random hash of the given phv locations. We generate + # with random(3) and we do NOT call srand, so the hash + # for a given program is repeatable. + crc(, [,] , ...) + # Deprecated. + # crc hash of the given phv locations -- first arg is integer + # constant denoting polynomial (Koopman notation). Second arg + # is an initial constant prefixed to the input. + crc(, [,] '{' : , ... '}' [, '{' : '}' ]) + # crc hash of the given phv locations, at the specified offsets + # from the lsb of the crc input as a whole. + # arguments are polynomial (Koopman Notation) + # init shift register + # final xor + # total number of bits in the crc + # -- + # hash calculation could have a list of constants as inputs. + # : represents the constant value in hash calculation, + # the key encodes the offset and length of the constant + # the value encodes the value of the constant + crc_rev(...) + # bit-reversed (little endian instead of the defaul big endian) crc + xor(, '{' : , ... '}') + # XOR of a data block (message) + # + # Fields specified in the second parameter are joined into one + # big bit stream, cut into blocks of width specified in the first + # parameter and the blocks are bitwise XORed together. If the + # field list is not continuous (there are gaps in bit offsets), + # zeros are padded in. + # + # Constants are not supported by this directive - they are computed + # into the seed value by the backend. + ^ + # xor of other expressions. 
+ & + & + # mask expression, including just some bits in the result + stripe(, ...) + # stripe other expressions across the width required, + # repeating as necessary + sextend() + sign_extend() + # sign extend expression (replicating the sign bit to the needed width + : parity + # Keyword parity indicates this bit is reserved for parity + # calculation + valid : + # specifies the 16-bit valid hash for one column of the table + hash group : + # hash group config + table: + # one or more hash tables to xor together for this group + seed: + # 52-bit hash seed value + seed_parity: + # optional parameter to indicate if seed must be parity + # encoded, must be true when hash parity is enabled on the + # group. + + # a single table to use for the group + : '[]' + - + # use an xbar group configured elsewhere + random_seed: + # random seed from pragma for the table + exact unit: + output unit: + gateway: + # gateway table on this table -- see below + format: { : , : ... } + # format of data in the table, mapping names to ranges of bits. + # fields with sizes instead of explicit ranges will be laid out + # by the assembler following preceeding fields + hash_dist: { : , ... } + : # hash distribution unit to config + hash: + mask: + shift: + expand: + # hash distribution config params + output: | '[' ,... ']' + # outputs to enable for this hast_dist unit + # 'lo' | 'hi' | 'meter' | 'stats' | 'action' | 'hashmod' + instruction: (, ) + # specifies where to get the action index and pfe for the instruction + # to run in a given table + action: ( [, ]) + # Action table to use -- action is a named field from the format + # that determines which action to do. Index is optional (for + # indirect action), named field from format. If not present use + # direct action (index is match address). 
+ action_enable: + enable_action_data_enable: true | false + enable_action_instruction_enable: true | false + default_action: + default_action_handle: + # Specifies a unique integer for action handle, used to match glass + # If not present assembler generate handles + default_action_parameters: { : , ... } + # Specifies list of params and values + action_bus: { : | .. : , ... } + # immediate actions data + # meter output data + actions: + # defines actions that can be used in the table + []: + # the optional index is the index to use in the 8-entry + # instruction indirection map of the table. + [-
] # constant imem address to use for this action + [- ] # map of aliases for data operands + : [ () ] + # defines a name as an alias for (a slice of) something else + [- p4_param_order: '{' param_name : , ... '}' ] + # Param order specifying param name and width for context json (p4_parameters) + [- hit_allowed: '{' allowed: true|false, reason: '}' ] + [- default_action: '{' allowed: true|false, reason: '}' ] + # the next table to be run when the entry hits with this action, could be + # an index into the hit_next + [- next_table: | ] + # the next table to run when the entry misses with this action + [- next_table_miss: ] + [- context_json: ... ] + # any keys/vector/values here are converted to json and output + # into the context.json info for this action verbatim + - + selector: ( [ , [ , ] ] ) + # selection table to use + stats: [ () ] + # statistics table to use + meter: [ () ] + # meter table to use + stateful: [ ( [, ]) ] + stateful: [ (, counter [ hit | miss | gateway ]) ] + # stateful table to use + idletime: + # idletime table + row: + column: | '[' , ... ']' + bus: + precision: 1 | 2 | 3 | 6 + sweep_interval: + notification: enable | disable | two_way + per_flow_enable: true | false + table_counter: disable | table_miss | table_hit | gateway_miss | + gateway_hit | gateway_inhibit + # event type to count in per-table event counter + hit: | '[' , ... ']' + # next table on table hit. If a list, 'format' must contain a + # 'next' field that determines which next table to use + miss: + # next table on table miss + next: + # default (unconditional) next table. Exclusive with hit/miss + p4: # information about P4 level tables and control plane API + name: + # P4 table name + handle: + # runtime API handle for the table + size: + # table size specified in P4 -- may be smaller than the actual + # table size, as table is rounded up to fill memories + match_type: exact | ternary | lpm | ... 
+ action_profile: + how_referenced: direct | indirect + p4_param_order: + # order of match params as seen in p4 program + # PD generated has same order and needs to match context + # json output + : + # param names with their types and size info + type: + size: + ... + context_json: + # any keys/vector/values here are converted to json and + # output into the context.json info for this match param + # verbatim + context_json: + # any keys/vector/values here are converted to json and output into + # the context.json info for this table verbatim + static_entries: + # List of static entries as described in the p4 program. These are + # passed on directly to the driver through context json. The + # match_key_fields_values and action_parameters_values follow the + # same order as the p4_param_order list in the table and action + # sections. + # Match Key Fields Values based on match type: + # Exact - field_name, value + # Ternary - field_name, value, mask + # Range - field_name, range_start, range_end + # Lpm - field_name, value, prefix_length // TODO + - priority: + match_key_fields_values: + - field_name: + value: + mask: # Only for ternary match + range_start: # Only for range match + range_end: # Only for range match + prefix_length: # Only for lpm match // TODO + action_handle: + is_default_entry: + action_parameters_values: + - parameter_name: + value: + exact_match []: + # Exact match table + row: + column: | '[' , ... ']' + # physical rows and srams used by the table + stash: # Stash Allocation for exact match tables only + row: + col: + # Row and col are indexed in sync to give RAM used to determine word + # in entry + unit: + # Unit value can be (0,1) as there are 2 units per row and is indexed + # in sync with row/col values to give stash unit + input_xbar: + # specifies exact match groups, hash tables, and groups (see above) + # If there are multiple groups, the must match the total width of the + # format, which must in turn match the rows and the ways. 
+ format: { : , : ... } + # names may have `()` suffix denoting up to 5 match groups + # all match groups must contain the same keys + # some names have predefined meanings: + match: ... # exact match groups to match against + action: ... # field that selects which action to run + next: ... # next table + match: | '[' , ... ']' + # value(s) to match against the 'match' field(s) in the format + ways: + - '{' < way description '}' + # description of one way of the table + xme: + # 0 - 7 (lambs) , 8 - 15 (stms) + group: + # hash group or XME used for this way + index: | + # hash bits used to index the way rams/lambs (including subword bits) + select: [ '&' ] + # hash bits used to select enable rams/lambs in the way + rams: '[' , ... ']' + # rams or lambs in the way. Each is a vector of 1, 2, or 3 integers + '[' , ']' # tofino1/2 + - '[' ,,, '[' , ']',... ']' + # DEPRECATED description of one way of the table + # initial 3 values are hash group, 10-bit slice from group, and + # mask of upper 12 bits from the group. + match_group_map: '[' '[' ,... ']',... ']' + # map from per-word match groups to overall match groups + # one row for each word in the width of the table with up to + # 5 values for up to 5 match groups in that word. Values are + # match groups in the format + # common keys described above + action: + actions: + action_bus: + default_action: + default_action_handle: + default_action_parameters: + context_json: + gateway: + hash_dist: + hit: + idletime: + meter: + miss: + next: + p4: + selector: + stateful: + stats: + ternary_match []: + # Ternary match table + row: + column: | '[' , ... ']' + # tcam rows and columns to use + input_xbar: + # specifies ternary match groups + group : | '[' , ... ']' + # odd groups are 5 bytes wide, even groups 6 -- the extra byte + # is the byte group n/2 + # TBD -- Need a way to explicitly set byte swizzler? 
+ match: + # Input xbar group(s) to match against -- may be a vector of maps for wide + # matches using multiple groups + group: + # Match group to match against (placed on tcam bus) + byte_group: + # byte group to use for top 4 bits of tcam bus + byte_config: + # value for tcams.vh_data_xbar.tcam_vh_xbar.tcam_row_halfbyte_mux_ctl + # .tcam_row_halfbyte_mux_ctl_select + dirtcam: + # dirtcam control bits for the group; used to set + # tcams.col.tcam_mode.tcam_data_dirtcam_mode (bits 0..9) + # and tcams.col.tcam_mode.tcam_vbit_dirtcam_mode (bits 10..11) + indirect: + # ternary indirection table to use with this table + # if there's an indirection table, it should contain all the table refs + indirect: | + indirect_bus: + # which indirect bus to use for ternary tables with no indirection table + # common keys described above + action: + actions: + action_bus: + default_action: + default_action_handle: + default_action_parameters: + context_json: + gateway: + hash_dist: + hit: + idletime: + meter: + miss: + next: + p4: + selector: + stateful: + stats: + ternary_indirect : + # Ternary indirection table + row: + column: | '[' , ... ']' + # physical rows and srams to use + bus: 0 | 1 | '[' 0 | 1, ... ']' + # ternary indirection bus to use. List must match rows + format: { : , ... 
} + # fields in the ram record, sized in bits + # common keys described above + action: + actions: + action_bus: + default_action: + default_action_handle: + default_action_parameters: + context_json: + gateway: + hash_dist: + hit: + idletime: + meter: + miss: + next: + p4: + selector: + stateful: + stats: + hash_action []: + # hash-action table + row: + bus: + # specify which physical row and exact match bus to use + input_xbar: + # input xbar config (as exact match table) + # common keys described above + action: + actions: + action_bus: + default_action: + default_action_handle: + default_action_parameters: + context_json: + gateway: + hash_dist: + hit: + idletime: + meter: + miss: + next: + p4: + selector: + stateful: + stats: + phase0_match + # special phase 0 match table before stage 0 (only in stage 0 ingress) + p4: # information about P4 level tables and control plane API + width: + # other common keys are NOT available in this table type + proxy_hash []: + # Proxy hash Table + row: + column: | '[' , ... ']' + # see exact_match + input_xbar: + # see exact_match + format: { : , : ... } + # see exact_match + match: ... # exact match groups to match against + action: ... # field that selects which action to run + next: ... # next table + match: hash_group(..) | '[' hash_group(..), ... ']' + # hash groups + ways: + - '[' ,,, '[' , ']',... ']' + # see exact_match + proxy_hash_group: + # hash group of the 8 possible hash groups to use + proxy_hash_algorithm: + # for the context JSON, proxy_hash_algorithm key + # common keys described above + action: + actions: + action_bus: + default_action: + default_action_handle: + default_action_parameters: + context_json: + gateway: + hash_dist: + hit: + idletime: + meter: + miss: + next: + p4: + selector: + stateful: + stats: + action : + # Action table + logical_row: + column: | '[' , ... ']' + # srams to use -- in logical (16x6) coords, not physical (8x12) + home_row: | '[' , ... 
']' + # row(s) to use as home rows for the table + format []: { : , ... } + # fields in the ram record. Different actions may have + # different formats (and different sizes)... + action_bus: { : | .. : , ... } + # mapping from action bus bytes to values in the table. Names + # must be present in the 'format' for the table. + # Can be optional -- if not present, assembler will attempt to + # lay out fields in the action bus based on usage in actions. + actions: + # defines actions that can be used in the table + []: + # the optional index is the index to use in the 8-entry + # instruction indirection map of the table. + [-
] # constant imem address to use for this action + [- ] # map of aliases for data operands + : [ () ] + # defines a name as an alias for (a slice of) something else + [- p4_param_order: '{' param_name : , ... '}' ] + # Param order specifying param name and width for context json (p4_parameters) + [- p4_param_order: + param_name: + width: + context_json: #anything + ... ] + # Alternative syntax for specifying param order when attaching context_json + [- default_action: '{' allowed: true|false, reason: '}' ] + - + p4: # information about P4 level tables and control plane API + # same as exact_match p4 info + context_json: #anything + gateway []: + # 'bare' Gateway table -- no corresponding match table, so must + always specify next table + name: + # Only output when gateway associated with a match table i.e. not + # 'bare' + row: + # physical match row to use + bus: 0 | 1 + # match bus to use + payload_row: + payload_bus: + # row/bus to use for payload -- can only be specified on a + # standalone gateway, as an attached gateway uses the row(s) + # specified by the table it is attached to + input_xbar: + # as for exact_match, but can only specify one group + match: | '[' , ... ']' + # value(s) to match against the match constants + xor: | '[' , ... ']' + # value(s) to xor against the match value + range: 2 | 4 + # do 2 or 4 bit range matches in the upper 12 bits of the gateway + : + # match row for gateway. Value may be (for next table) + # or "run_table" or a map with some or all of these keys. + next: + # next table for this match + run_table: | + # disable the gateway (run the logical match normally) + # not applicable to bare gateways + action: + # run the specified action when the line hits + ? [ , ..., ] : + # Range match row for gateway. Each value except the last is a + # 2**n bit lookup table for a range match unit (so 4 bit values + # for range:2 and 16 bit values for range:4). 
The last value is + # the normal tcam match for the bottom 32 bits of the gateway + Same value options as normal match rows. Big-endian order + for units (last int is bottom 2 or 4 bits of upper 12 bits) + miss: + # behavior if no row matches (same options as match row above) + condition: + # condition output used for model logging + expression : + # condition string as specified in p4 + true : + # next table name when condition is true + false : + # next table name when condition is false + payload: + # payload data to use if gateway is not disabled (run_table is false) + match_address: + # gateway match address to use if the gateway is not disabled + context_json: #anything + selection []: + row: + logical_bus: + # must match the number of rows specified. Indicate the logical bus + # used for each rows. Value can be: 'A' => Action Bus, 'S' => Synth + # Bus, 'O' => Overflow Bus, 'X' => Undefined. + column: | '[' , ... ']' + # srams to use -- in logical (16x6) coords, not physical (8x12) + maprams: | '[' , ... ']' + # map rams to use + home_row: + # represent the row ultimately connected to the ALU + input_xbar: + # hash match groups on input xbar + mode: resilient | fair + non_linear: true | false + per_flow_enable: true | false + pool_sizes: + selection_hash: + hash_dist: + # see hash_action hash_dist + p4: # information about P4 level tables and control plane API + # same as exact_match p4 info + context_json: #anything + counter []: + row: + logical_bus: + # must match the number of rows specified. Indicate the logical bus + # used for each rows. Value can be: 'A' => Action Bus, 'S' => Synth + # Bus, 'O' => Overflow Bus, 'X' => Undefined. + column: | '[' , ... ']' + # srams to use -- in logical (16x6) coords, not physical (8x12) + maprams: | '[' , ... ']' + # map rams to use + vpns: + home_row: + # represent the row ultimately connected to the ALU + format: + count: bytes | packets | both | packets_and_bytes + lrt: '{' : , ... 
'}' + - '{' threshold: , interval: '}' ... + # largest recent with threshold params + global_binding: true | false + per_flow_enable: true | false + bytecount_adjust: + # add value to counted bytes + meter []: + row: + logical_bus: + # must match the number of rows specified. Indicate the logical bus + # used for each rows. Value can be: 'A' => Action Bus, 'S' => Synth + # Bus, 'O' => Overflow Bus, 'X' => Undefined. + column: | '[' , ... ']' + # srams to use -- in logical (16x6) coords, not physical (8x12) + maprams: | '[' , ... ']' + # map rams to use + vpns: + home_row: + # represent the row ultimately connected to the ALU + input_xbar: + # hash match groups on input xbar + color_aware: true | false | per_flow + color_maprams: + row: + # logical rows + column: | '[' , ... ']' + bus: + vpns: + hash_dist: + # see hash_action hash_dist + type: standard | lpf | red + count: bytes | packets + bytecount_adjust: + # add value to counted bytes + sweep_interval: + global_binding: true | false + per_flow_enable: true | false + context_json: #anything + stateful []: + row: + logical_bus: + # must match the number of rows specified. Indicate the logical bus + # used for each rows. Value can be: 'A' => Action Bus, 'S' => Synth + # Bus, 'O' => Overflow Bus, 'X' => Undefined. + column: | '[' , ... ']' + # srams to use -- in logical (16x6) coords, not physical (8x12) + maprams: | '[' , ... 
']' + # map rams to use + vpns: + home_row: + # represent the row ultimately connected to the ALU + hash_dist: + # see hash_action hash_dist + input_xbar: + # exact match group and hash to use for phv input + data_bytemask: + hash_bytemask: + # masks specifying which byte of the phv input come from data and hash + initial_value: { lo : , hi : } + # Specify initial value for register, assumed 0 otherwise + const_table: | '{' : '}' + math_table: + data: | '{' : '}' + invert: true | false + shift: + scale: + log_vpn: | + # vpns to use in stateful logging mode + pred_shift: + pred_comb_shift: + # set the salu_output_pred_shift and _comb_shift csr regs explicitly + # FIXME -- should have a better way of doing this? + actions: + : + - + # SALU instructions to run for this table + context_json: #anything + # jbay additional features: + sbus: + # jbay only -- shared bus use + learn: | '['
, ... ']' + match:
| '['
, ... ']' + combine: "and" | "or" + fifo: { push: , pop: } + stack: { push: , pop: } + bloom filter clear: + # fifo or stack or bloom filter fast clear mode (mutually exclusive) + # is hit | miss | gateway | active | control_plane + # controls when the stack/fifo is pushed or popped + watermark: push | pop + # watermark interrupts sent every pushes or pops + offset_vpn: true | false + # adjust immediate data by vpn offset to compute vpns for multistage + # fifo/stack (jbay only) + address_shift: + # shift up the incoming meter address before vpn/index/subword extract (jbay only) + stage_alu_id: + # stage + alu id to be prepended to output addresses + dependency: concurrent | action | match + # set the interstage dependency between this stage and the + # previous stage. Ignored in stage 0 + error_mode: no_config | propagate | map_to_immediate | disable + always_run_action: + # action that runs automatically in the stage independent of tables + - + # configuration setting for mpr_stage_id + mpr_stage_id: + # configuration setting for mpr_bus_dep_glob_exec + # A bit that is 0 means treat that global execute bit as pass-through (action dependent), + # because the next stage is action dependent, while a 1 means update it in the current stage. + mpr_bus_dep_glob_exec: + # configuration setting for mpr_bus_dep_long_brch + # A bit that is 0 means treat that long branch tag ID bit as pass-through (action dependent), + # because the next stage is action dependent, while a 1 means update it in the current stage. + mpr_bus_dep_long_brch: + # configuration setting for mpr_always_run + mpr_always_run: + # Note that unspecified values are assumed to be 0. + mpr_next_table_lut: + : # Resolved incoming logical ID to activation bit map + mpr_glob_exec_lut: + : # Resolved incoming global execute bit to activation bit map + mpr_long_brch_lut: + : # Resolved incoming long branch tag ID to activation bit map +deparser : + # Defines a deparser. 
must be 'ingress' or 'egress' + dictionary: + # ordered list of phv locations to write out as the output deparser + - : + # single value to write iff the referred bit is set + - full_checksum : + # checksum result to write iff the referred bit is set + - : + # constant to write iff the referred bit is set (jbay only) + - clot : + # clot to output (jbay only) + pov: + length: + # maximum length of the clot + : | checksum + # offset in clot to replace with a PHV or checksum value + pov: | '[' , ... ']' + # optional explicit use/ordering of phvs for POV. All phvs used for POV bits + # in the dictionary will be added to the end of this, if not already present + partial_checksum : + : { swap: [, pov: ] } + # checksum unit programming -- pov bits for jbay only + full_checksum : + partial_checksum | clot : { pov: , invert: } + : [ ':' + # more generally, any deparser param that comes from the phv is + # specified this way. Only jbay has pov bits here + # are as follows + select: [ ':' ] + # controls which digest group is output + shift: + : | '[' , ... ']' + # values for a single digest group; specifies the sequence of + # phv containers in the appropriate table entry. this is usually + # data that is included in the digest, but it may also contain + # control metadata; for example, when configuring mirroring on + # Tofino, the first phv container specifies the mirror session id. + context_json: # anything + # ingress or egress params: + mirror: + egress_unicast_port: [ ':' ] + # specifies the port to write to + # FIXME: should this be squashed into the port? 
+ egress_unicast_pipe: [ ':' ] + # specifies the port to write to + drop_ctl: [ ':' ] + # jbay only, ingress or egress + afc: '{' location> ':' '}' + mirr_epipe_port: '{' location> ':' '}' + mirr_c2c_ctrl: '{' location> ':' '}' + mirr_coal_smpl_len: '{' location> ':' '}' + mirr_dond_ctrl: '{' location> ':' '}' + mirr_hash: '{' location> ':' '}' + mirr_icos: '{' location> ':' '}' + mirr_io_sel: '{' location> ':' '}' + mirr_mc_ctrl: '{' location> ':' '}' + mirr_qid: '{' location> ':' '}' + mtu_trunc_err_f: '{' location> ':' '}' + mtu_trunc_len: '{' location> ':' '}' + # ingress only deparser params: + learning: [ ':' ] + resubmit: [ ':' ] + copy_to_cpu: [ ':' ] + egress_multicast_group_: [ ':' ] + hash_lag_ecmp_mcast_: [ ':' ] + copy_to_cpu_cos: [ ':' ] + ingress_port_source: [ ':' ] + deflect_on_drop: [ ':' ] + meter_color: [ ':' ] + icos: [ ':' ] + qid: [ ':' ] + xid: [ ':' ] + yid: [ ':' ] + rid: [ ':' ] + warp: [ ':' ] + ct_disable: [ ':' ] + ct_mcast: [ ':' ] + # jbay ingress only + bypass_egr: '{' location> ':' '}' + # egress only deparser params: [ ':' ] + force_tx_err: [ ':' ] + tx_pkt_has_offsets: [ ':' ] + capture_tx_ts: [ ':' ] + coal: [ ':' ] + ecos: [ ':' ] + copy_to_cpu_cos: [ ':' ] # or c2c_cos + copy_to_cpu_qid: [ ':' ] # or c2c_qid + mirr_bitmap: [ ':' ] + valid_vec: [ ':' ] + # Ingress pipe -> TM fields + # - tableid (1b) -- ??? + # - mcid1 - Multicast Group ID 1 + # - mcid2 - Multicast Group ID 2 + # - hash1 - Hash for L1 (is this the same as hash_lag_ecmp_mcast_?) + # - hash2 - Hash for L2 (is this the same as hash_lag_ecmp_mcast_?) + packet_body_offset: + # TODO: Needed? Maybe just use a fixed header type for PBO? + # Packet body offset + # Payload body offset is: + # base_offset (unsigned) + const_offset (signed) + var_offset (unsigned). 
+ hdr: | + # Header name or ID to use for base offset location + offset: + # Constant (signed) offset to add to the base offset + # Default: 0 + var_off_pos: + # Variable offset: start bit position in POV + # Default: 0 + var_off_len: + # Variable offset: length in POV + # Default: 0 + zero: , + # list of phv slots that should be initialized to (valid) zero + remaining_bridge_metadata: + # packing of remaining bridge metadata + pov_select: + # POV bytes used to address the TCAM & SRAM + config : + # TCAM & SRAM configuration + match: + # POV match (4B) + start: + # start position of the remaining bridge metadata in bridge metadata (6b) + # register: rem_brm_ext_ram.rem_brm_ext[*].rem_brm_start + bytes: '[' | , ... ']' | '{' ':' | , ... '}' + # source PHEs of the remaining bridge metadata + # if a list is used, the items are implicitly addressed (from 0 up) bytes of remaining bridge metadata + # if a map is used, the items are explicitly addressed bytes of remaining bridge metadata + # up to 62 items depending on the remaining bridge metadata start position and the number of POV bytes (8B flags + 8B state) + # if an 8b constant is used as the PHE byte source, its value is directly written to the configuration registers + # if a PHE (slice) name is used as the PHE byte source, it is first mapped to the PHE byte number + # registers: rem_brm_ext_ram.rem_brm_ext[*].b*_phv_sel +flexible_headers: + # Lists the headers that were re-packed by the compiler because + # they were marked flexible. See context.json schema 'flexible_headers' node + # for more information. This section is optional. It exists only if there are + # flexible headers defined in the program (e.g., bridged metadata) + # It consists of the json snippet that is part of context.json verbatim. +primitives: + # Defines the name of the json file that has information on primitives used + # within table actions. These are placed in the respective actions as + # primitives node. 
This node is mainly used by model for logging + # instructions as specified in original p4 program +dynhash: + # Defines the name of the json file that has the dynamic hash calculation + # node. This node is directly merged into the context json at the top level +# version 1.0.0 +version: + # semantic versioning number +# version 1.0.1 +version: + version: + # semantic versioning number + run_id: + # defines an id that ties together all the files produced by the compiler + # part of the Version section + target: + # specify the target architecture diff --git a/backends/tofino/bf-asm/action_bus.cpp b/backends/tofino/bf-asm/action_bus.cpp new file mode 100644 index 00000000000..6019286c81a --- /dev/null +++ b/backends/tofino/bf-asm/action_bus.cpp @@ -0,0 +1,1204 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "action_bus.h" + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "lib/hex.h" +#include "misc.h" + +static MeterBus_t MeterBus; + +std::ostream &operator<<(std::ostream &out, const ActionBusSource &src) { + const char *sep = ""; + switch (src.type) { + case ActionBusSource::None: + out << "None"; + break; + case ActionBusSource::Field: + out << "Field("; + for (auto &range : src.field->bits) { + out << sep << range.lo << ".." 
<< range.hi; + sep = ", "; + } + out << ")"; + if (src.field->fmt && src.field->fmt->tbl) + out << " " << src.field->fmt->tbl->find_field(src.field); + break; + case ActionBusSource::HashDist: + out << "HashDist(" << src.hd->hash_group << ", " << src.hd->id << ")"; + break; + case ActionBusSource::HashDistPair: + out << "HashDistPair([" << src.hd_tuple.hd1->hash_group << ", " << src.hd_tuple.hd1->id + << "]," << "[" << src.hd_tuple.hd2->hash_group << ", " << src.hd_tuple.hd2->id + << "])"; + break; + case ActionBusSource::RandomGen: + out << "rng " << src.rng.unit; + break; + case ActionBusSource::TableOutput: + out << "TableOutput(" << (src.table ? src.table->name() : "0") << ")"; + break; + case ActionBusSource::TableColor: + out << "TableColor(" << (src.table ? src.table->name() : "0") << ")"; + break; + case ActionBusSource::TableAddress: + out << "TableAddress(" << (src.table ? src.table->name() : "0") << ")"; + break; + case ActionBusSource::Ealu: + out << "EALU"; + break; + case ActionBusSource::XcmpData: + out << "XCMP(" << src.xcmp_data.xcmp_group << ":" << src.xcmp_data.xcmp_byte << ")"; + break; + case ActionBusSource::NameRef: + out << "NameRef(" << (src.name_ref ? src.name_ref->name : "0") << ")"; + break; + case ActionBusSource::ColorRef: + out << "ColorRef(" << (src.name_ref ? src.name_ref->name : "0") << ")"; + break; + case ActionBusSource::AddressRef: + out << "AddressRef(" << (src.name_ref ? src.name_ref->name : "0") << ")"; + break; + default: + out << ""; + break; + } + return out; +} + +/* identifes which bytes on the action bus are tied together in the hv_xbar input, + * so must be routed together. The second table here is basically just bitcount of + * masks in the first table. 
*/ +static std::array, ACTION_HV_XBAR_SLICES> action_hv_slice_byte_groups = {{ + {0x3, 0x3, 0xc, 0xc, 0xf0, 0xf0, 0xf0, 0xf0, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xf, 0xf, 0xf, 0xf, 0xf0, 0xf0, 0xf0, 0xf0, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xf, 0xf, 0xf, 0xf, 0xf0, 0xf0, 0xf0, 0xf0, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xf, 0xf, 0xf, 0xf, 0xf0, 0xf0, 0xf0, 0xf0, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, 0xff00, + 0xff00, 0xff00}, +}}; + +static std::array, ACTION_HV_XBAR_SLICES> action_hv_slice_group_align = { + {{2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8}, + {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8}, + {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8}, + {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8}, + {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, + {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, + {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, + {8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}}}; + +ActionBus::ActionBus(Table *tbl, VECTOR(pair_t) & data) { + lineno = data.size ? data[0].key.lineno : -1; + for (auto &kv : data) { + if (!CHECKTYPE2(kv.key, tINT, tRANGE)) continue; + unsigned idx = kv.key.type == tRANGE ? 
kv.key.range.lo : kv.key.i; + if (!CHECKTYPE2M(kv.value, tSTR, tCMD, "field name or slice")) continue; + const char *name = kv.value.s; + value_t *name_ref = &kv.value; + unsigned off = 0, sz = 0; + if (kv.value.type == tCMD) { + BUG_CHECK(kv.value.vec.size > 0 && kv.value[0].type == tSTR); + if (kv.value == "hash_dist" || kv.value == "rng") { + if (!PCHECKTYPE(kv.value.vec.size > 1, kv.value[1], tINT)) continue; + name = kv.value[0].s; + name_ref = nullptr; + } else { + if (!PCHECKTYPE2M(kv.value.vec.size == 2, kv.value[1], tRANGE, tSTR, + "field name or slice")) + continue; + // if ((kv.value[1].range.lo & 7) != 0 || (kv.value[1].range.hi & 7) != 7) { + // error(kv.value.lineno, "Slice must be byte slice"); + // continue; } + name = kv.value[0].s; + name_ref = &kv.value[0]; + if (kv.value[1].type == tRANGE) { + off = kv.value[1].range.lo; + sz = kv.value[1].range.hi - kv.value[1].range.lo + 1; + } else if (kv.value[1] != "color") { + error(kv.value[1].lineno, "unexpected %s", kv.value[1].s); + } + } + } + Table::Format::Field *f = tbl->lookup_field(name, "*"); + ActionBusSource src; + const char *p = name - 1; + while (!f && (p = strchr(p + 1, '.'))) + f = tbl->lookup_field(p + 1, std::string(name, p - name)); + if (!f) { + if (tbl->table_type() == Table::ACTION) { + error(kv.value.lineno, "No field %s in format", name); + continue; + } else if (kv.value == "meter") { + src = ActionBusSource(MeterBus); + if (kv.value.type == tCMD) { + if (kv.value[1] == "color") { + src.type = ActionBusSource::ColorRef; + if (!sz) off = 24, sz = 8; + } else if (kv.value[1] == "address") { + src.type = ActionBusSource::AddressRef; + } + } + } else if (kv.value.type == tCMD && kv.value == "hash_dist") { + if (auto hd = tbl->find_hash_dist(kv.value[1].i)) { + src = ActionBusSource(hd); + } else { + error(kv.value.lineno, "No hash_dist %" PRId64 " in table %s", kv.value[1].i, + tbl->name()); + continue; + } + sz = 16; + for (int i = 2; i < kv.value.vec.size; ++i) { + if (kv.value[i] 
== "lo" || kv.value[i] == "low") { + src.hd->xbar_use |= HashDistribution::IMMEDIATE_LOW; + } else if (kv.value[i] == "hi" || kv.value[i] == "high") { + src.hd->xbar_use |= HashDistribution::IMMEDIATE_HIGH; + off += 16; + } else if (kv.value[i].type == tINT) { + if (auto hd_hi = tbl->find_hash_dist(kv.value[i].i)) { + src.hd->xbar_use |= HashDistribution::IMMEDIATE_LOW; + hd_hi->xbar_use |= HashDistribution::IMMEDIATE_HIGH; + setup_slot(kv.value.lineno, tbl, name, idx + 2, ActionBusSource(hd_hi), + 16, 16); + setup_slot(kv.value.lineno, tbl, name, idx, + ActionBusSource(src.hd, hd_hi), 32, 0); + } + } else if (kv.value[i].type == tRANGE) { + if ((kv.value[i].range.lo & 7) != 0 || (kv.value[i].range.hi & 7) != 7) + error(kv.value.lineno, "Slice must be byte slice"); + off += kv.value[i].range.lo; + sz = kv.value[i].range.hi - kv.value[i].range.lo + 1; + } else { + error(kv.value[i].lineno, "Unexpected hash_dist %s", + value_desc(kv.value[i])); + break; + } + } + } else if (kv.value.type == tCMD && kv.value == "rng") { + src = ActionBusSource(RandomNumberGen(kv.value[1].i)); + if (kv.value.vec.size > 2 && CHECKTYPE(kv.value[2], tRANGE)) { + off = kv.value[2].range.lo; + sz = kv.value[2].range.hi + 1 - off; + } + } else if (name_ref) { + src = ActionBusSource(new Table::Ref(*name_ref)); + if (kv.value.type == tCMD) { + if (kv.value[1] == "color") { + src.type = ActionBusSource::ColorRef; + if (!sz) off = 24, sz = 8; + } else if (kv.value[1] == "address") { + src.type = ActionBusSource::AddressRef; + } + } + } else if (tbl->format) { + error(kv.value.lineno, "No field %s in format", name); + continue; + } + } else { + src = ActionBusSource(f); + if (!sz) sz = f->size; + if (off + sz > f->size) + error(kv.value.lineno, "Invalid slice of %d bit field %s", f->size, name); + } + if (kv.key.type == tRANGE) { + unsigned size = (kv.key.range.hi - idx + 1) * 8; + // Make slot size (sz) same as no. of bytes allocated on action bus. 
// --- Tail of a definition that begins before this chunk (immediate-data slot setup from
// --- the assembler syntax); reproduced as-is.  When no explicit size is given, the slot
// --- width defaults by action-bus region: bytes 0..31 are 8b, then 16b, then 32b slots.
        if (size > sz) sz = size;
    } else if (!sz) {
        sz = idx < ACTION_DATA_8B_SLOTS                               ? 8
             : idx < ACTION_DATA_8B_SLOTS + 2 * ACTION_DATA_16B_SLOTS ? 16
                                                                      : 32;
    }
    setup_slot(kv.key.lineno, tbl, name, idx, src, sz, off);
    // Mark the named format field as consumed as immediate action data.
    tbl->apply_to_field(
        name, [](Table::Format::Field *f) { f->flags |= Table::Format::Field::USED_IMMED; });
    if (f) {
        // Record the (source, offset) pair in the slot for every action's copy of the field.
        auto &slot = by_byte.at(idx);
        tbl->apply_to_field(name, [&slot, tbl, off](Table::Format::Field *f) {
            ActionBusSource src(f);
            if (slot.data.emplace(src, off).second) {
                LOG4(" data += " << src.toString(tbl) << " off=" << off);
            }
        });
    }
    }
}

// Factory: default-constructed (empty) ActionBus.
// NOTE(review): the return type's template argument (presumably <ActionBus>) was lost in
// extraction of this patch -- confirm against the upstream file.
std::unique_ptr ActionBus::create() {
    return std::unique_ptr(new ActionBus());
}

// Factory: ActionBus parsed from an assembler key/value list for table 'tbl'.
std::unique_ptr ActionBus::create(Table *tbl, VECTOR(pair_t) & data) {
    return std::unique_ptr(new ActionBus(tbl, data));
}

// Record that action-bus byte 'idx' carries 'sz' bits of 'src' (at bit offset 'off' within
// the source), creating the Slot if the byte is not yet used, or widening/merging into the
// existing Slot if it is.  Out-of-range indexes are reported against 'lineno'.
void ActionBus::setup_slot(int lineno, Table *tbl, const char *name, unsigned idx,
                           ActionBusSource src, unsigned sz, unsigned off) {
    if (idx >= ACTION_DATA_BUS_BYTES) {
        error(lineno, "Action bus index out of range");
        return;
    }
    if (by_byte.count(idx)) {
        // Byte already in use: keep the widest name/size and add this source to the set.
        auto &slot = by_byte.at(idx);
        if (sz > slot.size) {
            slot.name = name;
            slot.size = sz;
        }
        slot.data.emplace(src, off);
        LOG4("ActionBus::ActionBus: " << idx << ": " << name << " sz=" << sz
                                      << " data += " << src.toString(tbl) << " off=" << off);
    } else {
        by_byte.emplace(idx, Slot(name, idx, sz, src, off));
        LOG4("ActionBus::ActionBus: " << idx << ": " << name << " sz=" << sz
                                      << " data = " << src.toString(tbl) << " off=" << off);
    }
}

// Low bit of this slot on the action-data (input) bus.  All sources recorded in the slot
// must agree on that bit (BUG_CHECKed); Field sources are translated via immed_bit().
unsigned ActionBus::Slot::lo(Table *tbl) const {
    int rv = -1;
    for (auto &src : data) {
        int off = src.second;
        if (src.first.type == ActionBusSource::Field) off += src.first.field->immed_bit(0);
        BUG_CHECK(rv < 0 || rv == off);
        rv = off;
    }
    BUG_CHECK(rv >= 0);
    return rv;
}

// Can sources 'a' (at bit a_off) and 'b' (at bit b_off) share the same action-bus location?
// A single HashDist is compatible with either half of a HashDistPair (the pair's second
// unit sits 16 bits up, hence the +16 adjustments).
bool ActionBus::compatible(const ActionBusSource &a, unsigned a_off, const ActionBusSource &b,
                           unsigned b_off) {
    if ((a.type == ActionBusSource::HashDist) && (b.type == ActionBusSource::HashDistPair)) {
        return ((compatible(a, a_off, ActionBusSource(b.hd_tuple.hd1), b_off)) ||
                (compatible(a, a_off, ActionBusSource(b.hd_tuple.hd2), b_off + 16)));
    } else if ((a.type == ActionBusSource::HashDistPair) && (b.type == ActionBusSource::HashDist)) {
        return ((compatible(ActionBusSource(a.hd_tuple.hd1), a_off, b, b_off)) ||
                (compatible(ActionBusSource(a.hd_tuple.hd2), a_off + 16, b, b_off)));
    }
    if (a.type != b.type) return false;
    switch (a.type) {
        case ActionBusSource::Field:
            // corresponding fields in different groups are compatible even though they
            // are at different locations. Table::Format::pass1 checks that
            if (a.field->by_group == b.field->by_group) return true;
            return a.field->bit(a_off) == b.field->bit(b_off);
        case ActionBusSource::HashDist:
            return a.hd->hash_group == b.hd->hash_group && a.hd->id == b.hd->id && a_off == b_off;
        case ActionBusSource::HashDistPair:
            return ((a.hd_tuple.hd1->hash_group == b.hd_tuple.hd1->hash_group &&
                     a.hd_tuple.hd1->id == b.hd_tuple.hd1->id) &&
                    (a_off == b_off) &&
                    (a.hd_tuple.hd2->hash_group == b.hd_tuple.hd2->hash_group &&
                     a.hd_tuple.hd2->id == b.hd_tuple.hd2->id));
        case ActionBusSource::TableOutput:
            return a.table == b.table;
        default:
            return false;
    }
}

// Pass 1: resolve all by-name references (NameRef/ColorRef/AddressRef) into concrete
// sources (TableOutput/TableColor/TableAddress or format Fields), check that every slot's
// sources are mutually compatible, and claim the stage-level action bus slots, diagnosing
// conflicts with other tables.
void ActionBus::pass1(Table *tbl) {
    // NOTE(review): the dynamic_cast target type was stripped in extraction (likely a
    // cast distinguishing immediate-data tables from action-data tables) -- confirm upstream.
    bool is_immed_data = dynamic_cast(tbl) != nullptr;
    LOG1("ActionBus::pass1(" << tbl->name() << ")" << (is_immed_data ? " [immed]" : ""));
    if (lineno < 0)
        lineno = tbl->format && tbl->format->lineno >= 0 ? tbl->format->lineno : tbl->lineno;
    Slot *use[ACTION_DATA_BUS_SLOTS] = {0};
    for (auto &slot : Values(by_byte)) {
        for (auto it = slot.data.begin(); it != slot.data.end();) {
            if (it->first.type >= ActionBusSource::NameRef &&
                it->first.type <= ActionBusSource::AddressRef) {
                // Remove all NameRef and replace with TableOutputs or Fields
                // ColorRef turns into TableColor, AddressRef into TableAddress
                if (it->first.name_ref) {
                    bool ok = false;
                    if (*it->first.name_ref) {
                        // Name resolves to a table -- convert to the proper output source.
                        ActionBusSource src(*it->first.name_ref);
                        switch (it->first.type) {
                            case ActionBusSource::NameRef:
                                src.table->set_output_used();
                                break;
                            case ActionBusSource::ColorRef:
                                src.type = ActionBusSource::TableColor;
                                src.table->set_color_used();
                                break;
                            case ActionBusSource::AddressRef:
                                src.type = ActionBusSource::TableAddress;
                                src.table->set_address_used();
                                break;
                            default:
                                BUG();
                        }
                        slot.data[src] = it->second;
                        ok = true;
                    } else if (tbl->actions) {
                        // Not a table name -- try to resolve it as an action-format field
                        // (possibly via per-action aliases); all actions must agree.
                        Table::Format::Field *found_field = nullptr;
                        Table::Actions::Action *found_act = nullptr;
                        for (auto &act : *tbl->actions) {
                            int lo = -1, hi = -1;
                            auto name = act.alias_lookup(it->first.name_ref->lineno,
                                                         it->first.name_ref->name, lo, hi);
                            if (auto *field = tbl->lookup_field(name, act.name)) {
                                if (found_field) {
                                    if (field != found_field ||
                                        slot.data.at(ActionBusSource(field)) != it->second + lo)
                                        error(it->first.name_ref->lineno,
                                              "%s has incompatible "
                                              "aliases in actions %s and %s",
                                              it->first.name_ref->name.c_str(),
                                              found_act->name.c_str(), act.name.c_str());
                                } else {
                                    found_act = &act;
                                    found_field = field;
                                    slot.data[ActionBusSource(field)] = it->second + lo;
                                    ok = true;
                                }
                            }
                        }
                    }
                    if (!ok)
                        error(it->first.name_ref->lineno, "No format field or table named %s",
                              it->first.name_ref->name.c_str());
                } else {
                    // Null name_ref means "the meter bus" -- require exactly one attached meter.
                    auto att = tbl->get_attached();
                    if (!att || att->meters.empty()) {
                        error(lineno, "No meter table attached to %s", tbl->name());
                    } else if (att->meters.size() > 1) {
                        error(lineno, "Multiple meter tables attached to %s", tbl->name());
                    } else {
                        ActionBusSource src(att->meters.at(0));
                        switch (it->first.type) {
                            case ActionBusSource::NameRef:
                                src.table->set_output_used();
                                break;
                            case ActionBusSource::ColorRef:
                                src.type = ActionBusSource::TableColor;
                                src.table->set_color_used();
                                break;
                            case ActionBusSource::AddressRef:
                                src.type = ActionBusSource::TableAddress;
                                src.table->set_address_used();
                                break;
                            default:
                                BUG();
                        }
                        slot.data[src] = it->second;
                    }
                }
                it = slot.data.erase(it);
            } else {
                // Already-concrete source: just record usage on the referenced table.
                if (it->first.type == ActionBusSource::TableColor)
                    it->first.table->set_color_used();
                if (it->first.type == ActionBusSource::TableOutput)
                    it->first.table->set_output_used();
                ++it;
            }
        }
        if (error_count > 0) continue;
        // All sources sharing a slot must be pairwise compatible with the first.
        auto first = slot.data.begin();
        if (first != slot.data.end()) {
            for (auto it = next(first); it != slot.data.end(); ++it) {
                if (!compatible(first->first, first->second, it->first, it->second))
                    error(lineno, "Incompatible action bus entries at offset %d", slot.byte);
            }
        }
        // Claim the stage's hardware slots this (possibly multi-byte) slot covers.
        int slotno = Stage::action_bus_slot_map[slot.byte];
        for (unsigned byte = slot.byte; byte < slot.byte + slot.size / 8U;
             byte += Stage::action_bus_slot_size[slotno++] / 8U) {
            if (slotno >= ACTION_DATA_BUS_SLOTS) {
                error(lineno, "%s extends past the end of the actions bus", slot.name.c_str());
                break;
            }
            if (auto tbl_in_slot = tbl->stage->action_bus_use[slotno]) {
                if (tbl_in_slot != tbl) {
                    // Sharing is allowed for mutually-exclusive atcam tables or when the
                    // occupied bits do not overlap; otherwise warn.
                    if (!(check_atcam_sharing(tbl, tbl_in_slot) ||
                          check_slot_sharing(slot, tbl->stage->action_bus_use_bit_mask)))
                        warning(lineno, "Action bus byte %d set in table %s and table %s", byte,
                                tbl->name(), tbl->stage->action_bus_use[slotno]->name());
                }
            } else {
                tbl->stage->action_bus_use[slotno] = tbl;
                // Set a per-byte mask on the action bus bytes to indicate which
                // bits in bytes are being used. A slot can be shared among
                // tables which dont overlap any bits. The code assumes the
                // action bus allocation is byte aligned (and sets the mask to
                // 0xF), while this could ideally be not the case. In that
                // event, the mask must be set accordingly. This will require
                // additional logic to determine which bits in the byte are used
                // or additional syntax in the action bus assembly output.
                tbl->stage->action_bus_use_bit_mask.setrange(slot.byte * 8U, slot.size);
            }
            if (use[slotno]) {
                // Two of our own slots land in the same hardware slot -- their source bit
                // positions must line up exactly.
                BUG_CHECK(!slot.data.empty() && !use[slotno]->data.empty());
                auto nsrc = slot.data.begin()->first;
                unsigned noff = slot.data.begin()->second;
                unsigned nstart = 8 * (byte - slot.byte) + noff;
                if (nsrc.type == ActionBusSource::Field) nstart = nsrc.field->immed_bit(nstart);
                auto osrc = use[slotno]->data.begin()->first;
                unsigned ooff = use[slotno]->data.begin()->second;
                unsigned ostart = 8 * (byte - use[slotno]->byte) + ooff;
                if (osrc.type == ActionBusSource::Field) {
                    if (ostart < osrc.field->size)
                        ostart = osrc.field->immed_bit(ostart);
                    else
                        ostart += osrc.field->immed_bit(0);
                }
                if (ostart != nstart)
                    error(lineno,
                          "Action bus byte %d used inconsistently for fields %s and "
                          "%s in table %s",
                          byte, use[slotno]->name.c_str(), slot.name.c_str(), tbl->name());
            } else {
                use[slotno] = &slot;
            }
            // Track which action-HV-xbar byte groups feed this 128-bit action bus slice.
            unsigned hi = slot.lo(tbl) + slot.size - 1;
            if (action_hv_slice_use.size() <= hi / 128U) action_hv_slice_use.resize(hi / 128U + 1);
            auto &hv_groups = action_hv_slice_byte_groups.at(slot.byte / 16);
            for (unsigned byte = slot.lo(tbl) / 8U; byte <= hi / 8U; ++byte) {
                byte_use[byte] = 1;
                action_hv_slice_use.at(byte / 16).at(slot.byte / 16) |= hv_groups.at(byte % 16);
            }
        }
    }
}

// True when no bit of 'slot' overlaps a bit already claimed in the stage-wide usage mask,
// i.e. the hardware slot can be shared without interference.
bool ActionBus::check_slot_sharing(Slot &slot, bitvec &action_bus) {
    return (action_bus.getrange(slot.byte * 8U, slot.size) == 0);
}

// True when tbl1 and tbl2 are parts of the same P4 atcam (match or action halves), which
// are mutually exclusive and so may share action bus bytes.
bool ActionBus::check_atcam_sharing(Table *tbl1, Table *tbl2) {
    bool atcam_share_bytes = false;
    bool atcam_action_share_bytes = false;
    // Check tables are not same atcam's sharing
    // bytes on action bus
    // NOTE(review): the ->to() calls below lost their template arguments (the target table
    // subclasses, e.g. the atcam match/action table types) in extraction -- confirm upstream.
    if (tbl1->to() && tbl2->to() &&
        tbl1->p4_table->p4_name() == tbl2->p4_table->p4_name())
        atcam_share_bytes = true;
    // Check tables are not same atcam action tables sharing bytes on action bus
    if (auto tbl1_at = tbl1->to()) {
        if (auto tbl2_at = tbl2->to()) {
            auto tbl1_mt = tbl1_at->get_match_table();
            auto tbl2_mt = tbl2_at->get_match_table();
            if (tbl1_mt->p4_table->p4_name() == tbl2_mt->p4_table->p4_name())
                atcam_action_share_bytes = true;
        }
    }
    return (atcam_share_bytes || atcam_action_share_bytes);
}

// Note that bits lo..hi of 'src' must be made available on the action output bus, with the
// needed access widths encoded in 'size' (bitmask, bit i == access of 2^i bytes).  Actual
// placement happens later in pass3/alloc_field; this just accumulates requirements and
// marks the referenced table outputs as used.
void ActionBus::need_alloc(Table *tbl, const ActionBusSource &src, unsigned lo, unsigned hi,
                           unsigned size) {
    LOG3("need_alloc(" << tbl->name() << ") " << src << " lo=" << lo << " hi=" << hi << " size=0x"
                       << hex(size));
    need_place[src][lo] |= size;
    switch (src.type) {
        case ActionBusSource::Field:
            lo += src.field->immed_bit(0);  // translate field offset to immediate-bus bit
            break;
        case ActionBusSource::TableOutput:
            src.table->set_output_used();
            break;
        case ActionBusSource::TableColor:
            src.table->set_color_used();
            break;
        case ActionBusSource::TableAddress:
            src.table->set_address_used();
            break;
        case ActionBusSource::XcmpData:
            break;
        default:
            break;
    }
    byte_use.setrange(lo / 8U, size);
}

/**
 * find_free -- find a free slot on the action output bus for some data. Looks through bytes
 * in the range min..max for a free space where we can put 'bytes' bytes from an action
 * input bus starting at 'lobyte'. 'step' is an optimization to only check every step bytes
 * as we know alignment restrictions mean those are the only possible aligned spots
 */
int ActionBus::find_free(Table *tbl, unsigned min, unsigned max, unsigned step, unsigned lobyte,
                         unsigned bytes) {
    unsigned avail;
    LOG4("find_free(" << min << ", " << max << ", " << step << ", " << lobyte << ", " << bytes
                      << ")");
    for (unsigned i = min; i + bytes - 1 <= max; i += step) {
        unsigned hv_slice = i / ACTION_HV_XBAR_SLICE_SIZE;
        auto &hv_groups = action_hv_slice_byte_groups.at(hv_slice);
        // Alignment masks for the first and last input byte within their 16-byte groups.
        int mask1 = action_hv_slice_group_align.at(hv_slice).at(lobyte % 16U) - 1;
        int mask2 = action_hv_slice_group_align.at(hv_slice).at((lobyte + bytes - 1) % 16U) - 1;
        if ((i ^ lobyte) & mask1) continue;  // misaligned
        bool inuse = false;
        // Check the input-bus side: is any byte of the aligned group already routed to
        // this hv slice via a conflicting group?
        for (unsigned byte = lobyte & ~mask1; byte <= ((lobyte + bytes - 1) | mask2); ++byte) {
            if (!byte_use[byte]) continue;
            if (action_hv_slice_use.size() <= byte / 16U)
                action_hv_slice_use.resize(byte / 16U + 1);
            if (action_hv_slice_use.at(byte / 16U).at(hv_slice) & hv_groups.at(byte % 16U)) {
                LOG5(" input byte " << byte << " in use for hv_slice " << hv_slice);
                inuse = true;
                break;
            }
        }
        if (inuse) {
            // skip up to next hv_slice
            while ((i + step) / ACTION_HV_XBAR_SLICE_SIZE == hv_slice) i += step;
            continue;
        }
        // Check the output-bus side: are the target hardware slots free?
        for (unsigned byte = i & ~mask1; byte <= ((i + bytes - 1) | mask2); ++byte)
            if (tbl->stage->action_bus_use[Stage::action_bus_slot_map[byte]]) {
                LOG5(" output byte "
                     << byte << " in use by "
                     << tbl->stage->action_bus_use[Stage::action_bus_slot_map[byte]]->name());
                inuse = true;
                break;
            }
        if (inuse) continue;
        for (avail = 1; avail < bytes; avail++)
            if (tbl->stage->action_bus_use[Stage::action_bus_slot_map[i + avail]]) break;
        if (avail >= bytes) return i;
    }
    return -1;  // no free aligned spot found
}

/**
 * find_merge -- find any adjacent/overlapping data on the action input bus that means the
 * data at 'offset' actually already on the action output bus
 *   offset  offset (in bits) on the action input bus of the data we're interested in
 *   bytes   how many bytes of data on the action input bus
 *   use     bitmask of the sizes of phv that need to access this on the action output bus
 */
int ActionBus::find_merge(Table *tbl, int offset, int bytes, int use) {
    LOG4("find_merge(" << offset << ", " << bytes << ", " << use << ")");
    // NOTE(review): dynamic_cast target type stripped in extraction -- confirm upstream.
    bool is_action_data = dynamic_cast(tbl) != nullptr;
    for (auto &alloc : by_byte) {
        // Restrict to the action-bus region matching the requested access width:
        // bytes 0..31 for 8-bit accesses, 32..95 for 16-bit.
        if (use & 1) {
            if (alloc.first >= 32) break;
        } else if (use & 2) {
            if (alloc.first < 32) continue;
            if (alloc.first >= 96) break;
        }
        if (alloc.second.is_table_output()) continue;  // can't merge table output with immediate
        int inbyte = alloc.second.lo(tbl) / 8U;
        int align = 4;
        if (is_action_data)
            align = action_hv_slice_group_align.at(alloc.first / 16U).at(inbyte % 16U);
        int outbyte = alloc.first & ~(align - 1);
        inbyte &= ~(align - 1);
        // If the requested data lies within the aligned window already routed, reuse it.
        if (offset >= inbyte * 8 && offset + bytes * 8 <= (inbyte + align) * 8)
            return outbyte + offset / 8 - inbyte;
    }
    return -1;
}

// Commit an allocation: route 'bytes' bytes of 'src' (input-bus bytes starting at 'lobyte',
// source bit offset 'offset') to output-bus byte 'use', claiming stage slots and recording
// the Slot(s) in by_byte.  Multi-slot allocations advance through successive hardware slots.
void ActionBus::do_alloc(Table *tbl, ActionBusSource src, unsigned use, int lobyte, int bytes,
                         unsigned offset) {
    LOG2("putting " << src << '(' << offset << ".." << (offset + bytes * 8 - 1) << ")["
                    << (lobyte * 8) << ".." << ((lobyte + bytes) * 8 - 1) << "] at action_bus "
                    << use);
    unsigned hv_slice = use / ACTION_HV_XBAR_SLICE_SIZE;
    auto &hv_groups = action_hv_slice_byte_groups.at(hv_slice);
    for (unsigned byte = lobyte; byte < unsigned(lobyte + bytes); ++byte) {
        if (action_hv_slice_use.size() <= byte / 16) action_hv_slice_use.resize(byte / 16 + 1);
        action_hv_slice_use.at(byte / 16).at(hv_slice) |= hv_groups.at(byte % 16);
    }
    while (bytes > 0) {
        int slot = Stage::action_bus_slot_map[use];
        int slotsize = Stage::action_bus_slot_size[slot];
        auto slot_tbl = tbl->stage->action_bus_use[slot];
        // Atcam tables are mutually exclusive and should be allowed to share
        // bytes on action bus
        if (slot_tbl && !Table::allow_bus_sharing(tbl, slot_tbl))
            BUG_CHECK(slot_tbl == tbl || slot_tbl->action_bus->by_byte.at(use).data.count(src));
        tbl->stage->action_bus_use[slot] = tbl;
        Slot &sl = by_byte.emplace(use, Slot(src.name(tbl), use, bytes * 8U)).first->second;
        if (sl.size < bytes * 8U) sl.size = bytes * 8U;
        sl.data.emplace(src, offset);
        LOG4(" slot " << sl.byte << "(" << sl.name << ") data += " << src.toString(tbl)
                      << " off=" << offset);
        offset += slotsize;
        bytes -= slotsize / 8U;
        use += slotsize / 8U;
    }
}

// For a size-needed bitmask (index), the span (in bits, minus 1) that must be reserved.
const unsigned ActionBus::size_masks[8] = {7, 7, 15, 15, 31, 31, 31, 31};

// Place one requested (source, offset) on the action output bus, trying in order: an
// existing placement elsewhere in the stage (find), a merge with adjacent data
// (find_merge), then a fresh free slot (find_free) in each region the access sizes demand.
void ActionBus::alloc_field(Table *tbl, ActionBusSource src, unsigned offset,
                            unsigned sizes_needed) {
    LOG4("alloc_field(" << src << ", " << offset << ", " << sizes_needed << ")");
    int lineno = this->lineno;
    // NOTE(review): dynamic_cast target type stripped in extraction -- confirm upstream.
    bool is_action_data = dynamic_cast(tbl) != nullptr;
    int lo, hi, use;
    bool can_merge = true;
    if (src.type == ActionBusSource::Field) {
        lo = src.field->immed_bit(offset);
        hi = src.field->immed_bit(src.field->size) - 1;
        lineno = tbl->find_field_lineno(src.field);
    } else {
        lo = offset;
        // Table outputs/colors/addresses and RNG data cannot be merged with other data.
        if (src.type == ActionBusSource::TableOutput || src.type == ActionBusSource::TableColor ||
            src.type == ActionBusSource::TableAddress || src.type ==
ActionBusSource::RandomGen)
            can_merge = false;
        if (src.type == ActionBusSource::HashDist &&
            !(src.hd->xbar_use & HashDistribution::IMMEDIATE_LOW))
            lo += 16;  // hash_dist in the upper half of the 32-bit immediate path
        hi = lo | size_masks[sizes_needed];
    }
    if (lo / 32U != hi / 32U) {
        /* Can't go across 32-bit boundary so chop it down as needed */
        hi = lo | 31U;
    }
    int bytes = hi / 8U - lo / 8U + 1;
    // Alignment step within a 128-bit action slice depends on where the data starts.
    int step = 4;
    if (is_action_data) step = (lo % 128U) < 32 ? 2 : (lo % 128U) < 64 ? 4 : 8;
    if (sizes_needed & 1) {
        /* need 8-bit */
        if ((lo % 8U) && (lo / 8U != hi / 8U)) {
            error(lineno,
                  "%s not correctly aligned for 8-bit use on "
                  "action bus",
                  src.toString(tbl).c_str());
            return;
        }
        unsigned start = (lo / 8U) % step;
        int bytes_needed = (sizes_needed & 4) ? bytes : 1;
        if ((use = find(tbl->stage, src, lo, hi, 1)) >= 0 ||
            (can_merge && (use = find_merge(tbl, lo, bytes_needed, 1)) >= 0) ||
            (use = find_free(tbl, start, 31, step, lo / 8U, bytes_needed)) >= 0)
            do_alloc(tbl, src, use, lo / 8U, bytes_needed, offset);
        else
            error(lineno, "Can't allocate space on 8-bit part of action bus for %s",
                  src.toString(tbl).c_str());
    }
    step = (lo % 128U) < 64 ? 4 : 8;
    if (sizes_needed & 2) {
        /* need 16-bit */
        if (lo % 16U) {
            if (lo / 16U != hi / 16U) {
                error(lineno,
                      "%s not correctly aligned for 16-bit use "
                      "on action bus",
                      src.toString(tbl).c_str());
                return;
            }
            // Misaligned within a halfword: only a merge with existing data can work.
            if (can_merge && (use = find_merge(tbl, lo, bytes, 2)) >= 0) {
                do_alloc(tbl, src, use, lo / 8U, bytes, offset);
                return;
            }
        }
        if (!(sizes_needed & 4) && bytes > 2) bytes = 2;
        unsigned start = 32 + (lo / 8U) % step;
        if ((use = find(tbl->stage, src, lo, hi, 2)) >= 0 ||
            (can_merge && (use = find_merge(tbl, lo, bytes, 2)) >= 0) ||
            (use = find_free(tbl, start, 63, step, lo / 8U, bytes)) >= 0 ||
            (use = find_free(tbl, start + 32, 95, 8, lo / 8U, bytes)) >= 0)
            do_alloc(tbl, src, use, lo / 8U, bytes, offset);
        else
            error(lineno, "Can't allocate space on 16-bit part of action bus for %s",
                  src.toString(tbl).c_str());
    }
    if (sizes_needed == 4) {
        /* need only 32-bit */
        unsigned odd = (lo / 8U) & (4 & step);
        unsigned start = (lo / 8U) % step;
        if (lo % 32U) {
            if (can_merge && (use = find_merge(tbl, lo, bytes, 4)) >= 0) {
                do_alloc(tbl, src, use, lo / 8U, bytes, offset);
                return;
            }
        }
        // Prefer the 32-bit-only region (96..127), then progressively lower regions.
        if ((use = find(tbl->stage, src, lo, hi, 4)) >= 0 ||
            (can_merge && (use = find_merge(tbl, lo, bytes, 4)) >= 0) ||
            (use = find_free(tbl, 96 + start + odd, 127, 8, lo / 8U, bytes)) >= 0 ||
            (use = find_free(tbl, 64 + start + odd, 95, 8, lo / 8U, bytes)) >= 0 ||
            (use = find_free(tbl, 32 + start, 63, step, lo / 8U, bytes)) >= 0 ||
            (use = find_free(tbl, 0 + start, 31, step, lo / 8U, bytes)) >= 0)
            do_alloc(tbl, src, use, lo / 8U, bytes, offset);
        else
            error(lineno, "Can't allocate space on action bus for %s", src.toString(tbl).c_str());
    }
}

// Pass 3: place everything accumulated in need_place onto the bus, then verify that at
// most one random-number-generator unit is used by this table.
void ActionBus::pass3(Table *tbl) {
    // NOTE(review): dynamic_cast target type stripped in extraction -- confirm upstream.
    bool is_action_data = dynamic_cast(tbl) != nullptr;
    LOG1("ActionBus::pass3(" << tbl->name() << ") " << (is_action_data ? "[action]" : "[immed]"));
    for (auto &d : need_place)
        for (auto &bits : d.second) alloc_field(tbl, d.first, bits.first, bits.second);
    int rnguse = -1;
    for (auto &slot : by_byte) {
        for (auto &d : slot.second.data) {
            if (d.first.type == ActionBusSource::RandomGen) {
                if (rnguse >= 0 && rnguse != d.first.rng.unit)
                    error(lineno, "Can't use both rng units in a single table");
                rnguse = d.first.rng.unit;
            }
        }
    }
}

// Access-size capability bitmask for each 32-byte region of the action bus.
static int slot_sizes[] = {
    5, /* 8-bit or 32-bit */
    6, /* 16-bit or 32-bit */
    6, /* 16-bit or 32-bit */
    4  /* 32-bit only */
};

/**
 * ActionBus::find
 * @brief find an action bus slot that contains the requested thing.
 *
 * Overloads allow looking for different kinds of things -- a Format::Field,
 * a HashDistribution, a RandomNumberGen, or something by name (generally a table output).
 * @param f a Format::Field to look for
 * @param name named slot to look for -- generally a table output, but may be a field
 * @param hd a HashDistribution to look for
 * @param rng a RandomNumberGen to look for
 * @param lo, hi range of bits in the thing specified by the first arg
 * @param size bitmask of needed size classes -- 3 bits that denote need for a 8/16/32 bit
 * actionbus slot. Generally will only have 1 bit set, but might be 0.
 */
int ActionBus::find(const char *name, TableOutputModifier mod, int lo, int hi, int size, int *len) {
    // NOTE(review): the ::get and static_cast calls below lost their template arguments in
    // extraction -- confirm upstream.
    if (auto *tbl = ::get(Table::all, name))
        return find(ActionBusSource(tbl, mod), lo, hi, size, -1, len);
    if (mod != TableOutputModifier::NONE) return -1;
    for (auto &slot : by_byte) {
        int offset = lo;
        if (slot.second.name != name) continue;
        if (size && !(size & static_cast(slot_sizes[slot.first / 32U]))) continue;
        if (offset >= static_cast(slot.second.size)) continue;
        if (len) *len = slot.second.size;
        return slot.first + offset / 8;
    }
    return -1;
}

// Find a slot carrying 'src' covering bits lo..hi, optionally constrained to bus position
// 'pos' and to access-size classes 'size'; returns the bus byte or -1.
int ActionBus::find(const ActionBusSource &src, int lo, int hi, int size, int pos, int *len) {
    bool hd1Found = true;
    int hd1Pos = -1;
    for (auto &slot : by_byte) {
        if (!slot.second.data.count(src)) continue;
        int offset = slot.second.data[src];
        // FIXME -- HashDist is 16 bits in either half of the 32-bit immediate path; we call
        // the high half (16..31), but we address it directly (as if it was 16 bits) for
        // non-32 bit accesses. So we ignore the top bit of the offset bit index when
        // accessing it for 8- or 16- bit slots.
        // There should be a better way of doing this.
        if ((src.type == ActionBusSource::HashDist || src.type == ActionBusSource::HashDistPair) &&
            size < 4)
            offset &= 15;
        // Table Color is 8 bits which is ORed into the top of the immediate; The offset is
        // thus >= 24, but we want to ignore that here and just use the offset within the byte
        if (src.type == ActionBusSource::TableColor) offset &= 7;
        if (offset > lo) continue;
        if (offset + static_cast(slot.second.size) <= hi) continue;
        if (size && !(size & slot_sizes[slot.first / 32U])) continue;
        if (len) *len = slot.second.size;
        auto bus_pos = slot.first + (lo - offset) / 8;
        if (pos >= 0 && bus_pos != pos) continue;
        return bus_pos;
    }
    return -1;
}

// Stage-wide search: look for 'src' on the action bus of every table in the stage.
int ActionBus::find(Stage *stage, ActionBusSource src, int lo, int hi, int size, int *len) {
    int rv = -1;
    for (auto tbl : stage->tables)
        if (tbl->action_bus && (rv = tbl->action_bus->find(src, lo, hi, size, -1, len)) >= 0)
            return rv;
    return rv;
}

// Program the action_hv_xbar registers on 'home_row' for this table's action data (or, for
// a meter/stateful's home row, just its table-output bytes), covering 128-bit slice
// 'action_slice' of the wide action data.
// NOTE(review): the template parameter list (<class REGS>) was stripped in extraction.
template
void ActionBus::write_action_regs(REGS &regs, Table *tbl, int home_row, unsigned action_slice) {
    LOG2("--- ActionBus write_action_regs(" << tbl->name() << ", " << home_row << ", "
                                            << action_slice << ")");
    // NOTE(review): dynamic_cast target type stripped in extraction -- confirm upstream.
    bool is_action_data = dynamic_cast(tbl) != nullptr;
    auto &action_hv_xbar = regs.rams.array.row[home_row / 2].action_hv_xbar;
    unsigned side = home_row % 2; /* 0 == left, 1 == right */
    for (auto &el : by_byte) {
        if (!is_action_data && !el.second.is_table_output()) {
            // Nasty hack -- meter/stateful output uses the action bus on the meter row,
            // so we need this routine to set it up, but we only want to do it for the
            // meter bus output; the rest of this ActionBus is for immediate data (set
            // up by write_immed_regs below)
            continue;
        }
        LOG5(" " << el.first << ": " << el.second);
        unsigned byte = el.first;
        BUG_CHECK(byte == el.second.byte);
        unsigned slot = Stage::action_bus_slot_map[byte];
        unsigned bit = 0, size = 0;
        std::string srcname;
        for (auto &data : el.second.data) {
            // FIXME -- this loop feels like a hack -- the size SHOULD already be set in
            // el.second.size (the max of the sizes of everything in the data we're looping
            // over), so should not need recomputing. We do need to figure out the source
            // bit location, and ignore things in other wide words, but that should be stored
            // in the Slot object? What about wired-ors, writing two inputs to the same
            // slot -- it is possible but is it useful?
            unsigned data_bit = 0, data_size = 0;
            if (data.first.type == ActionBusSource::Field) {
                auto f = data.first.field;
                if ((f->bit(data.second) >> 7) != action_slice) continue;  // other 128b slice
                data_bit = f->bit(data.second) & 0x7f;
                data_size = std::min(el.second.size, f->size - data.second);
                srcname = "field " + tbl->find_field(f);
            } else if (data.first.type == ActionBusSource::TableOutput) {
                if (data.first.table->home_row() != home_row) {
                    // skip tables not on this home row
                    continue;
                }
                data_bit = data.second;
                data_size = el.second.size;
                srcname = "table " + data.first.table->name_;
            } else {
                // HashDist and RandomGen only work in write_immed_regs
                BUG();
            }
            LOG3(" byte " << byte << " (slot " << slot << "): " << srcname << " (" << data.second
                          << ".." << (data.second + data_size - 1) << ")" << " [" << data_bit
                          << ".." << (data_bit + data_size - 1) << "]");
            if (size) {
                BUG_CHECK(bit == data_bit);  // checked in pass1; maintained by pass3
                size = std::max(size, data_size);
            } else {
                bit = data_bit;
                size = data_size;
            }
        }
        if (size == 0) continue;
        if (bit + size > 128) {
            error(lineno,
                  "Action bus setup can't deal with field %s split across "
                  "SRAM rows",
                  el.second.name.c_str());
            continue;
        }
        unsigned bytemask = (1U << ((size + 7) / 8U)) - 1;
        // Program the mux control for each hardware slot covered, per slot width.
        switch (Stage::action_bus_slot_size[slot]) {
            case 8:
                for (unsigned sbyte = bit / 8; sbyte <= (bit + size - 1) / 8;
                     sbyte++, byte++, slot++) {
                    unsigned code = 0, mask = 0;
                    // 'code' selects which byte-mux group; 'mask' its alignment constraint.
                    switch (sbyte >> 2) {
                        case 0:
                            code = sbyte >> 1;
                            mask = 1;
                            break;
                        case 1:
                            code = 2;
                            mask = 3;
                            break;
                        case 2:
                        case 3:
                            code = 3;
                            mask = 7;
                            break;
                        default:
                            BUG();
                    }
                    if ((sbyte ^ byte) & mask) {
                        error(lineno, "Can't put field %s into byte %d on action xbar",
                              el.second.name.c_str(), byte);
                        break;
                    }
                    auto &ctl = action_hv_xbar.action_hv_ixbar_ctl_byte[side];
                    switch (code) {
                        case 0:
                            ctl.action_hv_ixbar_ctl_byte_1to0_ctl = slot / 2;
                            ctl.action_hv_ixbar_ctl_byte_1to0_enable = 1;
                            break;
                        case 1:
                            ctl.action_hv_ixbar_ctl_byte_3to2_ctl = slot / 2;
                            ctl.action_hv_ixbar_ctl_byte_3to2_enable = 1;
                            break;
                        case 2:
                            ctl.action_hv_ixbar_ctl_byte_7to4_ctl = slot / 4;
                            ctl.action_hv_ixbar_ctl_byte_7to4_enable = 1;
                            break;
                        case 3:
                            ctl.action_hv_ixbar_ctl_byte_15to8_ctl = slot / 8;
                            ctl.action_hv_ixbar_ctl_byte_15to8_enable = 1;
                            break;
                    }
                    if (!(bytemask & 1))
                        LOG1("WARNING: " << SrcInfo(lineno) << ": putting " << el.second.name
                                         << " on action bus byte " << byte
                                         << " even though bit in bytemask is "
                                            "not set");
                    action_hv_xbar.action_hv_ixbar_input_bytemask[side] |= 1 << sbyte;
                    bytemask >>= 1;
                }
                break;
            case 16:
                byte &= ~1;
                slot -= ACTION_DATA_8B_SLOTS;  // renumber into the halfword slot space
                bytemask <<= ((bit / 8) & 1);
                for (unsigned word = bit / 16; word <= (bit + size - 1) / 16;
                     word++, byte += 2, slot++) {
                    unsigned code = 0, mask = 0;
                    switch (word >> 1) {
                        case 0:
                            code = 1;
                            mask = 3;
                            break;
                        case 1:
                            code = 2;
                            mask = 3;
                            break;
                        case 2:
                        case 3:
                            code = 3;
                            mask = 7;
                            break;
                        default:
                            BUG();
                    }
                    if (((word << 1) ^ byte) & mask) {
                        error(lineno, "Can't put field %s into byte %d on action xbar",
                              el.second.name.c_str(), byte);
                        break;
                    }
                    auto &ctl = action_hv_xbar.action_hv_ixbar_ctl_halfword[slot / 8][side];
                    unsigned subslot = slot % 8U;
                    switch (code) {
                        case 1:
                            ctl.action_hv_ixbar_ctl_halfword_3to0_ctl = subslot / 2;
                            ctl.action_hv_ixbar_ctl_halfword_3to0_enable = 1;
                            break;
                        case 2:
                            ctl.action_hv_ixbar_ctl_halfword_7to4_ctl = subslot / 2;
                            ctl.action_hv_ixbar_ctl_halfword_7to4_enable = 1;
                            break;
                        case 3:
                            ctl.action_hv_ixbar_ctl_halfword_15to8_ctl = subslot / 4;
                            ctl.action_hv_ixbar_ctl_halfword_15to8_enable = 1;
                            break;
                    }
                    action_hv_xbar.action_hv_ixbar_input_bytemask[side] |= (bytemask & 3)
                                                                           << (word * 2);
                    bytemask >>= 2;
                }
                break;
            case 32: {
                byte &= ~3;
                slot -= ACTION_DATA_8B_SLOTS + ACTION_DATA_16B_SLOTS;  // word slot space
                unsigned word = bit / 32;
                unsigned code = 1 + word / 2;
                bit %= 32;
                bytemask <<= bit / 8;
                if (((word << 2) ^ byte) & 7) {
                    error(lineno, "Can't put field %s into byte %d on action xbar",
                          el.second.name.c_str(), byte);
                    break;
                }
                auto &ctl = action_hv_xbar.action_hv_ixbar_ctl_word[slot / 4][side];
                slot %= 4U;
                switch (code) {
                    case 1:
                        ctl.action_hv_ixbar_ctl_word_7to0_ctl = slot / 2;
                        ctl.action_hv_ixbar_ctl_word_7to0_enable = 1;
                        break;
                    case 2:
                        ctl.action_hv_ixbar_ctl_word_15to8_ctl = slot / 2;
                        ctl.action_hv_ixbar_ctl_word_15to8_enable = 1;
                        break;
                }
                action_hv_xbar.action_hv_ixbar_input_bytemask[side] |= (bytemask & 15)
                                                                       << (word * 4);
                bytemask >>= 4;
                break;
            }
            default:
                BUG();
        }
        if (bytemask)
            LOG1("WARNING: " << SrcInfo(lineno) << ": excess bits " << hex(bytemask)
                             << " set in bytemask for " << el.second.name);
    }
}
FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void ActionBus::write_action_regs, mau_regs &,
                      Table *, int, unsigned)

// Program the address-distribution registers that route this table's immediate data (and
// random-number-generator data) onto the action bus.
// NOTE(review): the template parameter list (<class REGS>) was stripped in extraction.
template
void ActionBus::write_immed_regs(REGS &regs, Table *tbl) {
    LOG2("--- ActionBus write_immed_regs(" << tbl->name() << ")");
    auto &adrdist = regs.rams.match.adrdist;
    int tid = tbl->logical_id;
    unsigned rngmask = 0;
    for (auto &f : by_byte) {
        if (f.second.is_table_output()) continue;  // handled by write_action_regs
        LOG5(" " << f.first << ": " << f.second);
        int slot = Stage::action_bus_slot_map[f.first];
        unsigned off = 0;
        unsigned size = f.second.size;
        if (!f.second.data.empty()) {
            off = f.second.data.begin()->second;
            if (f.second.data.begin()->first.type == ActionBusSource::Field)
                off -= f.second.data.begin()->first.field->immed_bit(0);
            // Accumulate the RNG unit and byte mask for any random-gen sources.
            for (auto &d : f.second.data) {
                if (d.first.type == ActionBusSource::RandomGen) {
                    rngmask |= d.first.rng.unit << 4;
                    rngmask |= ((1 << (size / 8)) - 1) << d.second / 8;
                }
            }
        }
        switch (Stage::action_bus_slot_size[slot]) {
            case 8:
                for (unsigned b = off / 8; b <= (off + size - 1) / 8; b++) {
                    BUG_CHECK((b & 3) == (slot & 3));
                    adrdist.immediate_data_8b_enable[tid / 8] |= 1U << ((tid & 7) * 4 + b);
                    // we write these ctl regs twice if we use both bytes in a pair. That will
                    // cause a WARNING in the log file if both uses are the same -- it should be
                    // impossible to get an ERROR for conflicting uses, as that should have caused
                    // an error in pass1 above, and never made it to this point.
                    setup_muxctl(adrdist.immediate_data_8b_ixbar_ctl[tid * 2 + b / 2], slot++ / 4);
                }
                break;
            case 16:
                slot -= ACTION_DATA_8B_SLOTS;
                for (unsigned w = off / 16; w <= (off + size - 1) / 16; w++) {
                    BUG_CHECK((w & 1) == (slot & 1));
                    setup_muxctl(adrdist.immediate_data_16b_ixbar_ctl[tid * 2 + w], slot++ / 2);
                }
                break;
            case 32:
                slot -= ACTION_DATA_8B_SLOTS + ACTION_DATA_16B_SLOTS;
                setup_muxctl(adrdist.immediate_data_32b_ixbar_ctl[tid], slot);
                break;
            default:
                BUG();
        }
    }
    if (rngmask) {
        regs.rams.match.adrdist.immediate_data_rng_enable = 1;
        regs.rams.match.adrdist.immediate_data_rng_logical_map_ctl[tbl->logical_id / 4]
            .set_subfield(rngmask, 5 * (tbl->logical_id % 4U), 5);
    }
}
FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void ActionBus::write_immed_regs, mau_regs &,
                      Table *)

// Short name of this source for slot naming: the field name, the owning table's name, or
// the referenced name; empty for other source kinds.
std::string ActionBusSource::name(Table *tbl) const {
    switch (type) {
        case Field:
            return tbl->find_field(field);
        case TableOutput:
        case TableColor:
        case TableAddress:
            return table->name();
        case NameRef:
        case ColorRef:
        case AddressRef:
            return name_ref->name;
        default:
            return "";
    }
}

// Human-readable description of this source for logging/diagnostics.
std::string ActionBusSource::toString(Table *tbl) const {
    std::stringstream tmp;
    switch (type) {
        case None:
            // NOTE(review): this literal (and the default case below) may have lost
            // angle-bracketed text (e.g. "<none>") in extraction -- confirm upstream.
            return "";
        case Field:
            return tbl->find_field(field);
        case HashDist:
            tmp << "hash_dist " << hd->id;
            return tmp.str();
        case RandomGen:
            tmp << "rng " << rng.unit;
            return tmp.str();
        case TableOutput:
            return table->name();
        case TableColor:
            return table->name_ + " color";
        case TableAddress:
            return table->name_ + " address";
        case Ealu:
            return "ealu";
        case XcmpData:
            tmp << "xcmp(" << xcmp_data.xcmp_group << ":" << xcmp_data.xcmp_byte << ")";
            return tmp.str();
        case NameRef:
        case ColorRef:
        case AddressRef:
            tmp << "name ";
            if (name_ref)
                tmp << name_ref->name;
            else
                tmp << "(meter)";  // null name_ref denotes the attached meter bus
            if (type == ColorRef) tmp << " color";
            if (type == AddressRef) tmp << " address";
            return tmp.str();
        default:
            tmp << "";
            return tmp.str();
    }
}

// Print the " color"/" address" suffix corresponding to a table-output modifier.
std::ostream &operator<<(std::ostream &out, TableOutputModifier mod) {
    switch (mod) {
        case TableOutputModifier::Color:
            out << " color";
            break;
        case TableOutputModifier::Address:
            out << " address";
            break;
        default:
            break;
    }
    return out;
}

// Debug print of one slot: name/byte/size plus every (source, offset) it carries.
std::ostream &operator<<(std::ostream &out, const ActionBus::Slot &sl) {
    out << sl.name << " byte=" << sl.byte << " size=" << sl.size;
    for (auto &d : sl.data) out << "\n\t" << d.first << ": " << d.second;
    return out;
}

// Debug print of the whole ActionBus: slots, pending placements, and usage bitmaps.
std::ostream &operator<<(std::ostream &out, const ActionBus &a) {
    for (auto &slot : a.by_byte) out << slot.first << ": " << slot.second << std::endl;
    for (auto &np : a.need_place) {
        out << np.first << " {";
        const char *sep = " ";
        for (auto &el : np.second) {
            out << sep << el.first << ":" << el.second;
            sep = ", ";
        }
        out << (sep + 1) << "}" << std::endl;  // sep+1 skips the leading space
    }
    out << "byte_use: " << a.byte_use << std::endl;
    for (auto &hvslice : a.action_hv_slice_use) {
        for (auto v : hvslice) out << " " << hex(v, 4, '0');
        out << std::endl;
    }
    return out;
}

// Debugger hook: dump an ActionBus to stdout (see the .gdbinit 'd' command).
void dump(const ActionBus *a) { std::cout << *a; }
diff --git a/backends/tofino/bf-asm/action_bus.h b/backends/tofino/bf-asm/action_bus.h
new file mode 100644
index 00000000000..16b86e43fe6
--- /dev/null
+++ b/backends/tofino/bf-asm/action_bus.h
/**
 * Copyright (C) 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 *
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef BACKENDS_TOFINO_BF_ASM_ACTION_BUS_H_
#define BACKENDS_TOFINO_BF_ASM_ACTION_BUS_H_

// NOTE(review): the header name of the include below (an angle-bracketed system header)
// was lost in extraction -- confirm against the upstream file.
#include

#include "backends/tofino/bf-asm/tables.h"

// static struct MeterBus_t {} MeterBus;
// Tag type used to construct an ActionBusSource referring to the (sole) attached meter bus.
struct MeterBus_t {};

// A tagged-union descriptor of where a piece of action-bus data comes from: an action
// format field, hash distribution unit(s), a random number generator, another table's
// output/color/address, exact-match compare data, or an as-yet-unresolved name reference
// (resolved to a concrete source in ActionBus::pass1).
struct ActionBusSource {
    enum {
        None,
        Field,
        HashDist,
        HashDistPair,
        RandomGen,
        TableOutput,
        TableColor,
        TableAddress,
        Ealu,
        XcmpData,
        NameRef,
        ColorRef,
        AddressRef
    } type;
    union {
        Table::Format::Field *field;
        HashDistribution *hd;
        struct {
            HashDistribution *hd1, *hd2;
        } hd_tuple;
        Table *table;
        Table::Ref *name_ref;
        RandomNumberGen rng;
        struct {
            short xcmp_group, xcmp_byte;
        } xcmp_data;
    };
    ActionBusSource() : type(None) { field = nullptr; }
    ActionBusSource(Table::Format::Field *f) : type(Field) {  // NOLINT(runtime/explicit)
        field = f;
    }
    ActionBusSource(HashDistribution *h) : type(HashDist) { hd = h; }  // NOLINT(runtime/explicit)
    ActionBusSource(HashDistribution *h1, HashDistribution *h2) : type(HashDistPair) {
        hd_tuple.hd1 = h1;
        hd_tuple.hd2 = h2;
    }
    // Table output source; the modifier selects output vs color vs address.
    ActionBusSource(Table *t,
                    TableOutputModifier m = TableOutputModifier::NONE)  // NOLINT(runtime/explicit)
        : type(TableOutput) {
        switch (m) {
            case TableOutputModifier::Color:
                type = TableColor;
                break;
            case TableOutputModifier::Address:
                type = TableAddress;
                break;
            default:
                break;
        }
        table = t;
    }
    // Unresolved by-name reference (resolved in ActionBus::pass1).
    ActionBusSource(Table::Ref *t,
                    TableOutputModifier m = TableOutputModifier::NONE)  // NOLINT(runtime/explicit)
        : type(NameRef) {
        switch (m) {
            case TableOutputModifier::Color:
                type = ColorRef;
                break;
            case TableOutputModifier::Address:
                type = AddressRef;
                break;
            default:
                break;
        }
        name_ref = t;
    }
    // Meter-bus reference: a null name_ref denotes "the single attached meter table".
    ActionBusSource(MeterBus_t,
                    TableOutputModifier m = TableOutputModifier::NONE)  // NOLINT(runtime/explicit)
        : type(NameRef) {
        switch (m) {
            case TableOutputModifier::Color:
                type = ColorRef;
                break;
            case TableOutputModifier::Address:
                type = AddressRef;
                break;
            default:
                break;
        }
        name_ref = nullptr;
    }
    ActionBusSource(RandomNumberGen r) : type(RandomGen) {  // NOLINT(runtime/explicit)
        field = nullptr;
        rng = r;
    }
    ActionBusSource(InputXbar::Group grp, int byte) : type(XcmpData) {
        BUG_CHECK(grp.type == InputXbar::Group::XCMP, "Not xcmp ixbar");
        field = nullptr;
        xcmp_data.xcmp_group = grp.index;
        xcmp_data.xcmp_byte = byte;
    }
    // Equality compares the union via the pointer member (all pointer alternatives alias),
    // with dedicated handling for the multi-member XcmpData/HashDistPair alternatives.
    bool operator==(const ActionBusSource &a) const {
        if (type == XcmpData)
            return a.type == XcmpData && xcmp_data.xcmp_group == a.xcmp_data.xcmp_group &&
                   xcmp_data.xcmp_byte == a.xcmp_data.xcmp_byte;
        if (type == HashDistPair && hd_tuple.hd2 != a.hd_tuple.hd2) return false;
        return type == a.type && field == a.field;
    }
    // Strict weak ordering for use as an ordered_map key: by type, then by union contents.
    bool operator<(const ActionBusSource &a) const {
        if (type != a.type) return type < a.type;
        switch (type) {
            case HashDistPair:
                return hd_tuple.hd1 == a.hd_tuple.hd1 ? hd_tuple.hd2 < a.hd_tuple.hd2
                                                      : hd_tuple.hd1 < a.hd_tuple.hd1;
            case XcmpData:
                return xcmp_data.xcmp_group == a.xcmp_data.xcmp_group
                           ? xcmp_data.xcmp_byte < a.xcmp_data.xcmp_byte
                           : xcmp_data.xcmp_group < a.xcmp_data.xcmp_group;
            default:
                return field < a.field;
        }
    }
    std::string name(Table *tbl) const;
    std::string toString(Table *tbl) const;
    friend std::ostream &operator<<(std::ostream &, const ActionBusSource &);
};

// Per-table model of the action data (output) bus: which bus bytes carry which sources,
// plus the bookkeeping needed to allocate new data onto the bus.
// NOTE(review): this class definition continues beyond the end of this chunk; several
// ordered_map/std::vector member declarations below lost their template arguments in
// extraction -- confirm against the upstream file.
class ActionBus {
 protected:
    // Check two ActionBusSource refs to ensure that they are compatible (can be at the same
    // location on the aciton bus -- basically the same data)
    static bool compatible(const ActionBusSource &a, unsigned a_off, const ActionBusSource &b,
                           unsigned b_off);
    struct Slot {
        std::string name;
        unsigned byte, size;  // size in bits
        ordered_map data;
        // offset in the specified source is in this slot -- corresponding bytes for different
        // action data formats will go into the same slot.
        Slot(std::string n, unsigned b, unsigned s) : name(n), byte(b), size(s) {}
        Slot(std::string n, unsigned b, unsigned s, ActionBusSource src, unsigned off)
            : name(n), byte(b), size(s) {
            data.emplace(src, off);
        }
        unsigned lo(Table *tbl) const;  // low bit on the action data bus
        bool is_table_output() const {
            for (auto &d : data) {
                BUG_CHECK(d.first.type != ActionBusSource::NameRef);
                if (d.first.type == ActionBusSource::TableOutput) return true;
            }
            return false;
        }
    };
    friend std::ostream &operator<<(std::ostream &, const Slot &);
    friend std::ostream &operator<<(std::ostream &, const ActionBus &);
    ordered_map by_byte;
    ordered_map> need_place;
    // bytes from the given sources are needed on the action bus -- the pairs in the map
    // are (offset,use) where offset is offset in bits, and use is a bitset of the needed
    // uses (bit index == log2 of the access size in bytes)

    std::vector> action_hv_slice_use;
    // which bytes of input to the ixbar are used in each action_hv_xbar slice, for each
    // 128-bit slice of the action bus.
    bitvec byte_use;  // bytes on the action data (input) bus or immediate bus in use
                      // for wide action tables, this may be >16 bytes...
+ + void setup_slot(int lineno, Table *tbl, const char *name, unsigned idx, ActionBusSource src, + unsigned sz, unsigned off); + + int find_free(Table *tbl, unsigned min, unsigned max, unsigned step, unsigned lobyte, + unsigned bytes); + int find_merge(Table *tbl, int offset, int bytes, int use); + bool check_atcam_sharing(Table *tbl1, Table *tbl2); + bool check_slot_sharing(ActionBus::Slot &slot, bitvec &action_bus); + + ActionBus() : lineno(-1) {} + ActionBus(Table *, VECTOR(pair_t) &); + + public: + int lineno; + static std::unique_ptr create(); + static std::unique_ptr create(Table *, VECTOR(pair_t) &); + + void pass1(Table *tbl); + void pass2(Table *tbl) {} + void pass3(Table *tbl); + template + void write_immed_regs(REGS ®s, Table *tbl); + template + void write_action_regs(REGS ®s, Table *tbl, int homerow, unsigned action_slice); + + void do_alloc(Table *tbl, ActionBusSource src, unsigned use, int lobyte, int bytes, + unsigned offset); + static const unsigned size_masks[8]; + virtual void alloc_field(Table *, ActionBusSource src, unsigned offset, unsigned sizes_needed); + void need_alloc(Table *tbl, const ActionBusSource &src, unsigned lo, unsigned hi, + unsigned size); + void need_alloc(Table *tbl, Table *attached, TableOutputModifier mod, unsigned lo, unsigned hi, + unsigned size) { + need_alloc(tbl, ActionBusSource(attached, mod), lo, hi, size); + } + + int find(const char *name, TableOutputModifier mod, int lo, int hi, int size, int *len = 0); + int find(const char *name, int lo, int hi, int size, int *len = 0) { + return find(name, TableOutputModifier::NONE, lo, hi, size, len); + } + int find(const std::string &name, TableOutputModifier mod, int lo, int hi, int size, + int *len = 0) { + return find(name.c_str(), mod, lo, hi, size, len); + } + int find(const std::string &name, int lo, int hi, int size, int *len = 0) { + return find(name.c_str(), lo, hi, size, len); + } + int find(const ActionBusSource &src, int lo, int hi, int size, int pos = -1, int *len 
= 0); + int find(Table *attached, TableOutputModifier mod, int lo, int hi, int size, int *len = 0) { + return find(ActionBusSource(attached, mod), lo, hi, size, -1, len); + } + static int find(Stage *stage, ActionBusSource src, int lo, int hi, int size, int *len = 0); + unsigned size() { + unsigned rv = 0; + for (auto &slot : by_byte) rv += slot.second.size; + return rv; + } + auto slots() const { return Values(by_byte); } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_ACTION_BUS_H_ */ diff --git a/backends/tofino/bf-asm/action_table.cpp b/backends/tofino/bf-asm/action_table.cpp new file mode 100644 index 00000000000..7e24acee618 --- /dev/null +++ b/backends/tofino/bf-asm/action_table.cpp @@ -0,0 +1,794 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "action_bus.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" + +// template specialization declarations +#include "tofino/action_table.h" + +/// See 6.2.8.4.3 of the MAU Micro-Architecture document. 
+const unsigned MAX_AD_SHIFT = 5U; + +std::string ActionTable::find_field(Table::Format::Field *field) { + for (auto &af : action_formats) { + auto name = af.second->find_field(field); + if (!name.empty() && name[0] != '<') return af.first + ":" + name; + } + return Table::find_field(field); +} + +int ActionTable::find_field_lineno(Table::Format::Field *field) { + int rv = -1; + for (auto &af : action_formats) + if ((rv = af.second->find_field_lineno(field)) >= 0) return rv; + return Table::find_field_lineno(field); +} + +Table::Format::Field *ActionTable::lookup_field(const std::string &name, + const std::string &action) const { + if (action == "*" || action == "") { + if (auto *rv = format ? format->field(name) : 0) return rv; + if (action == "*") + for (auto &fmt : action_formats) + if (auto *rv = fmt.second->field(name)) return rv; + } else { + if (action_formats.count(action)) { + if (auto *rv = action_formats.at(action)->field(name)) return rv; + } else if (auto *rv = format ? format->field(name) : 0) { + return rv; + } + } + for (auto *match_table : match_tables) { + BUG_CHECK((Table *)match_table != (Table *)this); + if (auto *rv = match_table->lookup_field(name)) return rv; + } + return 0; +} +void ActionTable::pad_format_fields() { + format->size = get_size(); + format->log2size = get_log2size(); + for (auto &fmt : action_formats) { + if (fmt.second->size < format->size) { + fmt.second->size = format->size; + fmt.second->log2size = format->log2size; + } + } +} + +void ActionTable::apply_to_field(const std::string &n, std::function fn) { + for (auto &fmt : action_formats) fmt.second->apply_to_field(n, fn); + if (format) format->apply_to_field(n, fn); +} +int ActionTable::find_on_actionbus(const ActionBusSource &src, int lo, int hi, int size, int pos) { + int rv; + if (action_bus && (rv = action_bus->find(src, lo, hi, size, pos)) >= 0) return rv; + for (auto *match_table : match_tables) { + BUG_CHECK((Table *)match_table != (Table *)this); + if ((rv = 
match_table->find_on_actionbus(src, lo, hi, size, pos)) >= 0) return rv; + } + return -1; +} + +int ActionTable::find_on_actionbus(const char *name, TableOutputModifier mod, int lo, int hi, + int size, int *len) { + int rv; + if (action_bus && (rv = action_bus->find(name, mod, lo, hi, size, len)) >= 0) return rv; + for (auto *match_table : match_tables) { + BUG_CHECK((Table *)match_table != (Table *)this); + if ((rv = match_table->find_on_actionbus(name, mod, lo, hi, size, len)) >= 0) return rv; + } + return -1; +} + +void ActionTable::need_on_actionbus(const ActionBusSource &src, int lo, int hi, int size) { + if (src.type == ActionBusSource::Field) { + auto f = src.field; + if (f->fmt == format.get()) { + Table::need_on_actionbus(src, lo, hi, size); + return; + } + for (auto &af : Values(action_formats)) { + if (f->fmt == af.get()) { + Table::need_on_actionbus(f, lo, hi, size); + return; + } + } + for (auto *match_table : match_tables) { + BUG_CHECK((Table *)match_table != (Table *)this); + if (f->fmt == match_table->get_format()) { + match_table->need_on_actionbus(f, lo, hi, size); + return; + } + } + BUG_CHECK(!"Can't find table associated with field"); + // TBD - Add allocation for ActionBusSource::HashDistPair. Compiler does + // action bus allocation so this path is never used. 
+ } else if (src.type == ActionBusSource::HashDist) { + auto hd = src.hd; + for (auto &hash_dist : this->hash_dist) { + if (&hash_dist == hd) { + Table::need_on_actionbus(hd, lo, hi, size); + return; + } + } + for (auto *match_table : match_tables) { + if (match_table->find_hash_dist(hd->id) == hd) { + match_table->need_on_actionbus(hd, lo, hi, size); + return; + } + } + BUG_CHECK(!"Can't find table associated with hash_dist"); + } else if (src.type == ActionBusSource::RandomGen) { + auto rng = src.rng; + int attached_count = 0; + for (auto *match_table : match_tables) { + match_table->need_on_actionbus(rng, lo, hi, size); + ++attached_count; + } + if (attached_count > 1) { + error(-1, + "Assembler cannot allocate action bus space for rng %d as it " + "used by mulitple tables", + rng.unit); + } + } else { + error(-1, "Assembler cannot allocate action bus space for %s", src.toString(this).c_str()); + } +} + +void ActionTable::need_on_actionbus(Table *att, TableOutputModifier mod, int lo, int hi, int size) { + int attached_count = 0; + for (auto *match_table : match_tables) { + if (match_table->is_attached(att)) { + match_table->need_on_actionbus(att, mod, lo, hi, size); + ++attached_count; + } + } + if (attached_count > 1) { + error(att->lineno, + "Assembler cannot allocate action bus space for table %s as it " + "used by mulitple tables", + att->name()); + } +} + +/** + * Necessary for determining the actiondata_adr_exact/tcam_shiftcount register value. 
+ */ +unsigned ActionTable::determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) const { + int lo_huffman_bits = + std::min(get_log2size() - 2, static_cast(ACTION_ADDRESS_ZERO_PAD)); + int extra_shift = ACTION_ADDRESS_ZERO_PAD - lo_huffman_bits; + if (call.args[0] == "$DIRECT") { + return 64 + extra_shift + tcam_shift; + } else if (call.args[0].field()) { + BUG_CHECK(call.args[0].field()->by_group[group]->bit(0) / 128U == word); + return call.args[0].field()->by_group[group]->bit(0) % 128U + extra_shift; + } else if (call.args[1].field()) { + return call.args[1].field()->bit(0) + ACTION_ADDRESS_ZERO_PAD; + } + return 0; +} + +/** + * Calculates the actiondata_adr_default value. Will default in the required huffman bits + * described in section 6.2.8.4.3 Action RAM Addressing of the uArch, as well as the + * per flow enable bit if indicated + */ +unsigned ActionTable::determine_default(Table::Call &call) const { + int huffman_ones = std::max(static_cast(get_log2size()) - 3, 0); + BUG_CHECK(huffman_ones <= ACTION_DATA_HUFFMAN_BITS); + unsigned huffman_mask = (1 << huffman_ones) - 1; + // lower_huffman_mask == 0x1f, upper_huffman_mask = 0x60 + unsigned lower_huffman_mask = (1U << ACTION_DATA_LOWER_HUFFMAN_BITS) - 1; + unsigned upper_huffman_mask = ((1U << ACTION_DATA_HUFFMAN_BITS) - 1) & ~lower_huffman_mask; + unsigned rv = (huffman_mask & upper_huffman_mask) << ACTION_DATA_HUFFMAN_DIFFERENCE; + rv |= huffman_mask & lower_huffman_mask; + if (call.args[1].name() && call.args[1] == "$DEFAULT") { + rv |= 1 << ACTION_DATA_PER_FLOW_ENABLE_START_BIT; + } + return rv; +} + +/** + * Calculates the actiondata_adr_mask value for a given table. 
+ */ +unsigned ActionTable::determine_mask(Table::Call &call) const { + int lo_huffman_bits = + std::min(get_log2size() - 2, static_cast(ACTION_DATA_LOWER_HUFFMAN_BITS)); + unsigned rv = 0; + if (call.args[0] == "$DIRECT") { + rv |= ((1U << ACTION_ADDRESS_BITS) - 1) & (~0U << lo_huffman_bits); + } else if (call.args[0].field()) { + rv = ((1U << call.args[0].size()) - 1) << lo_huffman_bits; + } + return rv; +} + +/** + * Calculates the actiondata_adr_vpn_shiftcount register. As described in section 6.2.8.4.3 + * for action data tables sized at 256, 512 and 1024, the Huffman bits for these addresses are + * no longer at the bottom of the address, but rather near the top. For direct action data + * addresses, a hole in the address needs to be created. + */ +unsigned ActionTable::determine_vpn_shiftcount(Table::Call &call) const { + if (call.args[0].name() && call.args[0] == "$DIRECT") { + return std::max(0, static_cast(get_log2size()) - 2 - ACTION_DATA_LOWER_HUFFMAN_BITS); + } + return 0; +} + +int ActionTable::get_start_vpn() { + // Based on the format width, the starting vpn is determined as follows (See + // Section 6.2.8.4.3 in MAU MicroArchitecture Doc) + // WIDTH LOG2SIZE START_VPN + // <= 128 bits - 7 - 0 + // = 256 bits - 8 - 0 + // = 512 bits - 9 - 1 + // = 1024 bits - 10 - 3 + int size = get_log2size(); + if (size <= 8) return 0; + if (size == 9) return 1; + if (size == 10) return 3; + return 0; +} + +void ActionTable::vpn_params(int &width, int &depth, int &period, const char *&period_name) const { + width = 1; + depth = layout_size(); + period = format ? 1 << std::max(static_cast(format->log2size) - 7, 0) : 0; + // Based on the format width, the vpn are numbered as follows (See Section + // 6.2.8.4.3 in MAU MicroArchitecture Doc) + // WIDTH PERIOD VPN'S + // <= 128 bits - +1 - 0, 1, 2, 3, ... + // = 256 bits - +2 - 2, 4, 6, 8, ... + // = 512 bits - +4 - 1, 5, 9, 13, ... + // = 1024 bits - +8 - 3, 11, 19, 27, ... 
+ for (auto &fmt : Values(action_formats)) + period = std::max(period, 1 << std::max(static_cast(fmt->log2size) - 7, 0)); + period_name = "action data width"; +} + +void ActionTable::setup(VECTOR(pair_t) & data) { + action_id = -1; + setup_layout(layout, data); + for (auto &kv : MapIterChecked(data, true)) { + if (kv.key == "format") { + const char *action = nullptr; + if (kv.key.type == tCMD) { + if (!PCHECKTYPE(kv.key.vec.size > 1, kv.key[1], tSTR)) continue; + if (action_formats.count((action = kv.key[1].s))) { + error(kv.key.lineno, "Multiple formats for action %s", kv.key[1].s); + continue; + } + } + if (CHECKTYPEPM(kv.value, tMAP, kv.value.map.size > 0, "non-empty map")) { + auto *fmt = new Format(this, kv.value.map, true); + if (fmt->size < 8) { // pad out to minimum size + fmt->size = 8; + fmt->log2size = 3; + } + if (action) + action_formats[action].reset(fmt); + else + format.reset(fmt); + } + } + } + if (!format && action_formats.empty()) error(lineno, "No format in action table %s", name()); + for (auto &kv : MapIterChecked(data, true)) { + if (kv.key == "format") { + /* done above to be done before action_bus and vpns */ + } else if (kv.key.type == tCMD && kv.key[0] == "format") { + /* done above to be done before action_bus */ + } else if (kv.key == "actions") { + if (CHECKTYPE(kv.value, tMAP)) actions.reset(new Actions(this, kv.value.map)); + } else if (kv.key == "action_bus") { + if (CHECKTYPE(kv.value, tMAP)) action_bus = ActionBus::create(this, kv.value.map); + } else if (kv.key == "action_id") { + if (CHECKTYPE(kv.value, tINT)) action_id = kv.value.i; + } else if (kv.key == "vpns") { + if (kv.value == "null") + no_vpns = true; + else if (CHECKTYPE(kv.value, tVEC)) + setup_vpns(layout, &kv.value.vec); + } else if (kv.key == "home_row") { + home_lineno = kv.value.lineno; + // Builds the map of home rows possible per word, as different words of the + // action row is on different home rows + if (CHECKTYPE2(kv.value, tINT, tVEC)) { + int word = 0; + 
if (kv.value.type == tINT) { + if (kv.value.i >= 0 || kv.value.i < LOGICAL_SRAM_ROWS) + home_rows_per_word[word].setbit(kv.value.i); + else + error(kv.value.lineno, "Invalid home row %" PRId64 "", kv.value.i); + } else { + for (auto &v : kv.value.vec) { + if (CHECKTYPE2(v, tINT, tVEC)) { + if (v.type == tINT) { + if (v.i >= 0 || v.i < LOGICAL_SRAM_ROWS) + home_rows_per_word[word].setbit(v.i); + else + error(v.lineno, "Invalid home row %" PRId64 "", v.i); + } else if (v.type == tVEC) { + for (auto &v2 : v.vec) { + if (CHECKTYPE(v2, tINT)) { + if (v2.i >= 0 || v2.i < LOGICAL_SRAM_ROWS) + home_rows_per_word[word].setbit(v2.i); + else + error(v.lineno, "Invalid home row %" PRId64 "", v2.i); + } + } + } + } + word++; + } + } + } + } else if (kv.key == "p4") { + if (CHECKTYPE(kv.value, tMAP)) + p4_table = P4Table::get(P4Table::ActionData, kv.value.map); + } else if (kv.key == "context_json") { + setup_context_json(kv.value); + } else if (kv.key == "row" || kv.key == "logical_row" || kv.key == "column" || + kv.key == "word") { + /* already done in setup_layout */ + } else if (kv.key == "logical_bus") { + if (CHECKTYPE2(kv.value, tSTR, tVEC)) { + if (kv.value.type == tSTR) { + if (*kv.value.s != 'A' && *kv.value.s != 'O' && *kv.value.s != 'S') + error(kv.value.lineno, "Invalid logical bus %s", kv.value.s); + } else { + for (auto &v : kv.value.vec) { + if (CHECKTYPE(v, tSTR)) { + if (*v.s != 'A' && *v.s != 'O' && *v.s != 'S') + error(v.lineno, "Invalid logical bus %s", v.s); + } + } + } + } + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_srams(); + else + alloc_rams(true, stage->sram_use, 0); + if (!action_bus) action_bus = ActionBus::create(); +} + +void ActionTable::pass1() { + LOG1("### Action table " << name() << " pass1 " << loc()); + if (default_action.empty()) default_action = get_default_action(); + if (!p4_table) + p4_table = 
P4Table::alloc(P4Table::ActionData, this); + else + p4_table->check(this); + alloc_vpns(); + std::sort(layout.begin(), layout.end(), [](const Layout &a, const Layout &b) -> bool { + if (a.word != b.word) return a.word < b.word; + return a.row > b.row; + }); + int width = format ? (format->size - 1) / 128 + 1 : 1; + for (auto &fmt : action_formats) { +#if 0 + for (auto &fld : *fmt.second) { + if (auto *f = format ? format->field(fld.first) : 0) { + if (fld.second.bits != f->bits || fld.second.size != f->size) { + error(fmt.second->lineno, "Action %s format for field %s incompatible " + "with default format", fmt.first.c_str(), fld.first.c_str()); + continue; } } + for (auto &fmt2 : action_formats) { + if (fmt.second == fmt2.second) break; + if (auto *f = fmt2.second->field(fld.first)) { + if (fld.second.bits != f->bits || fld.second.size != f->size) { + error(fmt.second->lineno, "Action %s format for field %s incompatible " + "with action %s format", fmt.first.c_str(), fld.first.c_str(), + fmt2.first.c_str()); + break; } } } } +#endif + width = std::max(width, int((fmt.second->size - 1) / 128U + 1)); + } + unsigned depth = layout_size() / width; + std::vector slice_size(width, 0); + unsigned idx = 0; // ram index within depth + int word = 0; // word within wide table; + int home_row = -1; + std::map final_home_rows; + Layout *prev = nullptr; + for (auto row = layout.begin(); row != layout.end(); ++row) { + if (row->word > 0) word = row->word; + if (!prev || prev->word != word || home_rows_per_word[word].getbit(row->row) || + home_row / 2 - row->row / 2 > 5 /* can't go over 5 physical rows for timing */ + || (!Target::SUPPORT_OVERFLOW_BUS() && home_row >= 8 && row->row < 8) + /* can't flow between logical row 7 and 8 in JBay*/ + ) { + if (prev && prev->row == row->row) prev->home_row = false; + home_row = row->row; + row->home_row = true; + final_home_rows[word].setbit(row->row); + need_bus(row->lineno, stage->action_data_use, row->row, "action data"); + } + if 
(row->word >= 0) { + if (row->word > width) { + error(row->lineno, "Invalid word %u for row %d", row->word, row->row); + continue; + } + slice_size[row->word] += row->memunits.size(); + } else { + if (slice_size[word] + row->memunits.size() > depth) { + int split = depth - slice_size[word]; + row = layout.insert(row, Layout(*row)); + row->memunits.erase(row->memunits.begin() + split, row->memunits.end()); + row->vpns.erase(row->vpns.begin() + split, row->vpns.end()); + auto next = row + 1; + next->memunits.erase(next->memunits.begin(), next->memunits.begin() + split); + next->vpns.erase(next->vpns.begin(), next->vpns.begin() + split); + } + row->word = word; + if ((slice_size[word] += row->memunits.size()) == int(depth)) ++word; + } + prev = &*row; + } + if (!home_rows_per_word.empty()) { + for (word = 0; word < width; ++word) { + for (unsigned row : home_rows_per_word[word] - final_home_rows[word]) { + error(home_lineno, "home row %u not present in table %s", row, name()); + break; + } + } + } + home_rows_per_word = final_home_rows; + for (word = 0; word < width; ++word) + if (slice_size[word] != int(depth)) { + error(layout.front().lineno, "Incorrect size for word %u in layout of table %s", word, + name()); + break; + } + for (auto &r : layout) LOG4(" " << r); + action_bus->pass1(this); + if (actions) actions->pass1(this); + AttachedTable::pass1(); + SelectionTable *selector = nullptr; + for (auto mtab : match_tables) { + auto *s = mtab->get_selector(); + if (s && selector && s != selector) + error(lineno, "Inconsistent selectors %s and %s for table %s", s->name(), + selector->name(), name()); + if (s) selector = s; + } +} + +void ActionTable::pass2() { + LOG1("### Action table " << name() << " pass2 " << loc()); + if (match_tables.empty()) error(lineno, "No match table for action table %s", name()); + if (!format) format.reset(new Format(this)); + /* Driver does not support formats with different widths. 
Need all formats + * to be the same size, so pad them out */ + pad_format_fields(); + if (actions) actions->pass2(this); + if (action_bus) action_bus->pass2(this); +} + +/** + * FIXME: Due to get_match_tables function not being a const function (which itself should be + * a separate PR), in order to get all potentialy pack formats from all of the actions in all + * associated match tables, an initial pass is required to perform this lookup. + * + * Thus a map is saved in this pass containing a copy of an action, with a listing of all of + * the possible aliases. This will only currently work if the aliases are identical across + * actions, which at the moment, they are. We will need to change this functionality when + * actions could potentially be different across action profiles, either by gathering a union + * of the aliases across actions with the same action handle, or perhaps de-alias the pack + * formats before context JSON generation + */ +void ActionTable::pass3() { + LOG1("### Action table " << name() << " pass3 " << loc()); + action_bus->pass3(this); + + if (!actions) { + Actions *tbl_actions = nullptr; + for (auto mt : get_match_tables()) { + if (mt->actions) { + tbl_actions = mt->actions.get(); + } else if (auto tern = mt->to()) { + if (tern->indirect && tern->indirect->actions) { + tbl_actions = tern->indirect->actions.get(); + } + } + BUG_CHECK(tbl_actions); + for (auto &act : *tbl_actions) { + if (pack_actions.count(act.name) == 0) pack_actions[act.name] = &act; + } + } + } else { + for (auto &act : *actions) { + if (pack_actions.count(act.name) == 0) pack_actions[act.name] = &act; + } + } + + for (auto &fmt : action_formats) { + if (pack_actions.count(fmt.first) == 0) { + error(fmt.second->lineno, "Format for non-existant action %s", fmt.first.c_str()); + continue; + } + } +} + +template +static void flow_selector_addr(REGS ®s, int from, int to) { + BUG_CHECK(from > to); + BUG_CHECK((from & 3) == 3); + if (from / 2 == to / 2) { + /* R to L */ + 
regs.rams.map_alu.selector_adr_switchbox.row[from / 4] + .ctl.l_oflo_adr_o_mux_select.l_oflo_adr_o_sel_selector_adr_r_i = 1; + return; + } + if (from & 1) /* R down */ + regs.rams.map_alu.selector_adr_switchbox.row[from / 4] + .ctl.b_oflo_adr_o_mux_select.b_oflo_adr_o_sel_selector_adr_r_i = 1; + // else + // /* L down */ + // regs.rams.map_alu.selector_adr_switchbox.row[from/4].ctl + // .b_oflo_adr_o_mux_select.b_oflo_adr_o_sel_selector_adr_l_i = 1; + + /* Include all selection address switchboxes needed when the action RAMs + * reside on overflow rows */ + for (int row = from / 4 - 1; row >= to / 4; row--) + if (row != to / 4 || (to % 4) < 2) /* top to bottom */ + regs.rams.map_alu.selector_adr_switchbox.row[row] + .ctl.b_oflo_adr_o_mux_select.b_oflo_adr_o_sel_oflo_adr_t_i = 1; + + switch (to & 3) { + case 3: + /* flow down to R */ + regs.rams.map_alu.selector_adr_switchbox.row[to / 4].ctl.r_oflo_adr_o_mux_select = 1; + break; + case 2: + /* flow down to L */ + regs.rams.map_alu.selector_adr_switchbox.row[to / 4] + .ctl.l_oflo_adr_o_mux_select.l_oflo_adr_o_sel_oflo_adr_t_i = 1; + break; + default: + /* even physical rows are hardwired to flow down to both L and R */ + break; + } +} + +template +void ActionTable::write_regs_vt(REGS ®s) { + LOG1("### Action table " << name() << " write_regs " << loc()); + unsigned fmt_log2size = format ? format->log2size : 0; + unsigned width = format ? 
(format->size - 1) / 128 + 1 : 1; + for (auto &fmt : Values(action_formats)) { + fmt_log2size = std::max(fmt_log2size, fmt->log2size); + width = std::max(width, (fmt->size - 1) / 128U + 1); + } + unsigned depth = layout_size() / width; + bool push_on_overflow = false; // true if we overflow from bottom to top + unsigned idx = 0; + int word = 0; + Layout *home = nullptr; + int prev_logical_row = -1; + decltype(regs.rams.array.switchbox.row[0].ctl) *home_switch_ctl = 0, *prev_switch_ctl = 0; + auto &adrdist = regs.rams.match.adrdist; + auto &icxbar = adrdist.adr_dist_action_data_adr_icxbar_ctl; + for (Layout &logical_row : layout) { + unsigned row = logical_row.row / 2; + unsigned side = logical_row.row & 1; /* 0 == left 1 == right */ + unsigned top = logical_row.row >= 8; /* 0 == bottom 1 == top */ + auto vpn = logical_row.vpns.begin(); + auto &switch_ctl = regs.rams.array.switchbox.row[row].ctl; + auto &map_alu_row = regs.rams.map_alu.row[row]; + if (logical_row.home_row) { + home = &logical_row; + home_switch_ctl = &switch_ctl; + action_bus->write_action_regs(regs, this, logical_row.row, word); + if (side) + switch_ctl.r_action_o_mux_select.r_action_o_sel_action_rd_r_i = 1; + else + switch_ctl.r_l_action_o_mux_select.r_l_action_o_sel_action_rd_l_i = 1; + for (auto mtab : match_tables) + icxbar[mtab->logical_id].address_distr_to_logical_rows |= 1U << logical_row.row; + } else { + BUG_CHECK(home); + // FIXME use DataSwitchboxSetup for this somehow? 
+ if (&switch_ctl == home_switch_ctl) { + /* overflow from L to R action */ + switch_ctl.r_action_o_mux_select.r_action_o_sel_oflo_rd_l_i = 1; + } else { + if (side) { + /* overflow R up */ + switch_ctl.t_oflo_rd_o_mux_select.t_oflo_rd_o_sel_oflo_rd_r_i = 1; + } else { + /* overflow L up */ + switch_ctl.t_oflo_rd_o_mux_select.t_oflo_rd_o_sel_oflo_rd_l_i = 1; + } + if (prev_switch_ctl != &switch_ctl) { + if (prev_switch_ctl != home_switch_ctl) + prev_switch_ctl->t_oflo_rd_o_mux_select.t_oflo_rd_o_sel_oflo_rd_b_i = 1; + else if (home->row & 1) + home_switch_ctl->r_action_o_mux_select.r_action_o_sel_oflo_rd_b_i = 1; + else + home_switch_ctl->r_l_action_o_mux_select.r_l_action_o_sel_oflo_rd_b_i = 1; + } + } + /* if we're skipping over full rows and overflowing over those rows, need to + * propagate overflow from bottom to top. This effectively uses only the + * odd (right side) overflow busses. L ovfl can still go to R action */ + for (int r = prev_logical_row / 2 - 1; r > static_cast(row); r--) { + prev_switch_ctl = ®s.rams.array.switchbox.row[r].ctl; + prev_switch_ctl->t_oflo_rd_o_mux_select.t_oflo_rd_o_sel_oflo_rd_b_i = 1; + } + + auto &oflo_adr_xbar = map_alu_row.vh_xbars.adr_dist_oflo_adr_xbar_ctl[side]; + if ((home->row >= 8) == top) { + oflo_adr_xbar.adr_dist_oflo_adr_xbar_source_index = home->row % 8; + oflo_adr_xbar.adr_dist_oflo_adr_xbar_source_sel = 0; + } else { + BUG_CHECK(home->row >= 8); + BUG_CHECK(options.target == TOFINO); + oflo_adr_xbar.adr_dist_oflo_adr_xbar_source_index = 0; + oflo_adr_xbar.adr_dist_oflo_adr_xbar_source_sel = 3; + push_on_overflow = true; + for (auto mtab : match_tables) + if (!icxbar[mtab->logical_id].address_distr_to_overflow) + icxbar[mtab->logical_id].address_distr_to_overflow = 1; + } + oflo_adr_xbar.adr_dist_oflo_adr_xbar_enable = 1; + } + SelectionTable *selector = get_selector(); + if (selector) { + if (logical_row.row != selector->home_row()) { + if (logical_row.row > selector->home_row()) + error(lineno, "Selector data 
from %s on row %d cannot flow up to %s on row %d", + selector->name(), selector->home_row(), name(), logical_row.row); + else + flow_selector_addr(regs, selector->home_row(), logical_row.row); + } + } + for (auto &memunit : logical_row.memunits) { + int logical_col = memunit.col; + unsigned col = logical_col + 6 * side; + auto &ram = regs.rams.array.row[row].ram[col]; + auto &unitram_config = map_alu_row.adrmux.unitram_config[side][logical_col]; + if (logical_row.home_row) unitram_config.unitram_action_subword_out_en = 1; + ram.unit_ram_ctl.match_ram_write_data_mux_select = UnitRam::DataMux::NONE; + ram.unit_ram_ctl.match_ram_read_data_mux_select = + home == &logical_row ? UnitRam::DataMux::ACTION : UnitRam::DataMux::OVERFLOW; + unitram_config.unitram_type = UnitRam::ACTION; + if (!no_vpns) unitram_config.unitram_vpn = *vpn++; + unitram_config.unitram_logical_table = action_id >= 0 ? action_id : logical_id; + if (gress == INGRESS || gress == GHOST) + unitram_config.unitram_ingress = 1; + else + unitram_config.unitram_egress = 1; + unitram_config.unitram_enable = 1; + auto &ram_mux = map_alu_row.adrmux.ram_address_mux_ctl[side][logical_col]; + auto &adr_mux_sel = ram_mux.ram_unitram_adr_mux_select; + if (selector) { + int shift = std::min(fmt_log2size - 2, MAX_AD_SHIFT); + auto &shift_ctl = regs.rams.map_alu.mau_selector_action_adr_shift[row]; + if (logical_row.row == selector->layout[0].row) { + /* we're on the home row of the selector, so use it directly */ + if (home == &logical_row) + adr_mux_sel = UnitRam::AdrMux::SELECTOR_ALU; + else + adr_mux_sel = UnitRam::AdrMux::SELECTOR_ACTION_OVERFLOW; + if (side) + shift_ctl.mau_selector_action_adr_shift_right = shift; + else + shift_ctl.mau_selector_action_adr_shift_left = shift; + } else { + /* not on the home row -- use overflows */ + if (home == &logical_row) + adr_mux_sel = UnitRam::AdrMux::SELECTOR_OVERFLOW; + else + adr_mux_sel = UnitRam::AdrMux::SELECTOR_ACTION_OVERFLOW; + if (side) + 
shift_ctl.mau_selector_action_adr_shift_right_oflo = shift; + else + shift_ctl.mau_selector_action_adr_shift_left_oflo = shift; + } + } else { + if (home == &logical_row) { + adr_mux_sel = UnitRam::AdrMux::ACTION; + } else { + adr_mux_sel = UnitRam::AdrMux::OVERFLOW; + ram_mux.ram_oflo_adr_mux_select_oflo = 1; + } + } + if (gress == EGRESS) + regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row); + regs.rams.array.row[row].actiondata_error_uram_ctl[timing_thread(gress)] |= + 1 << (col - 2); + if (++idx == depth) { + idx = 0; + home = nullptr; + ++word; + } + } + prev_switch_ctl = &switch_ctl; + prev_logical_row = logical_row.row; + } + if (push_on_overflow) adrdist.oflo_adr_user[0] = adrdist.oflo_adr_user[1] = AdrDist::ACTION; + if (actions) actions->write_regs(regs, this); +} + +// Action data address huffman encoding +// { 0, {"xxx", "xxxxx"} }, +// { 8, {"xxx", "xxxx0"} }, +// { 16, {"xxx", "xxx01"} }, +// { 32, {"xxx", "xx011"} }, +// { 64, {"xxx", "x0111"} }, +// { 128, {"xxx", "01111"} }, +// { 256, {"xx0", "11111"} }, +// { 512, {"x01", "11111"} }, +// { 1024, {"011", "11111"} }; + +// Track the actions added to json per action table. gen_tbl_cfg can be called +// multiple times for the same action for each stage table in case of an action +// table split across multiple stages, but must be added to json only once. 
+// Tracks, per P4 table name, which action names have already been emitted to
+// context.json.  gen_tbl_cfg can be called once per stage table when an action
+// table is split across stages, but each action must be added to json only once.
+// NOTE(review): template arguments were lost in extraction (reads "std::map>");
+// presumably std::map<std::string, std::set<std::string>> -- confirm upstream.
+static std::map> actions_in_json;
+// Emit the context.json configuration for this action data table into `out`.
+void ActionTable::gen_tbl_cfg(json::vector &out) const {
+    // FIXME -- this is wrong if actions have different format sizes
+    unsigned number_entries = (layout_size() * 128 * 1024) / (1 << format->log2size);
+    json::map &tbl = *base_tbl_cfg(out, "action_data", number_entries);
+    json::map &stage_tbl = *add_stage_tbl_cfg(tbl, "action_data", number_entries);
+    for (auto &act : pack_actions) {
+        // Use the per-action format when one exists, else the table-wide format.
+        auto *fmt = format.get();
+        if (action_formats.count(act.first)) fmt = action_formats.at(act.first).get();
+        add_pack_format(stage_tbl, fmt, true, true, act.second);
+        auto p4Name = p4_name();
+        if (!p4Name) {
+            error(lineno, "No p4 table name found for table : %s", name());
+            continue;
+        }
+        std::string tbl_name = p4Name;
+        std::string act_name = act.second->name;
+        // Only emit each action once per table (see actions_in_json above).
+        // NOTE(review): `acts_added` below takes a copy of the tracked set;
+        // a reference would avoid the copy -- left as-is.
+        if (actions_in_json.count(tbl_name) == 0) {
+            actions_in_json[tbl_name].insert(act_name);
+            act.second->gen_simple_tbl_cfg(tbl["actions"]);
+        } else {
+            auto acts_added = actions_in_json[tbl_name];
+            if (acts_added.count(act_name) == 0) {
+                actions_in_json[tbl_name].emplace(act_name);
+                act.second->gen_simple_tbl_cfg(tbl["actions"]);
+            }
+        }
+    }
+    stage_tbl["memory_resource_allocation"] =
+        gen_memory_resource_allocation_tbl_cfg("sram", layout);
+    // FIXME: what is the check for static entries?
+    tbl["static_entries"] = json::vector();
+    std::string hr = how_referenced();
+    if (hr.empty()) hr = indirect ? "indirect" : "direct";
+    tbl["how_referenced"] = hr;
+    merge_context_json(tbl, stage_tbl);
+}
+
+DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(ActionTable, TARGET_CLASS)  // NOLINT(readability/fn_size)
diff --git a/backends/tofino/bf-asm/alias_array.h b/backends/tofino/bf-asm/alias_array.h
new file mode 100644
index 00000000000..0c4fb161e8e
--- /dev/null
+++ b/backends/tofino/bf-asm/alias_array.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_ALIAS_ARRAY_H_
+#define BACKENDS_TOFINO_BF_ASM_ALIAS_ARRAY_H_
+
+#include
+
+#include "bfas.h"  // for BUG_CHECK
+
+// NOTE(review): template parameter lists in this header were stripped during
+// extraction (bare "template" below) -- presumably <class T> for
+// alias_array_base and <class T, size_t S> for alias_array; confirm upstream.
+template
+class alias_array;
+
+// Abstract interface over a fixed-size collection of elements that live
+// elsewhere and are referenced by pointer ("aliased"), with bulk
+// modified/disable/enable operations forwarded to each element.
+template
+class alias_array_base {
+ protected:
+    // Iterator over the pointer array; double-dereferences so users see T&.
+    class iterator {
+        T **ptr;
+
+     public:
+        explicit iterator(T **p) : ptr(p) {}
+        iterator &operator++() {
+            ++ptr;
+            return *this;
+        }
+        iterator &operator--() {
+            --ptr;
+            return *this;
+        }
+        // NOTE(review): postfix ++/-- return a reference to the local `copy`,
+        // which dangles after return; they should return by value.  Flagged
+        // only -- not fixed here.
+        iterator &operator++(int) {
+            auto copy = *this;
+            ++ptr;
+            return copy;
+        }
+        iterator &operator--(int) {
+            auto copy = *this;
+            --ptr;
+            return copy;
+        }
+        bool operator==(const iterator &i) const { return ptr == i.ptr; }
+        bool operator!=(const iterator &i) const { return ptr != i.ptr; }
+        T &operator*() const { return **ptr; }
+        T *operator->() const { return *ptr; }
+    };
+
+ public:
+    virtual T &operator[](size_t) = 0;
+    virtual const T &operator[](size_t) const = 0;
+    virtual size_t size() const = 0;
+    virtual iterator begin() = 0;
+    virtual iterator end() = 0;
+    virtual bool modified() const = 0;
+    virtual void set_modified(bool v = true) = 0;
+    virtual bool disabled() const = 0;
+    virtual bool disable() = 0;
+    virtual bool disable_if_zero() = 0;
+    virtual void enable() = 0;
+};
+
+// Concrete fixed-size (S) alias array; the constructor takes exactly S
+// element pointers and BUG_CHECKs the initializer count.
+template
+class alias_array : public alias_array_base {
+    T *data[S];
+    using typename alias_array_base::iterator;
+
+ public:
+    alias_array(const std::initializer_list &v) {
+        auto it = v.begin();
+        for (auto &e : data) {
+            BUG_CHECK(it != v.end(), "Not enough initializers for alias array");
+            e = *it++;
+        }
+        BUG_CHECK(it == v.end(), "Too many initializers for alias array");
+    }
+    T &operator[](size_t idx) {
+        BUG_CHECK(idx < S, "alias array index %zd out of bounds %zd", idx, S);
+        return *data[idx];
+    }
+    const T &operator[](size_t idx) const {
+        BUG_CHECK(idx < S, "alias array index %zd out of bounds %zd", idx, S);
+        return *data[idx];
+    }
+    size_t size() const { return S; }
+    iterator begin() { return iterator(data); }
+    iterator end() { return iterator(data + S); }
+    // True if any aliased element has been modified.
+    bool modified() const {
+        for (size_t i = 0; i < S; i++)
+            if (data[i]->modified()) return true;
+        return false;
+    }
+    void set_modified(bool v = true) {
+        for (size_t i = 0; i < S; i++) data[i]->set_modified(v);
+    }
+    // The disable*/disabled queries return true only if the operation/state
+    // holds for ALL elements (they still visit every element regardless).
+    bool disabled() const {
+        bool rv = true;
+        for (size_t i = 0; i < S; i++)
+            if (!data[i]->disabled()) rv = false;
+        return rv;
+    }
+    bool disable() {
+        bool rv = true;
+        for (size_t i = 0; i < S; i++)
+            if (!data[i]->disable()) rv = false;
+        return rv;
+    }
+    void enable() {
+        for (size_t i = 0; i < S; i++) data[i]->enable();
+    }
+    bool disable_if_unmodified() {
+        bool rv = true;
+        for (size_t i = 0; i < S; i++)
+            if (!data[i]->disable_if_unmodified()) rv = false;
+        return rv;
+    }
+    bool disable_if_zero() {
+        bool rv = true;
+        for (size_t i = 0; i < S; i++)
+            if (!data[i]->disable_if_zero()) rv = false;
+        return rv;
+    }
+    bool disable_if_reset_value() {
+        bool rv = true;
+        for (size_t i = 0; i < S; i++)
+            if (!data[i]->disable_if_reset_value()) rv = false;
+        return rv;
+    }
+};
+
+#endif /* BACKENDS_TOFINO_BF_ASM_ALIAS_ARRAY_H_ */
diff --git a/backends/tofino/bf-asm/alloc.h b/backends/tofino/bf-asm/alloc.h
new file mode 100644
index 00000000000..e3aac68e5e1
--- /dev/null
+++ b/backends/tofino/bf-asm/alloc.h
@@ -0,0 +1,230 @@
+#ifndef BACKENDS_TOFINO_BF_ASM_ALLOC_H_
+#define BACKENDS_TOFINO_BF_ASM_ALLOC_H_
+
+#include
+
+#include
+#include
+#include
+
+namespace BFN {
+
+// Heap-allocated, bounds-checked 1-D array of T, value-initialized; movable
+// but not copyable.  Throws std::out_of_range on bad indices.
+// NOTE(review): "template" lines below lost their parameter lists in
+// extraction (presumably <class T> / <class T, int S> etc.); confirm upstream.
+template
+class Alloc1Dbase {
+    int size_;
+    T *data;
+    Alloc1Dbase() = delete;
+    Alloc1Dbase(const Alloc1Dbase &) = delete;
+    Alloc1Dbase &operator=(const Alloc1Dbase &) = delete;
+    Alloc1Dbase &operator=(Alloc1Dbase &&) = delete;
+
+ public:
+    explicit Alloc1Dbase(int sz) : size_(sz) { data = sz ?
new T[sz]{} : nullptr; }
+    Alloc1Dbase(Alloc1Dbase &&a) noexcept : size_(a.size_), data(a.data) { a.data = 0; }
+    virtual ~Alloc1Dbase() { delete[] data; }
+
+    typedef T *iterator;
+    typedef T *const_iterator;
+    T &operator[](int i) {
+        if (i < 0 || i >= size_) throw std::out_of_range("Alloc1D");
+        return data[i];
+    }
+    const T &operator[](int i) const {
+        if (i < 0 || i >= size_) throw std::out_of_range("Alloc1D");
+        return data[i];
+    }
+    bool operator==(const Alloc1Dbase &t) const {
+        return std::equal(data, data + size_, t.data, t.data + t.size_);
+    }
+    bool operator!=(const Alloc1Dbase &t) const { return !(*this == t); }
+
+    int size() const { return size_; }
+    // Reset every element to a value-initialized T; capacity unchanged.
+    void clear() { std::fill(data, data + size_, T()); }
+    T *begin() { return data; }
+    T *end() { return data + size_; }
+};
+
+// Fixed-size (S) convenience wrapper over Alloc1Dbase.
+template
+class Alloc1D : public Alloc1Dbase {
+ public:
+    Alloc1D() : Alloc1Dbase(S) {}
+    Alloc1Dbase &base() { return *this; }
+    bool operator!=(const Alloc1D &t) const { return Alloc1Dbase::operator!=(t); }
+};
+
+template
+class Alloc3Dbase;
+
+// Heap-allocated, bounds-checked 2-D array stored row-major in one
+// contiguous block; rows are exposed through the lightweight rowref proxy.
+template
+class Alloc2Dbase {
+    int nrows, ncols;
+    T *data;
+    template
+    class rowref {
+        U *row;
+        int ncols;
+        friend class Alloc2Dbase;
+        friend class Alloc3Dbase;
+        rowref(U *r, int c) : row(r), ncols(c) {}
+
+     public:
+        typedef U *iterator;
+        typedef const U *const_iterator;
+        U &operator[](int i) const {
+            if (i < 0 || i >= ncols) throw std::out_of_range("Alloc2D");
+            return row[i];
+        }
+        U *begin() const { return row; }
+        U *end() const { return row + ncols; }
+    };
+    Alloc2Dbase() = delete;
+    Alloc2Dbase(const Alloc2Dbase &) = delete;
+    Alloc2Dbase &operator=(const Alloc2Dbase &) = delete;
+    Alloc2Dbase &operator=(Alloc2Dbase &&) = delete;
+    friend class Alloc3Dbase;
+
+ public:
+    Alloc2Dbase(int r, int c) : nrows(r), ncols(c) {
+        size_t sz = r * c;
+        data = sz ? new T[sz]{} : nullptr;
+    }
+    Alloc2Dbase(Alloc2Dbase &&a) noexcept : nrows(a.nrows), ncols(a.ncols), data(a.data) {
+        a.data = 0;
+    }
+    virtual ~Alloc2Dbase() { delete[] data; }
+
+    rowref operator[](int i) {
+        if (i < 0 || i >= nrows) throw std::out_of_range("Alloc2D");
+        return {data + i * ncols, ncols};
+    }
+    rowref operator[](int i) const {
+        if (i < 0 || i >= nrows) throw std::out_of_range("Alloc2D");
+        return {data + i * ncols, ncols};
+    }
+    T &at(int i, int j) {
+        if (i < 0 || i >= nrows || j < 0 || j >= ncols) throw std::out_of_range("Alloc2D");
+        return data[i * ncols + j];
+    }
+    const T &at(int i, int j) const {
+        if (i < 0 || i >= nrows || j < 0 || j >= ncols) throw std::out_of_range("Alloc2D");
+        return data[i * ncols + j];
+    }
+    T &operator[](std::pair i) {
+        if (i.first < 0 || i.first >= nrows || i.second < 0 || i.second >= ncols)
+            throw std::out_of_range("Alloc2D");
+        return data[i.first * ncols + i.second];
+    }
+    const T &operator[](std::pair i) const {
+        if (i.first < 0 || i.first >= nrows || i.second < 0 || i.second >= ncols)
+            throw std::out_of_range("Alloc2D");
+        return data[i.first * ncols + i.second];
+    }
+    bool operator==(const Alloc2Dbase &t) const {
+        int sz = nrows * ncols;
+        if (nrows != t.nrows || ncols != t.ncols) return false;
+        return std::equal(data, data + sz, t.data);
+    }
+    bool operator!=(const Alloc2Dbase &t) const { return !(*this == t); }
+
+    int rows() const { return nrows; }
+    int cols() const { return ncols; }
+    void clear() { std::fill(data, data + nrows * ncols, T()); }
+};
+
+// Fixed-size (R x C) convenience wrapper over Alloc2Dbase.
+template
+class Alloc2D : public Alloc2Dbase {
+ public:
+    Alloc2D() : Alloc2Dbase(R, C) {}
+    Alloc2Dbase &base() { return *this; }
+};
+
+// Heap-allocated, bounds-checked 3-D array stored contiguously; matrices are
+// exposed through the matref proxy, which reuses Alloc2Dbase's rowref.
+template
+class Alloc3Dbase {
+    int nmats, nrows, ncols;
+    T *data;
+    template
+    class matref {
+        U *matrix;
+        int nrows, ncols;
+        friend class Alloc3Dbase;
+
+     public:
+        typename Alloc2Dbase::template rowref operator[](int i) const {
+            if (i < 0 || i >= nrows) throw std::out_of_range("Alloc3D");
+            return {matrix + i * ncols, ncols};
+        }
+        U &operator[](std::pair i) const {
+            if (i.first < 0 || i.first >= nrows || i.second < 0 || i.second >= ncols)
+                throw std::out_of_range("Alloc3D");
+            return matrix[i.first * ncols + i.second];
+        }
+    };
+    Alloc3Dbase() = delete;
+    Alloc3Dbase(const Alloc3Dbase &) = delete;
+    Alloc3Dbase &operator=(const Alloc3Dbase &) = delete;
+    Alloc3Dbase &operator=(Alloc3Dbase &&) = delete;
+
+ public:
+    Alloc3Dbase(int m, int r, int c) : nmats(m), nrows(r), ncols(c) {
+        size_t sz = m * r * c;
+        data = sz ? new T[sz]{} : nullptr;
+    }
+    Alloc3Dbase(Alloc3Dbase &&a) noexcept
+        : nmats(a.nmats), nrows(a.nrows), ncols(a.ncols), data(a.data) {
+        a.data = 0;
+    }
+    virtual ~Alloc3Dbase() { delete[] data; }
+
+    matref operator[](int i) {
+        if (i < 0 || i >= nmats) throw std::out_of_range("Alloc3D");
+        return {data + i * nrows * ncols, nrows, ncols};
+    }
+    matref operator[](int i) const {
+        if (i < 0 || i >= nmats) throw std::out_of_range("Alloc3D");
+        return {data + i * nrows * ncols, nrows, ncols};
+    }
+    T &at(int i, int j, int k) {
+        if (i < 0 || i >= nmats || j < 0 || j >= nrows || k < 0 || k >= ncols)
+            throw std::out_of_range("Alloc3D");
+        return data[i * nrows * ncols + j * ncols + k];
+    }
+    const T &at(int i, int j, int k) const {
+        if (i < 0 || i >= nmats || j < 0 || j >= nrows || k < 0 || k >= ncols)
+            throw std::out_of_range("Alloc3D");
+        return data[i * nrows * ncols + j * ncols + k];
+    }
+    T &operator[](std::tuple i) {
+        if (std::get<0>(i) < 0 || std::get<0>(i) >= nmats || std::get<1>(i) < 0 ||
+            std::get<1>(i) >= nrows || std::get<2>(i) < 0 || std::get<2>(i) >= ncols)
+            throw std::out_of_range("Alloc3D");
+        return data[std::get<0>(i) * nrows * ncols + std::get<1>(i) * ncols + std::get<2>(i)];
+    }
+    const T &operator[](std::tuple i) const {
+        if (std::get<0>(i) < 0 || std::get<0>(i) >= nmats || std::get<1>(i) < 0 ||
+            std::get<1>(i) >= nrows || std::get<2>(i) < 0 || std::get<2>(i) >= ncols)
+            throw std::out_of_range("Alloc3D");
+        return data[std::get<0>(i) * nrows * ncols + std::get<1>(i) * ncols + std::get<2>(i)];
+    }
+    bool operator==(const Alloc3Dbase &t) const {
+        int sz = nmats * nrows * ncols;
+        if (nmats != t.nmats || nrows != t.nrows || ncols != t.ncols) return false;
+        return std::equal(data, data + sz, t.data);
+    }
+    bool operator!=(const Alloc3Dbase &t) const { return !(*this == t); }
+
+    int matrixes() const { return nmats; }
+    int rows() const { return nrows; }
+    int cols() const { return ncols; }
+    void clear() { std::fill(data, data + nmats * nrows * ncols, T()); }
+};
+
+// Fixed-size (B x R x C) convenience wrapper over Alloc3Dbase.
+template
+class Alloc3D : public Alloc3Dbase {
+ public:
+    Alloc3D() : Alloc3Dbase(B, R, C) {}
+    Alloc3Dbase &base() { return *this; }
+};
+
+} // namespace BFN
+
+#endif /* BACKENDS_TOFINO_BF_ASM_ALLOC_H_ */
diff --git a/backends/tofino/bf-asm/asm-parse.ypp b/backends/tofino/bf-asm/asm-parse.ypp
new file mode 100644
index 00000000000..8c9c5e1a463
--- /dev/null
+++ b/backends/tofino/bf-asm/asm-parse.ypp
@@ -0,0 +1,446 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+%{
+#define YYDEBUG 1
+#include "backends/tofino/bf-asm/asm-types.h"
+#include
+#include
+#include "backends/tofino/bf-asm/sections.h"
+#include
+#include
+static int yylex();
+static void yyerror(const char *, ...);
+static int lineno;
+/* Maps a global lineno to (filename, local line offset) for diagnostics.
+ * NOTE(review): template args stripped in extraction -- presumably
+ * std::map<int, std::pair<std::string, int>>; confirm upstream. */
+static std::map> line_file_map;
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+/* DANGER -- The value/command functions take non-const references to
+ * value_t and MOVE them, so the source should not be used or cleaned
+ * up afterwards. This matches up with how bison actions work -- in
+ * the normal case it does NOT try to destroy stuff on the value stack,
+ * but rather just pops it and lets it go. Do not try to use them
+ * outside of bison action code */
+/* Constructors for each value_t flavor; lineno_adj compensates for the
+ * lookahead token already having consumed a newline. */
+static value_t value(int64_t v, int lineno_adj) {
+    value_t rv = {tINT, lineno - lineno_adj};
+    rv.i = v;
+    return rv; }
+static value_t value(VECTOR(uintptr_t) &v, int lineno_adj) {
+    value_t rv{tBIGINT, lineno - lineno_adj};
+    rv.bigi = v;
+    return rv; }
+static value_t value(int lo, int hi, int lineno_adj) {
+    value_t rv{tRANGE, lineno - lineno_adj};
+    rv.range.lo = lo;
+    rv.range.hi = hi;
+    return rv; }
+static value_t value(char *v, int lineno_adj) {
+    value_t rv{tSTR, lineno - lineno_adj};
+    rv.s = v;
+    return rv; }
+static value_t value(match_t v, int lineno_adj) {
+    value_t rv{tMATCH, lineno - lineno_adj};
+    rv.m = v;
+    return rv; }
+static value_t value(VECTOR(match_t) v, int lineno_adj) {
+    value_t rv{tBIGMATCH, lineno - lineno_adj};
+    rv.bigm = v;
+    return rv; }
+static value_t value(VECTOR(value_t) &v, int lineno_adj) {
+    value_t rv{tVEC, lineno - lineno_adj};
+    if (v.size > 0) rv.lineno = v.data[0].lineno;
+    rv.vec = v;
+    return rv; }
+static value_t value(VECTOR(pair_t) &v, int lineno_adj) {
+    value_t rv{tMAP, lineno - lineno_adj};
+    if (v.size > 0) rv.lineno = v.data[0].key.lineno;
+    rv.map = v;
+    return rv; }
+static value_t empty_vector(int lineno_adj) {
+    value_t rv{tVEC, lineno - lineno_adj};
+    memset(&rv.vec, 0, sizeof(rv.vec));
+    return rv; }
+static value_t empty_map(int lineno_adj) {
+    value_t rv{tMAP, lineno - lineno_adj};
+    memset(&rv.vec, 0, sizeof(rv.vec));
+    return rv; }
+static value_t singleton_map(const value_t &k, const value_t &v) {
+    value_t rv{tMAP, k.lineno};
+    VECTOR_init1(rv.map, pair_t(k, v));
+    return rv; }
+/* Build a tCMD value: element 0 is the command name, the rest are args;
+ * lineno is pulled back to the earliest argument's line. */
+static value_t command(char *cmd, const VECTOR(value_t) &args, int lineno_adj) {
+    value_t rv{tCMD, lineno - lineno_adj};
+    if (args.size && args.data[0].lineno < rv.lineno)
+        rv.lineno = args.data[0].lineno;
+    rv.vec = args;
+    VECTOR_insert(rv.vec, 0, 1);
+    rv[0] = value(cmd, 0);
+    rv[0].lineno = rv.lineno;
+    return rv; }
+static value_t command(char *cmd, value_t &arg, int lineno_adj) {
+    value_t rv{tCMD, lineno - lineno_adj};
+    if (arg.lineno < rv.lineno)
+        rv.lineno = arg.lineno;
+    VECTOR_init2(rv.vec, value(cmd, 0), arg);
+    rv[0].lineno = rv.lineno;
+    return rv; }
+static value_t command(char *cmd, value_t &&arg, int lineno_adj) {
+    return command(cmd, arg, lineno_adj); }
+/* Binary-operator form; when `merge` is set and one operand is already the
+ * same command, the new operand is folded into it (flattens assoc. chains). */
+static value_t command(char *cmd, bool merge, value_t &a1, value_t &a2, int lineno_adj) {
+    if (merge && a1.type == tCMD && a1 == cmd && a1.vec.size > 2) {
+        free(cmd);
+        VECTOR_add(a1.vec, a2);
+        return a1; }
+    if (merge && a2.type == tCMD && a2 == cmd && a2.vec.size > 2) {
+        free(cmd);
+        VECTOR_insert(a2.vec, 1);
+        a2.vec[1] = a1;
+        return a2; }
+    value_t rv{tCMD, lineno - lineno_adj};
+    if (a1.lineno < rv.lineno)
+        rv.lineno = a1.lineno;
+    VECTOR_init3(rv.vec, value(cmd, 0), a1, a2);
+    rv[0].lineno = rv.lineno;
+    return rv; }
+
+#define VAL(...) value(__VA_ARGS__, yychar == '\n' ? 1 : 0)
+#define CMD(...) command(__VA_ARGS__, yychar == '\n' ? 1 : 0)
+
+#pragma GCC diagnostic pop
+%}
+
+%define parse.error verbose
+%define lr.default-reduction accepting
+
+%nonassoc LOW_PREC
+%left '|' '^'
+%left '&'
+%left '<' '>'
+%nonassoc UNARY
+
+%union {
+    int64_t i;
+    VECTOR(uintptr_t) bigi;
+    char *str;
+    match_t match;
+    VECTOR(match_t) bigm;
+    value_t value;
+    VECTOR(value_t) vec;
+    pair_t pair;
+    VECTOR(pair_t) map;
+}
+
+/* NOTE(review): the <type> tags of %token/%type/%destructor/%printer below
+ * were stripped in extraction; confirm against the upstream grammar. */
+%token INDENT UNINDENT DOTDOT
+%token INT
+%token BIGINT
+%token ID
+%token STR
+%token MATCH
+%token BIGMATCH
+
+%type param param_expr list_element key value elements opt_indent_elements
+    indent_elements flow_value
+%type opt_params params comma_params linewrapped_value_list list_elements value_list dotvals
+%type map_element pair
+%type map_elements pair_list
+
+%destructor { free($$); }
+%destructor { VECTOR_fini($$); }
+%destructor { free_value(&$$); }
+%destructor { VECTOR_foreach($$, free_value); VECTOR_fini($$); }
+%destructor { free_pair(&$$); }
+%destructor { VECTOR_foreach($$, free_pair); VECTOR_fini($$); }
+
+%printer { fprintf(yyoutput, "%" PRId64, $$); }
+%printer { fprintf(yyoutput, "0x%" PRIuPTR, $$.data[$$.size-1]);
+    for (int i = $$.size-2; i >= 0; i--)
+        fprintf(yyoutput, "%016" PRIuPTR, $$.data[i]); }
+%printer { if ($$) fprintf(yyoutput, "'%s'", $$); else fprintf(yyoutput, "null"); }
+%printer { print_match(yyoutput, $$); }
+%printer { fprintf(yyoutput, "%s", value_desc(&$$)); }
+%printer { fprintf(yyoutput, "vec of size %d", $$.size); }
+%printer { fprintf(yyoutput, "map of size %d", $$.size); }
+
+%%
+
+start: INDENT sections UNINDENT | sections | /* epsilon */;
+
+sections: sections section | section ;
+
+/* A top-level section: "name [params]:" followed by an indented body or an
+ * inline value; start_section's result gates whether the body is processed. */
+section : ID opt_params ':'
+        { $$ = Section::start_section(lineno, $1, $2); }
+    '\n' opt_indent_elements
+        { if (!$4) Section::asm_section($1, $2, $6);
+          VECTOR_foreach($2, free_value);
+          VECTOR_fini($2);
+          free_value(&$6);
+          free($1); }
+    | ID opt_params ':'
+        { $$ = Section::start_section(lineno, $1, $2); }
+    value '\n'
+        { if (!$4) Section::asm_section($1, $2,
$5);
+          VECTOR_foreach($2, free_value);
+          VECTOR_fini($2);
+          free_value(&$5);
+          free($1); }
+;
+
+opt_params: /* empty */ { memset(&$$, 0, sizeof($$)); }
+    | params
+    ;
+params : param %prec LOW_PREC { VECTOR_init1($$, $1); }
+    | params param { $$ = $1; VECTOR_add($$, $2); }
+    ;
+comma_params
+    : param ',' value { VECTOR_init2($$, $1, $3); }
+    | comma_params ',' value { $$ = $1; VECTOR_add($$, $3); }
+    | param_expr ',' value { VECTOR_init2($$, $1, $3); }
+    | '(' value ')' ',' value { VECTOR_init2($$, $2, $5); }
+    ;
+param : INT { $$ = VAL($1); }
+    | ID { $$ = VAL($1); }
+    | '-' INT { $$ = VAL(-$2); }
+    | '!' ID { $$ = CMD(strdup("!"), VAL($2)); }
+    | INT DOTDOT INT { $$ = VAL($1, $3); }
+    | ID '(' value ')' { $$ = CMD($1, $3); }
+    | ID '(' value_list ')' { $$ = CMD($1, $3); }
+    | flow_value { $$ = $1; }
+    ;
+param_expr
+    : param '^' value { $$ = CMD(strdup("^"), true, $1, $3); }
+    | param '|' value { $$ = CMD(strdup("|"), true, $1, $3); }
+    | param '&' value { $$ = CMD(strdup("&"), true, $1, $3); }
+    /* rule duplication to get precedence correct */
+    | param_expr '^' value { $$ = CMD(strdup("^"), true, $1, $3); }
+    | param_expr '|' value { $$ = CMD(strdup("|"), true, $1, $3); }
+    | param_expr '&' value { $$ = CMD(strdup("&"), true, $1, $3); }
+    ;
+
+opt_indent_elements: { $$ = empty_map(1); }
+    | indent_elements
+    ;
+
+/* An INDENT...UNINDENT body; on a parse error inside, the error rule records
+ * the line and yields an empty map so the parse can continue. */
+indent_elements
+    : INDENT elements UNINDENT { $$ = $2; }
+    | INDENT error { $$ = lineno; } error_resync UNINDENT { $$ = empty_map(lineno-$3); }
+    ;
+elements: list_elements { $$ = VAL($1); }
+    | list_elements error error_resync { $$ = VAL($1); }
+    | map_elements { $$ = VAL($1); }
+    | map_elements error error_resync { $$ = VAL($1); }
+    ;
+map_elements: map_elements map_element { $$ = $1; VECTOR_add($$, $2); }
+    | map_element { VECTOR_init1($$, $1); }
+    ;
+list_elements: list_elements list_element { $$ = $1; VECTOR_add($$, $2); }
+    | list_element { VECTOR_init1($$, $1); }
+    ;
+
+/* YAML-like "key: value" entries; the '?' forms allow an arbitrary value as
+ * the key (explicit-key syntax). */
+map_element
+    : key ':' value '\n' { $$ = pair_t($1, $3); }
+    | key ':' '\n' indent_elements { $$ = pair_t($1, $4); }
+    | key ':' '\n' list_elements { $$ = pair_t($1, VAL($4)); }
+    | key ':' '\n' { $$ = pair_t($1, empty_map(1)); }
+    | '?' value ':' value '\n' { $$ = pair_t($2, $4); }
+    | '?' value ':' '\n' indent_elements { $$ = pair_t($2, $5); }
+    | '?' value ':' '\n' list_elements { $$ = pair_t($2, VAL($5)); }
+    | '?' value '\n' ':' value '\n' { $$ = pair_t($2, $5); }
+    ;
+
+/* "- ..." sequence entries; a leading "key: value" line may be followed by
+ * further map entries, which fold into one map per list element. */
+list_element
+    : '-' key ':' value '\n' { $$ = singleton_map($2, $4); }
+    | '-' key ':' value '\n' INDENT map_elements UNINDENT {
+        VECTOR_insert($7, 0);
+        $7.data[0] = pair_t($2, $4);
+        $$ = VAL($7); }
+    | '-' '?' value ':' value '\n' { $$ = singleton_map($3, $5); }
+    | '-' '?' value ':' value '\n' INDENT map_elements UNINDENT {
+        VECTOR_insert($8, 0);
+        $8.data[0] = pair_t($3, $5);
+        $$ = VAL($8); }
+    | '-' value '\n' { $$ = $2; }
+    | '-' ID comma_params '\n' { $$ = command($2, $3, yychar == '\n' ? 2 : 1); }
+    | '-' ID comma_params ',' '\n' linewrapped_value_list
+        { VECTOR_addcopy($3, $6.data, $6.size);
+          $$ = command($2, $3, yychar == '\n' ? 2 : 1);
+          VECTOR_fini($6); }
+    | '-' ID param ',' '\n' linewrapped_value_list
+        { VECTOR_insert($6, 0); $6.data[0] = $3;
+          $$ = command($2, $6, yychar == '\n' ? 2 : 1); }
+    | '-' key ':' '\n' indent_elements { $$ = singleton_map($2, $5); }
+    | '-' '?' value ':' '\n' indent_elements { $$ = singleton_map($3, $6); }
+    | '-' '\n' { $$ = value(strdup(""), yychar == '\n' ? 2 : 1); }
+    ;
+
+key : ID { $$ = VAL($1); }
+    | ID params { $$ = CMD($1, $2); }
+    | INT { $$ = VAL($1); }
+    | BIGINT { $$ = VAL($1); }
+    | MATCH { $$ = VAL($1); }
+    | BIGMATCH { $$ = VAL($1); }
+    | INT DOTDOT INT { $$ = VAL($1, $3); }
+    | ID '(' value_list ')' { $$ = CMD($1, $3); }
+    | ID '(' value ')' { $$ = CMD($1, $3); }
+    | ID '(' ')' { $$ = VAL($1); }
+    | flow_value
+    ;
+
+/* Expressions over keys; unary minus folds into tINT constants directly. */
+value: key
+    | '-' value %prec UNARY { if (($$=$2).type == tINT) $$.i = -$$.i; else $$ = CMD(strdup("-"), $2); }
+    | '!' value %prec UNARY { $$ = CMD(strdup("!"), $2); }
+    | dotvals INT { VECTOR_add($1, VAL($2)); $$ = VAL($1); }
+    | value '^' value { $$ = CMD(strdup("^"), true, $1, $3); }
+    | value '|' value { $$ = CMD(strdup("|"), true, $1, $3); }
+    | value '&' value { $$ = CMD(strdup("&"), true, $1, $3); }
+    | value '<' '<' value { $$ = CMD(strdup("<<"), false, $1, $4); }
+    | value '>' '>' value { $$ = CMD(strdup(">>"), false, $1, $4); }
+    | '(' value ')' { $$ = $2; }
+    | STR { $$ = VAL($1); }
+    ;
+
+/* YAML flow-style collections: [a, b, ...] and {k: v, ...}, with error
+ * recovery that still produces a (possibly partial/empty) collection. */
+flow_value
+    : '[' value_list ']' { $$ = VAL($2); }
+    | '[' value ']' { VECTOR(value_t) tmp; VECTOR_init1(tmp, $2); $$ = VAL(tmp); }
+    | '[' value_list error error_resync ']' { $$ = VAL($2); }
+    | '[' value error error_resync ']' {
+        VECTOR(value_t) tmp; VECTOR_init1(tmp, $2); $$ = VAL(tmp); }
+    | '{' pair_list '}' { $$ = VAL($2); }
+    | '{' pair_list error error_resync '}' { $$ = VAL($2); }
+    | '[' ']' { $$ = empty_vector(yychar == '\n' ? 1 : 0); }
+    | '[' error error_resync ']' { $$ = empty_vector(yychar == '\n' ? 1 : 0); }
+    | '{' '}' { $$ = empty_map(yychar == '\n' ? 1 : 0); }
+    | '{' error error_resync '}' { $$ = empty_map(yychar == '\n' ? 1 : 0); }
+    ;
+
+value_list
+    : value_list ',' value { $$ = $1; VECTOR_add($$, $3); }
+    | value ',' value { VECTOR_init2($$, $1, $3); }
+    ;
+/* A comma-separated list that may continue across newlines/indentation. */
+linewrapped_value_list
+    : value_list '\n' { $$ = $1; }
+    | value '\n' { VECTOR_init1($$, $1); }
+    | value_list ',' '\n' linewrapped_value_list
+        { $$ = $1; VECTOR_addcopy($$, $4.data, $4.size); VECTOR_fini($4); }
+    | value ',' '\n' linewrapped_value_list
+        { VECTOR_init1($$, $1); VECTOR_addcopy($$, $4.data, $4.size); VECTOR_fini($4); }
+    | INDENT value_list '\n' UNINDENT { $$ = $2; }
+    | INDENT value '\n' UNINDENT { VECTOR_init1($$, $2); }
+    | INDENT value_list ',' '\n' linewrapped_value_list UNINDENT
+        { $$ = $2; VECTOR_addcopy($$, $5.data, $5.size); VECTOR_fini($5); }
+    | INDENT value ',' '\n' linewrapped_value_list UNINDENT
+        { VECTOR_init1($$, $2); VECTOR_addcopy($$, $5.data, $5.size); VECTOR_fini($5); }
+    ;
+
+pair_list
+    : pair_list ',' pair { $$ = $1; VECTOR_add($$, $3); }
+    | pair { VECTOR_init1($$, $1); }
+    ;
+pair: value ':' value { $$ = pair_t($1, $3); }
+    ;
+
+/* Dotted numeric values like "1.2.3" collected as a vector of ints. */
+dotvals : dotvals INT '.' { $$ = $1; VECTOR_add($$, VAL($2)); }
+    | INT '.'
{ VECTOR_init1($$, VAL($1)); }
+
+/* Swallow tokens after a syntax error, freeing any owned semantic values. */
+error_resync: /* epsilon */ | error_resync indent_elements { free_value(&$2); }
+    | error_resync INT | error_resync ID { free($2); } | error_resync MATCH
+    | error_resync BIGMATCH { VECTOR_fini($2); }
+    | error_resync BIGINT { VECTOR_fini($2); } | error_resync ':' | error_resync '-'
+    | error_resync ',' | error_resync '(' | error_resync ')' | error_resync DOTDOT
+    | error_resync '\n' | error_resync flow_value { free_value(&$2); }
+    ;
+
+%%
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdeprecated-register"
+#pragma GCC diagnostic ignored "-Wsign-compare"
+#include "backends/tofino/bf-asm/gen/lex-yaml.c"
+#pragma GCC diagnostic pop
+
+int error_count = 0;
+int warn_count = 0;
+
+// Translate a global line number back to "file:line" using line_file_map.
+std::ostream &operator<<(std::ostream &out, const SrcInfo &s) {
+    auto it = line_file_map.upper_bound(s.lineno);
+    it--;
+    out << it->second.first << ':' << (s.lineno - it->first + it->second.second);
+    return out;
+}
+
+// Print a warning with file:line prefix (when known) and bump warn_count.
+void warning(int lineno, const char *fmt, va_list args) {
+    auto it = line_file_map.upper_bound(lineno);
+    if (it == line_file_map.begin()) {
+        fprintf(stderr, ": warning: ");
+    } else {
+        --it;
+        fprintf(stderr, "%s:%d: warning: ", it->second.first.c_str(),
+                lineno - it->first + it->second.second); }
+    vfprintf(stderr, fmt, args);
+    fprintf(stderr, "\n");
+    fflush(stderr);
+    warn_count++; }
+
+// Print an error with file:line prefix (when known) and bump error_count.
+void error(int lineno, const char *fmt, va_list args) {
+    auto it = line_file_map.upper_bound(lineno);
+    if (it == line_file_map.begin()) {
+        fprintf(stderr, ": error: ");
+    } else {
+        --it;
+        fprintf(stderr, "%s:%d: error: ", it->second.first.c_str(),
+                lineno - it->first + it->second.second); }
+    vfprintf(stderr, fmt, args);
+    fprintf(stderr, "\n");
+    fflush(stderr);
+    error_count++; }
+
+// Bison error hook: forwards to error() at the current parser line.
+static void yyerror(const char *fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+    error(lineno, fmt, args);
+    va_end(args);
+}
+
+// Parse an already-open assembler file; returns the cumulative error count.
+int asm_parse_file(const char *name, FILE *in) {
+#ifdef YYDEBUG
+    if (const char *p = getenv("YYDEBUG"))
+        yydebug = atoi(p);
+#endif /* YYDEBUG */
+    yyrestart(in);
+    line_file_map[lineno++] = std::make_pair(name, 0);
+    if (yyparse())
+        error_count++;
+    return error_count;
+}
+
+// Parse assembler source from an in-memory string; returns the error count.
+int asm_parse_string(const char* in) {
+    YY_BUFFER_STATE buf;
+#ifdef YYDEBUG
+    if (const char *p = getenv("YYDEBUG"))
+        yydebug = atoi(p);
+#endif /* YYDEBUG */
+    // Reset state in case func is called multiple times
+    BEGIN(INITIAL);
+    buf = yy_scan_string(in);
+    if (yyparse())
+        error_count++;
+    yy_delete_buffer(buf);
+    return error_count;
+}
+
+std::map *Section::sections = 0;
diff --git a/backends/tofino/bf-asm/asm-types.cpp b/backends/tofino/bf-asm/asm-types.cpp
new file mode 100644
index 00000000000..bfcc5e524d8
--- /dev/null
+++ b/backends/tofino/bf-asm/asm-types.cpp
@@ -0,0 +1,320 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "asm-types.h" + +#include +#include + +#include "misc.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +void VECTOR(pair_t)::push_back(const char *s, value_t &&v) { // NOLINT(whitespace/operators) + pair_t entry{{tSTR, v.lineno}, v}; + entry.key.s = strdup(s); + VECTOR_push(*this, entry); + memset(&v, 0, sizeof(v)); +} + +void push_back(VECTOR(pair_t) & m, const char *s, value_t &&v) { // NOLINT(whitespace/operators) + m.push_back(s, std::move(v)); +} + +VECTOR(value_t) & VECTOR(value_t)::add(value_t &&v) { + VECTOR_add(*this, std::move(v)); + return *this; +} +VECTOR(value_t) & VECTOR(value_t)::add(int v) { + value_t tmp{tINT, v}; + VECTOR_add(*this, tmp); + return *this; +} +VECTOR(value_t) & VECTOR(value_t)::add(const char *v) { + value_t tmp{tSTR, -1}; + tmp.s = const_cast(v); + VECTOR_add(*this, tmp); + return *this; +} + +/** check a value and see if it is a list of maps -- if so, concatenate the + * maps into a single map and replace the list with that */ +void collapse_list_of_maps(value_t &v, bool singleton_only) { + if (v.type != tVEC || v.vec.size == 0) return; + for (int i = 0; i < v.vec.size; i++) { + if (v[i].type != tMAP) return; + if (singleton_only && v[i].map.size != 1) return; + } + VECTOR(pair_t) map = v[0].map; + for (int i = 1; i < v.vec.size; i++) { + VECTOR_addcopy(map, v[i].map.data, v[i].map.size); + VECTOR_fini(v[i].map); + } + VECTOR_fini(v.vec); + v.type = tMAP; + v.map = map; +} + +std::unique_ptr toJson(value_t &v) { + switch (v.type) { + case tINT: + return json::mkuniq(v.i); + case tBIGINT: + if (v.bigi.size == 1 && v.bigi.data[0] < INT64_MAX) + return json::mkuniq(v.bigi.data[0]); + // fall through + case tRANGE: + case tMATCH: + return json::mkuniq(value_desc(v)); + case tSTR: + if (v == "true") return json::mkuniq(); + if (v == "false") return json::mkuniq(); + if (v == "null") return std::unique_ptr(); + return 
json::mkuniq(v.s); + case tVEC: + return toJson(v.vec); + case tMAP: + return toJson(v.map); + case tCMD: + return toJson(v.vec); + default: + assert(0); + } + return std::unique_ptr(); +} + +std::unique_ptr toJson(VECTOR(value_t) & v) { + auto rv = json::mkuniq(); + auto &vec = *rv; + for (auto &el : v) vec.push_back(toJson(el)); + return rv; +} + +std::unique_ptr toJson(pair_t &kv) { + auto rv = json::mkuniq(); + auto &map = *rv; + map[toJson(kv.key)] = toJson(kv.value); + return rv; +} + +std::unique_ptr toJson(VECTOR(pair_t) & m) { + auto rv = json::mkuniq(); + auto &map = *rv; + for (auto &kv : m) map[toJson(kv.key)] = toJson(kv.value); + return rv; +} + +bool get_bool(const value_t &v) { + if (v == "true") + return true; + else if (v == "false") + return false; + else if (CHECKTYPE(v, tINT)) + return v.i != 0; + return false; +} + +bitvec get_bitvec(const value_t &v, unsigned max_bits, const char *error_message) { + bitvec bv; + if (CHECKTYPE2(v, tINT, tBIGINT)) { + if (v.type == tINT) { + bv.setraw(v.i); + } else { + if (!v.bigi.size) return bv; + bv.setraw(v.bigi.data, v.bigi.size); + } + } + if (!max_bits) return bv; + int bits = bv.max().index() + 1; + if (error_message && bits > max_bits) error(v.lineno, "%s", error_message); + bv.clrrange(max_bits, bits); + return bv; +} + +uint64_t get_int64(const value_t &v, unsigned max_bits, const char *error_message) { + BUG_CHECK(max_bits <= 64); + bool too_large = false; + uint64_t value = 0; + if (CHECKTYPE2(v, tINT, tBIGINT)) { + if (v.type == tINT) { + value = (uint64_t)v.i; + } else { + if (!v.bigi.size) return 0; + if (sizeof(uintptr_t) == sizeof(uint32_t)) { + value = ((uint64_t)v.bigi.data[1] << 32) + v.bigi.data[0]; + too_large = v.bigi.size > 2; + } else { + BUG_CHECK(sizeof(uintptr_t) == sizeof(uint64_t)); + value = v.bigi.data[0]; + too_large = v.bigi.size > 1; + } + } + } + if (!max_bits) return value; + uint64_t masked = value; + if (max_bits < 64) masked &= (1ULL << max_bits) - 1; + if 
(error_message && (too_large || masked != value)) error(v.lineno, "%s", error_message); + return masked; +} + +static int chkmask(const match_t &m, int maskbits) { + uint64_t mask = bitMask(maskbits); + int shift = 0; + while (mask && ((m.word0 | m.word1) >> shift)) { + if ((mask & m.word0 & m.word1) && (mask & m.word0 & m.word1) != mask) return -1; + mask <<= maskbits; + shift += maskbits; + } + return shift - maskbits; +} + +std::ostream &operator<<(std::ostream &out, match_t m) { + int shift, bits; + if ((shift = chkmask(m, (bits = 4))) >= 0) + out << "0x"; + else if ((shift = chkmask(m, (bits = 3))) >= 0) + out << "0o"; + else if ((shift = chkmask(m, (bits = 1))) >= 0) + out << "0b"; + else if ((shift = chkmask(m, (bits = 0))) == 0) + out << "0b*"; + else + assert(0); + uint64_t mask = bitMask(bits) << shift; + for (; mask; shift -= bits, mask >>= bits) + if (mask & m.word0 & m.word1) + out << '*'; + else + out << "0123456789abcdef"[(m.word1 & mask) >> shift]; + return out; +} + +void print_match(FILE *fp, match_t m) { + std::stringstream tmp; + tmp << m; + fputs(tmp.str().c_str(), fp); +} + +const char *value_type_desc[] = {"integer", "bigint", "range", + "identifier", "match pattern", "big match", + "list", "key: value pairs", "operation"}; + +const char *value_desc(const value_t *p) { + static char buffer[32]; + switch (p->type) { + case tINT: + snprintf(buffer, sizeof(buffer), "%" PRId64 "", p->i); + return buffer; + case tBIGINT: + return ""; + case tRANGE: + snprintf(buffer, sizeof(buffer), "%d..%d", p->range.lo, p->range.hi); + return buffer; + case tMATCH: + return ""; + case tBIGMATCH: + return ""; + case tSTR: + return p->s; + case tVEC: + return ""; + case tMAP: + return ""; + case tCMD: + if (p->vec.size > 0 && p->vec.data[0].type == tSTR) return p->vec.data[0].s; + return ""; + } + assert(false && "unknown value type"); + return ""; +} + +void free_value(value_t *p) { + switch (p->type) { + case tBIGINT: + VECTOR_fini(p->bigi); + break; + case 
tSTR: + free(p->s); + break; + case tVEC: + case tCMD: + VECTOR_foreach(p->vec, free_value); + VECTOR_fini(p->vec); + break; + case tMAP: + VECTOR_foreach(p->map, free_pair); + VECTOR_fini(p->map); + break; + default: + break; + } +} + +bool operator==(const struct value_t &a, const struct value_t &b) { + int i; + if (a.type != b.type) { + if (a.type == tINT && b.type == tBIGINT) { + if (a.i < 0 || (size_t)a.i != b.bigi.data[0]) return false; + for (i = 1; i < b.bigi.size; i++) + if (b.bigi.data[i]) return false; + return true; + } else if (a.type == tBIGINT && b.type == tINT) { + if (b.i < 0 || (size_t)b.i != a.bigi.data[0]) return false; + for (i = 1; i < a.bigi.size; i++) + if (a.bigi.data[i]) return false; + return true; + } + return false; + } + switch (a.type) { + case tINT: + return a.i == b.i; + case tBIGINT: + for (i = 0; i < a.bigi.size && i < b.bigi.size; i++) + if (a.bigi.data[i] != b.bigi.data[i]) return false; + for (; i < a.bigi.size; i++) + if (a.bigi.data[i]) return false; + for (; i < b.bigi.size; i++) + if (b.bigi.data[i]) return false; + return true; + case tRANGE: + return a.range.lo == b.range.lo && a.range.hi == b.range.hi; + case tSTR: + return !strcmp(a.s, b.s); + case tMATCH: + return a.m.word0 == b.m.word0 && a.m.word1 == b.m.word1; + case tVEC: + case tCMD: + if (a.vec.size != b.vec.size) return false; + for (int i = 0; i < a.vec.size; i++) + if (a.vec.data[i] != b.vec.data[i]) return false; + return true; + case tMAP: + if (a.map.size != b.map.size) return false; + for (int i = 0; i < a.map.size; i++) { + if (a.map.data[i].key != b.map.data[i].key) return false; + if (a.map.data[i].value != b.map.data[i].value) return false; + } + return true; + case tBIGMATCH: + default: + break; + } + assert(false && "unknown value type"); + return ""; +} +#pragma GCC diagnostic pop diff --git a/backends/tofino/bf-asm/asm-types.h b/backends/tofino/bf-asm/asm-types.h new file mode 100644 index 00000000000..994e5f74e10 --- /dev/null +++ 
b/backends/tofino/bf-asm/asm-types.h @@ -0,0 +1,494 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_ASM_TYPES_H_ +#define BACKENDS_TOFINO_BF_ASM_ASM_TYPES_H_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "backends/tofino/bf-asm/json.h" +#include "backends/tofino/bf-asm/map.h" +#include "bfas.h" +#include "lib/bitops.h" +#include "lib/bitvec.h" +#include "mask_counter.h" +#include "vector.h" + +enum gress_t { INGRESS, EGRESS, GHOST, NUM_GRESS_T }; + +/* All timing related uses combine the INGRESS and GHOST threads (they run in lockstep), so + * we remap GHOST->INGRESS when dealing with timing */ +inline gress_t timing_thread(gress_t gress) { return gress == GHOST ? INGRESS : gress; } +/* imem similarly shares color between INGRESS and GHOST */ +inline gress_t imem_thread(gress_t gress) { return gress == GHOST ? 
INGRESS : gress; } + +struct match_t { + uint64_t word0, word1; +#ifdef __cplusplus + operator bool() const { return (word0 | word1) != 0; } + bool operator==(const match_t &a) const { return word0 == a.word0 && word1 == a.word1; } + bool matches(uint64_t v) const { + return (v | word1) == word1 && ((~v & word1) | word0) == word0; + } + bool matches(const match_t &v) const { + assert(0); + return false; + } + unsigned dirtcam(unsigned width, unsigned bit); +#endif /* __cplusplus */ +}; + +DECLARE_VECTOR(match_t); + +struct wmatch_t { + bitvec word0, word1; +#ifdef __cplusplus + wmatch_t() = default; + wmatch_t(const wmatch_t &) = default; + wmatch_t(wmatch_t &&) = default; + wmatch_t &operator=(const wmatch_t &) = default; + wmatch_t &operator=(wmatch_t &&) = default; + wmatch_t(const match_t &v) : word0(v.word0), word1(v.word1) {} // NOLINT(runtime/explicit) + wmatch_t(const VECTOR(match_t) & v) { // NOLINT(runtime/explicit) + for (int i = 0; i < v.size; ++i) { + word0.putrange(i * 64, 64, v.data[i].word0); + word1.putrange(i * 64, 64, v.data[i].word1); + } + } + operator bool() const { return word0 || word1; } + bool operator==(const wmatch_t &a) const { return word0 == a.word0 && word1 == a.word1; } + bool matches(bitvec v) const { return (v | word1) == word1 && ((word1 - v) | word0) == word0; } + bool matches(const wmatch_t &v) const { + assert(0); + return false; + } + unsigned dirtcam(unsigned width, unsigned bit); +#endif /* __cplusplus */ +}; + +enum value_type { tINT, tBIGINT, tRANGE, tSTR, tMATCH, tBIGMATCH, tVEC, tMAP, tCMD }; +extern const char *value_type_desc[]; + +struct value_t; +struct pair_t; +#ifdef __cplusplus +DECLARE_VECTOR( + value_t, value_t &operator[](int) const; value_t & back() const; + value_t * begin() const { return data; } value_t * end() const; value_t & front() const; + VECTOR(value_t) & add(value_t &&); VECTOR(value_t) & add(int); + VECTOR(value_t) & add(const char *);) +DECLARE_VECTOR( + pair_t, void push_back(const char *, 
value_t &&); // NOLINT(whitespace/operators) + pair_t & operator[](int) const; pair_t * operator[](const char *) const; pair_t & back() const; + pair_t * begin() const { return data; } pair_t * end() const; pair_t & front() const;) +#else +DECLARE_VECTOR(value_t) +DECLARE_VECTOR(pair_t) +#endif /* __cplusplus */ +DECLARE_VECTOR(uintptr_t); + +struct value_t { + enum value_type type; + int lineno; + union { + int64_t i; + VECTOR(uintptr_t) bigi; + struct { + int lo; + int hi; + } range; + char *s; + match_t m; + VECTOR(match_t) bigm; + VECTOR(value_t) vec; + VECTOR(pair_t) map; + }; +#ifdef __cplusplus + value_t &operator[](int i) const { + assert(type == tVEC || type == tCMD); + return vec[i]; + } + bool startsWith(const char *pfx) const { + if (type == tSTR) return strncmp(s, pfx, strlen(pfx)) == 0; + if (type == tCMD && vec.size > 0 && vec[0].type == tSTR) + return strncmp(vec[0].s, pfx, strlen(pfx)) == 0; + return false; + } + bool checkSize() const { + if (type == tVEC) return (vec.size > 0); + if (type == tMAP) return (map.size > 0); + if (type == tCMD) return (vec.size > 0); + return true; + } +#endif /* __cplusplus */ +}; + +struct pair_t { + struct value_t key, value; +#ifdef __cplusplus + pair_t() = default; + pair_t(const value_t &k, const value_t &v) : key(k), value(v) {} +#endif /* __cplusplus */ +}; + +void free_value(value_t *p); +const char *value_desc(const value_t *v); +static inline void free_pair(pair_t *p) { + free_value(&p->key); + free_value(&p->value); +} +bool get_bool(const value_t &v); + +// If max_bits is zero, no testing or masking is carried out. +// If error_message is set, values larger than max_bits will error, otherwise the value is masked. 
+bitvec get_bitvec(const value_t &v, unsigned max_bits = 0, const char *error_message = nullptr); +uint64_t get_int64(const value_t &v, unsigned max_bits = 0, const char *error_message = nullptr); + +#ifdef __cplusplus +bool operator==(const struct value_t &, const struct value_t &); +inline bool operator==(const struct value_t &a, const char *b) { + if (a.type == tCMD && a.vec.size > 0 && a[0].type == tSTR) return !strcmp(a[0].s, b); + return a.type == tSTR && !strcmp(a.s, b); +} +inline bool operator==(const char *a, const struct value_t &b) { + if (b.type == tCMD && b.vec.size > 0 && b[0].type == tSTR) return !strcmp(a, b[0].s); + return b.type == tSTR && !strcmp(a, b.s); +} +inline bool operator==(const struct value_t &a, int b) { return a.type == tINT && a.i == b; } +inline bool operator==(int a, const struct value_t &b) { return b.type == tINT && a == b.i; } + +inline const char *value_desc(const value_t &v) { return value_desc(&v); } + +template +inline bool operator!=(A a, B b) { + return !(a == b); +} + +inline value_t &VECTOR(value_t)::operator[](int i) const { + assert(i >= 0 && i < size); + return data[i]; +} +inline pair_t &VECTOR(pair_t)::operator[](int i) const { + assert(i >= 0 && i < size); + return data[i]; +} +inline pair_t *VECTOR(pair_t)::operator[](const char *k) const { + for (int i = 0; i < size; i++) + if (data[i].key == k) return &data[i]; + return 0; +} +inline value_t *VECTOR(value_t)::end() const { return data + size; } +inline value_t &VECTOR(value_t)::front() const { + assert(0 < size); + return data[0]; +} +inline value_t &VECTOR(value_t)::back() const { + assert(0 < size); + return data[size - 1]; +} +inline pair_t *VECTOR(pair_t)::end() const { return data + size; } +inline pair_t &VECTOR(pair_t)::front() const { + assert(0 < size); + return data[0]; +} +inline pair_t &VECTOR(pair_t)::back() const { + assert(0 < size); + return data[size - 1]; +} + +/* can't call VECTOR(pair_t)::push_back directly except from the compilation unit 
where + * it is defined, due to gcc bug. Workaround via global function */ +extern void push_back(VECTOR(pair_t) & m, const char *s, + value_t &&v); // NOLINT(whitespace/operators) + +inline void fini(value_t &v) { free_value(&v); } +inline void fini(pair_t &p) { free_pair(&p); } +inline void fini(VECTOR(value_t) & v) { + VECTOR_foreach(v, free_value); + VECTOR_fini(v); +} +inline void fini(VECTOR(pair_t) & v) { + VECTOR_foreach(v, free_pair); + VECTOR_fini(v); +} +void collapse_list_of_maps(value_t &, bool singleton_only = false); + +std::unique_ptr toJson(value_t &); +std::unique_ptr toJson(VECTOR(value_t) &); +std::unique_ptr toJson(pair_t &); +std::unique_ptr toJson(VECTOR(pair_t) &); + +#endif /* __cplusplus */ + +#define CHECKTYPE(V, T) \ + ((V).type == (T) || (error((V).lineno, "Syntax error, expecting %s", value_type_desc[T]), 0)) +#define CHECKTYPESIZE(V, T) \ + (CHECKTYPE(V, T) && \ + ((V).checkSize() || (error((V).lineno, "Syntax error, empty %s", value_type_desc[T]), 0))) +#define PCHECKTYPE(P, V, T) \ + (((P) && (V).type == (T)) || \ + (error((V).lineno, "Syntax error, expecting %s", value_type_desc[T]), 0)) +#define CHECKTYPEM(V, T, M) \ + ((V).type == (T) || (error((V).lineno, "Syntax error, expecting %s", M), 0)) +#define CHECKTYPEPM(V, T, P, M) \ + (((V).type == (T) && (P)) || (error((V).lineno, "Syntax error, expecting %s", M), 0)) +#define PCHECKTYPEM(P, V, T, M) \ + (((P) && (V).type == (T)) || (error((V).lineno, "Syntax error, expecting %s", M), 0)) +#define CHECKTYPE2(V, T1, T2) \ + ((V).type == (T1) || (V).type == (T2) || \ + (error((V).lineno, "Syntax error, expecting %s or %s but got %s", value_type_desc[T1], \ + value_type_desc[T2], value_desc(V)), \ + 0)) +#define CHECKTYPE3(V, T1, T2, T3) \ + ((V).type == (T1) || (V).type == (T2) || (V).type == (T3) || \ + (error((V).lineno, "Syntax error, expecting %s or %s or %s", value_type_desc[T1], \ + value_type_desc[T2], value_type_desc[T3]), \ + 0)) +#define PCHECKTYPE2(P, V, T1, T2) \ + (((P) && 
((V).type == (T1) || (V).type == (T2))) || \ + (error((V).lineno, "Syntax error, expecting %s or %s", value_type_desc[T1], \ + value_type_desc[T2]), \ + 0)) +#define CHECKTYPE2M(V, T1, T2, M) \ + ((V).type == (T1) || (V).type == (T2) || \ + (error((V).lineno, "Syntax error, expecting %s but got %s", M, value_desc(V)), 0)) +#define PCHECKTYPE2M(P, V, T1, T2, M) \ + (((P) && ((V).type == (T1) || (V).type == (T2))) || \ + (error((V).lineno, "Syntax error, expecting %s", M), 0)) +#define VALIDATE_RANGE(V) \ + ((V).type != tRANGE || (V).range.lo <= (V).range.hi || \ + (error((V).lineno, "Invalid range %d..%d", (V).range.lo, (V).range.hi), 0)) + +inline value_t *get(VECTOR(pair_t) & map, const char *key) { + for (auto &kv : map) + if (kv.key == key) return &kv.value; + return 0; +} +inline const value_t *get(const VECTOR(pair_t) & map, const char *key) { + for (auto &kv : map) + if (kv.key == key) return &kv.value; + return 0; +} + +#ifdef __cplusplus + +template +inline void parse_vector(std::vector &vec, const VECTOR(value_t) & data) { + for (auto &v : data) vec.emplace_back(v); +} +template <> +inline void parse_vector(std::vector &vec, const VECTOR(value_t) & data) { + for (auto &v : data) + if (CHECKTYPE(v, tINT)) vec.push_back(v.i); +} +template <> +inline void parse_vector(std::vector &vec, const VECTOR(value_t) & data) { + for (auto &v : data) + if (CHECKTYPE(v, tINT)) vec.push_back(v.i); +} +template <> +inline void parse_vector(std::vector &vec, const VECTOR(value_t) & data) { + for (auto &v : data) + if (CHECKTYPE(v, tSTR)) vec.emplace_back(v.s); +} +template +inline void parse_vector(std::vector &vec, const value_t &data) { + if (data.type == tVEC) + parse_vector(vec, data.vec); + else + vec.emplace_back(data); +} +template <> +inline void parse_vector(std::vector &vec, const value_t &data) { + if (CHECKTYPE2(data, tINT, tVEC)) { + if (data.type == tVEC) + parse_vector(vec, data.vec); + else + vec.push_back(data.i); + } +} +template <> +inline void 
parse_vector(std::vector &vec, const value_t &data) { + if (CHECKTYPE2(data, tINT, tVEC)) { + if (data.type == tVEC) + parse_vector(vec, data.vec); + else + vec.push_back(data.i); + } +} +template <> +inline void parse_vector(std::vector &vec, const value_t &data) { + if (CHECKTYPE2(data, tSTR, tVEC)) { + if (data.type == tVEC) + parse_vector(vec, data.vec); + else + vec.push_back(data.s); + } +} + +std::ostream &operator<<(std::ostream &out, match_t m); +void print_match(FILE *fp, match_t m); + +inline std::ostream &operator<<(std::ostream &out, gress_t gress) { + switch (gress) { + case INGRESS: + out << "ingress"; + break; + case EGRESS: + out << "egress"; + break; + case GHOST: + out << "ghost"; + break; + default: + out << "(invalid gress " << static_cast(gress) << ")"; + } + return out; +} + +template +inline std::string to_string(T val) { + std::stringstream tmp; + tmp << val; + return tmp.str(); +} + +class MapIterChecked { + /* Iterate through a map (VECTOR(pair_t)), giving errors for non-string and + * duplicate keys (and skipping them) */ + const VECTOR(pair_t) & map; + bool allow; // allow non-string keys + std::set duplicates_allowed; + std::map keys_seen; + class iter { + MapIterChecked *self; + pair_t *p; + void check() { + while (p != self->map.end()) { + if (self->allow && p->key.type != tSTR) break; + if (!CHECKTYPE(p->key, tSTR)) { + p++; + continue; + } + if (self->duplicates_allowed.count(p->key.s)) break; + if (self->keys_seen.count(p->key.s)) { + error(p->key.lineno, "Duplicate element %s", p->key.s); + warning(self->keys_seen[p->key.s], "previous element %s", p->key.s); + p++; + continue; + } + self->keys_seen[p->key.s] = p->key.lineno; + break; + } + } + + public: + iter(MapIterChecked *s, pair_t *p_) : self(s), p(p_) { check(); } + pair_t &operator*() const { return *p; } + pair_t *operator->() const { return p; } + bool operator==(iter &a) const { return p == a.p; } + iter &operator++() { + p++; + check(); + return *this; + } + }; + + 
public: + explicit MapIterChecked(const VECTOR(pair_t) & map_, bool o = false, + const std::set &dup = {}) + : map(map_), allow(o), duplicates_allowed(dup) {} + MapIterChecked(const VECTOR(pair_t) & map_, const std::set &dup) + : map(map_), allow(false), duplicates_allowed(dup) {} + iter begin() { return iter(this, map.begin()); } + iter end() { return iter(this, map.end()); } +}; + +class MatchIter { + /* Iterate through the integers that match a match_t */ + match_t m; + class iter : public MaskCounter { + MatchIter *self; + + public: + explicit iter(MatchIter *s) : MaskCounter(s->m.word0 & s->m.word1), self(s) { + if (!(self->m.word1 | self->m.word0)) overflow(); + } + unsigned operator*() const { + return this->operator unsigned() | (self->m.word1 & ~self->m.word0); + } + iter &end() { + overflow(); + return *this; + } + }; + + public: + explicit MatchIter(match_t m_) : m(m_) {} + iter begin() { return iter(this); } + iter end() { return iter(this).end(); } +}; + +class SrcInfo { + int lineno; + friend std::ostream &operator<<(std::ostream &, const SrcInfo &); + + public: + explicit SrcInfo(int l) : lineno(l) {} +}; + +struct RegisterSetBase { + virtual ~RegisterSetBase() = default; +}; + +struct ParserRegisterSet : public RegisterSetBase {}; + +/// An interface for parsing a section of a .bfa file +class Parsable { + public: + /// @param data entire map/sequence of elements + virtual void input(VECTOR(value_t) args, value_t data) = 0; + virtual ~Parsable() = default; +}; + +/// An interface for writing into registers +class Configurable { + public: + virtual void write_config(RegisterSetBase ®s, json::map &json, bool legacy = true) = 0; + virtual ~Configurable() = default; +}; + +/// An interface for generating context.json +class Contextable { + public: + virtual void output(json::map &ctxtJson) = 0; + virtual ~Contextable() = default; +}; + +#endif /* __cplusplus */ + +#endif /* BACKENDS_TOFINO_BF_ASM_ASM_TYPES_H_ */ diff --git 
a/backends/tofino/bf-asm/atcam_match.cpp b/backends/tofino/bf-asm/atcam_match.cpp new file mode 100644 index 00000000000..4b36885f439 --- /dev/null +++ b/backends/tofino/bf-asm/atcam_match.cpp @@ -0,0 +1,558 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "action_bus.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" +#include "lib/hex.h" +#include "misc.h" + +void AlgTcamMatchTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::MatchEntry); + for (auto &kv : MapIterChecked(data, {"meter", "stats", "stateful"})) { + if (common_setup(kv, data, P4Table::MatchEntry)) { + } else if (kv.key == "number_partitions") { + if (CHECKTYPE(kv.value, tINT)) number_partitions = kv.value.i; + } else if (kv.key == "partition_field_name") { + if (CHECKTYPE(kv.value, tSTR)) { + partition_field_name = kv.value.s; + if (auto *p = find_p4_param(partition_field_name)) + if (!p->key_name.empty()) partition_field_name = p->key_name; + } + } else if (kv.key == "subtrees_per_partition") { + if (CHECKTYPE(kv.value, tINT)) max_subtrees_per_partition = kv.value.i; + } else if (kv.key == "bins_per_partition") { + if (CHECKTYPE(kv.value, tINT)) bins_per_partition = kv.value.i; + } else if (kv.key == "atcam_subset_width") { + if 
(CHECKTYPE(kv.value, tINT)) atcam_subset_width = kv.value.i; + } else if (kv.key == "shift_granularity") { + if (CHECKTYPE(kv.value, tINT)) shift_granularity = kv.value.i; + } else if (kv.key == "search_bus" || kv.key == "result_bus") { + // already dealt with in Table::setup_layout via common_init_setup + } else { + common_sram_setup(kv, data); + } + } + common_sram_checks(); +} + +// TODO: This could probably be rewritten in a simpler way. Below +// function checks the ways extracted from assembly for atcam and assumes the +// way no's are not sorted with column priority. Therefore the code sorts the +// first ram column and sets the column priority based on this column. Then this +// ordering is used to check if column priority is maintained if the ways are +// traversed in this column priority order for all other columns +void AlgTcamMatchTable::setup_column_priority() { + int no_ways = ways.size(); + int no_entries_per_way = ways[0].rams.size(); + // FIXME-P4C: Ideally RAM's 6 & 7 can be on both left and right RAM Units. + // Brig currently does not support this behavior and RAM 6 is always on + // left, while RAM 7 on right. 
Once this supported is added below function + // must be modified accordingly to accommodate these rams in lrams and rrams + // and the traversal mechanism must be changed to determine column priority + std::set lrams = {2, 3, 4, 5, 6}; + std::set rrams = {7, 8, 9, 10, 11}; + // Check if column is on left(0) or right(1) RAMs + + std::vector> first_entry_priority; + // Determine the side and which way corresponds to which column + int side = -1; + for (int w = 0; w < no_ways; w++) { + int col = ways[w].rams[0].col; + int row = ways[w].rams[0].row; + if (side == 0) { + if (lrams.find(col) == lrams.end()) { + error(lineno, + "ram(%d, %d) is not on correct side compared to rest in column " + "priority", + row, col); + } + } else if (side == 1) { + if (rrams.find(col) == rrams.end()) { + error(lineno, + "ram(%d, %d) is not on correct side compare to rest of column " + "priority", + row, col); + } + } else if (lrams.find(col) != lrams.end()) { + side = 0; + } else if (rrams.find(col) != rrams.end()) { + side = 1; + } else { + error(lineno, "ram(%d, %d) invalid for ATCAM", row, col); + } + first_entry_priority.push_back(std::make_pair(w, col)); + } + + // Sort ways based on column priority for first column + std::sort(first_entry_priority.begin(), first_entry_priority.end(), + [side](const std::pair &a, const std::pair &b) { + return side == 0 ? 
a.second < b.second : a.second > b.second; + }); + + int index = 0; + for (auto &entry : first_entry_priority) { + col_priority_way[index] = entry.first; + index++; + } + + // Ensure that the remaining columns match up with the first column ram + for (int i = 1; i < no_entries_per_way; i++) { + auto way_it = col_priority_way.begin(); + side = -1; + int prev_col = -1; + int prev_row = -1; + while (way_it != col_priority_way.end()) { + int row = ways[way_it->second].rams[i].row; + int col = ways[way_it->second].rams[i].col; + if (way_it != col_priority_way.begin()) { + if (!(((side == 0 && prev_col < col && lrams.find(col) != lrams.end()) || + (side == 1 && prev_col > col && rrams.find(col) != rrams.end())) && + prev_row == row)) { + error(lineno, + "ram(%d, %d) and ram(%d, %d) column priority is not " + "compatible", + prev_row, prev_col, row, col); + } + } + way_it++; + prev_col = col; + prev_row = row; + if (lrams.find(col) != lrams.end()) + side = 0; + else if (rrams.find(col) != rrams.end()) + side = 1; + else + error(lineno, "ram(%d, %d) invalid for ATCAM", row, col); + } + } +} + +/** + * Guarantees that the order of the entries provided in the ATCAM table format are order + * in HW priority 0-4, (where in HW entry 4 will be favored). This is required to guarantee that + * the entries in the ATCAM pack format are in priority order for the driver. 
+ * + * @seealso bf-p4c/mau/table_format.cpp - no_overhead_atcam_result_bus_words + */ +void AlgTcamMatchTable::verify_entry_priority() { + int result_bus_word = -1; + for (int i = 0; i < static_cast(group_info.size()); i++) { + BUG_CHECK(group_info[i].result_bus_word >= 0); + if (result_bus_word == -1) { + result_bus_word = group_info[i].result_bus_word; + } else if (result_bus_word != group_info[i].result_bus_word) { + error(format->lineno, "ATCAM tables can at most have only one overhead word"); + return; + } + auto mg_it = group_info[i].match_group.find(result_bus_word); + if (mg_it == group_info[i].match_group.end() || mg_it->second != i) { + error(format->lineno, + "Each ATCAM entry must coordinate its entry with the " + "correct priority"); + return; + } + } + + if (word_info[result_bus_word].size() != group_info.size()) { + error(format->lineno, "ATCAM tables do not chain to the same overhead word"); + return; + } + + for (int i = 0; i < static_cast(word_info[result_bus_word].size()); i++) { + if (i != word_info[result_bus_word][i]) { + error(format->lineno, "ATCAM priority not correctly formatted in the compiler"); + return; + } + } +} + +/** + * @seealso bf-p4c/mau/table_format.cpp - no_overhead_atcam_result_bus_words. 
This matches + * this function exactly + */ +void AlgTcamMatchTable::no_overhead_determine_result_bus_usage() { + int result_bus_word = -1; + int shared_groups = 0; + for (int i = group_info.size() - 1; i >= 0; i--) { + if (result_bus_word == -1) { + result_bus_word = group_info[i].match_group.begin()->first; + } + bool is_shared_group = false; + + if (group_info[i].match_group.size() > 1) + is_shared_group = true; + else if (group_info[i].match_group.begin()->first != result_bus_word) + is_shared_group = true; + + if (is_shared_group) { + if (i > 1) error(format->lineno, "ATCAM chaining of shared groups is not correct"); + shared_groups++; + } + + group_info[i].result_bus_word = result_bus_word; + group_info[i].match_group[result_bus_word] = i; + } + + word_info[result_bus_word].clear(); + for (int i = 0; i < static_cast(group_info.size()); i++) { + word_info[result_bus_word].push_back(i); + } + + if (shared_groups > 2) + error(format->lineno, "ATCAM cannot safely send hit signals to same result bus"); +} + +void AlgTcamMatchTable::verify_format(Target::Tofino targ) { + SRamMatchTable::verify_format(targ); + if (!error_count) verify_entry_priority(); +} + +void AlgTcamMatchTable::pass1() { + LOG1("### ATCAM match table " << name() << " pass1 " << loc()); + SRamMatchTable::pass1(); + if (format) { + setup_column_priority(); + find_tcam_match(); + } +} + +void AlgTcamMatchTable::setup_nibble_mask(Table::Format::Field *match, int group, + std::map &elems, bitvec &mask) { + for (auto &el : Values(elems)) { + int bit = match->bit(el.offset); + if (match->hi(bit) < bit + el.width - 1) + error(el.field->lineno, "match bits for %s not contiguous in match(%d)", + el.field->desc().c_str(), group); + // Determining the nibbles dedicated to s0q1 or s1q0 + int start_bit = bit; + int end_bit = start_bit + el.width - 1; + int start_nibble = start_bit / 4U; + int end_nibble = end_bit / 4U; + mask.setrange(start_nibble, end_nibble - start_nibble + 1); + } +} + +void 
AlgTcamMatchTable::find_tcam_match() { + std::map exact; + std::map> tcam; + unsigned off = 0; + /* go through the match fields and find duplicates -- those are the tcam matches */ + for (auto match_field : match) { + auto phv_p = dynamic_cast(match_field); + if (phv_p == nullptr) { + BUG(); + continue; + } + auto phv_ref = *phv_p; + auto sl = *phv_ref; + if (!sl) continue; + if (exact.count(sl)) { + if (tcam.count(sl)) + error(phv_ref.lineno, "%s appears more than twice in atcam match", + phv_ref.desc().c_str()); + if ((sl.size() % 4U) != 0) { + if ((sl.size() == 1) && (phv_ref.desc().find("$valid") != std::string::npos)) { + } else + warning(phv_ref.lineno, "tcam match field %s not a multiple of 4 bits", + phv_ref.desc().c_str()); + } + tcam.emplace(sl, std::make_pair(exact.at(sl), + match_element{new Phv::Ref(phv_ref), off, sl->size()})); + exact.erase(sl); + } else { + exact.emplace(sl, match_element{new Phv::Ref(phv_ref), off, sl->size()}); + } + off += sl.size(); + } + for (auto e : exact) + for (auto t : tcam) + if (e.first.overlaps(t.first)) + error(e.second.field->lineno, "%s overlaps %s in atcam match", + e.second.field->desc().c_str(), t.second.first.field->desc().c_str()); + if (error_count > 0) return; + + /* for the tcam pairs, treat first as s0q1 and second as s1q0 */ + for (auto &el : Values(tcam)) { + s0q1[el.first.offset] = el.first; + s1q0[el.second.offset] = el.second; + } + /* now find the bits in each group that match with the tcam pairs, ensure that they + * are nibble-aligned, and setup the nibble masks */ + for (unsigned i = 0; i < format->groups(); i++) { + if (Format::Field *match = format->field("match", i)) { + setup_nibble_mask(match, i, s0q1, s0q1_nibbles); + setup_nibble_mask(match, i, s1q0, s1q0_nibbles); + if (!(s0q1_nibbles & s1q0_nibbles).empty()) + error(format->lineno, "Cannot determine if a ternary nibble is s0q1 or s1q0"); + } else { + error(format->lineno, "no 'match' field in format group %d", i); + } + } +} + +void 
AlgTcamMatchTable::pass2() { + LOG1("### ATCAM match table " << name() << " pass2 " << loc()); + if (logical_id < 0) choose_logical_id(); + for (auto &ixb : input_xbar) ixb->pass2(); + setup_word_ixbar_group(); + ixbar_subgroup.resize(word_ixbar_group.size()); + ixbar_mask.resize(word_ixbar_group.size()); + // FIXME -- need a method of specifying these things in the asm code? + // FIXME -- should at least check that these are sane + for (unsigned i = 0; i < word_ixbar_group.size(); ++i) { + if (word_ixbar_group[i] < 0) { + // Word with no match data, only version/valid; used for direct lookup + // tables -- can it happen with an atcam table? + continue; + } + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + bitvec ixbar_use = input_xbar[0]->hash_group_bituse(word_ixbar_group[i]); + // Which 10-bit address group to use for this word -- use the lowest one with + // a bit set in the hash group. Can it be different for different words? + ixbar_subgroup[i] = ixbar_use.min().index() / EXACT_HASH_ADR_BITS; + // Assume that any hash bits usuable for select are used for select + ixbar_mask[i] = ixbar_use.getrange(EXACT_HASH_FIRST_SELECT_BIT, EXACT_HASH_SELECT_BITS); + } + if (actions) actions->pass2(this); + if (action_bus) action_bus->pass2(this); + if (gateway) gateway->pass2(); + if (idletime) idletime->pass2(); + if (format) format->pass2(this); + for (auto &hd : hash_dist) hd.pass2(this); +} + +void AlgTcamMatchTable::pass3() { + LOG1("### ATCAM match table " << name() << " pass3 " << loc()); + SRamMatchTable::pass3(); + if (action_bus) action_bus->pass3(this); +} + +template +void AlgTcamMatchTable::write_regs_vt(REGS ®s) { + LOG1("### ATCAM match table " << name() << " write_regs " << loc()); + SRamMatchTable::write_regs(regs); + + for (auto &row : layout) { + auto &rams_row = regs.rams.array.row[row.row]; + for (auto &ram : row.memunits) { + auto &way = way_map[ram]; + BUG_CHECK(ram.stage == INT_MIN && ram.row == row.row, "bogus %s in 
row %d", ram.desc(), + row.row); + auto &ram_cfg = rams_row.ram[ram.col]; + ram_cfg.match_nibble_s0q1_enable = version_nibble_mask.getrange(way.word * 32U, 32) & + ~s1q0_nibbles.getrange(way.word * 32U, 32); + ram_cfg.match_nibble_s1q0_enable = + 0xffffffffUL & ~s0q1_nibbles.getrange(way.word * 32U, 32); + } + } +} + +std::unique_ptr AlgTcamMatchTable::gen_memory_resource_allocation_tbl_cfg() const { + if (col_priority_way.size() == 0) + error(lineno, "No column priority determined for table %s", name()); + unsigned fmt_width = format ? (format->size + 127) / 128 : 0; + json::vector mras; + for (auto &entry : col_priority_way) { + json::map mra; + mra["column_priority"] = entry.first; + json::vector mem_units; + json::vector &mem_units_and_vpns = mra["memory_units_and_vpns"] = json::vector(); + auto &way = ways[entry.second]; + unsigned vpn_ctr = 0; + for (auto &ram : way.rams) { + if (mem_units.empty()) + vpn_ctr = layout_get_vpn(ram); + else + BUG_CHECK(vpn_ctr == layout_get_vpn(ram)); + mem_units.push_back(json_memunit(ram)); + if (mem_units.size() == fmt_width) { + json::map tmp; + tmp["memory_units"] = std::move(mem_units); + mem_units = json::vector(); + json::vector vpns; + // Because the entries in the context JSON are reversed, the VPNs have to + // be reversed as well + for (unsigned i = 0; i < format->groups(); i++) { + vpns.push_back(vpn_ctr + format->groups() - 1 - i); + } + vpn_ctr += format->groups(); + tmp["vpns"] = std::move(vpns); + mem_units_and_vpns.push_back(std::move(tmp)); + } + } + BUG_CHECK(mem_units.empty()); + mras.push_back(std::move(mra)); + } + return json::mkuniq(std::move(mras)); +} + +std::string AlgTcamMatchTable::get_match_mode(const Phv::Ref &pref, int offset) const { + for (auto &p : s0q1) { + if ((p.first == offset) && (*p.second.field == pref)) return "s0q1"; + } + for (auto &p : s1q0) { + if ((p.first == offset) && (*p.second.field == pref)) return "s1q0"; + } + return "unused"; +} + +void 
AlgTcamMatchTable::gen_unit_cfg(json::vector &units, int size) const { + json::map tbl; + tbl["direction"] = P4Table::direction_name(gress); + tbl["handle"] = + p4_table ? is_alpm() ? p4_table->get_alpm_atcam_table_handle() : p4_table->get_handle() : 0; + tbl["name"] = name(); + tbl["size"] = size; + tbl["table_type"] = "match"; + json::map &stage_tbl = + *add_common_sram_tbl_cfgs(tbl, "algorithmic_tcam_unit", "algorithmic_tcam_match"); + // Assuming atcam next hit table cannot be multiple tables + stage_tbl["default_next_table"] = + !hit_next.empty() ? hit_next[0].next_table_id() : Target::END_OF_PIPE(); + stage_tbl["memory_resource_allocation"] = gen_memory_resource_allocation_tbl_cfg(); + // Hash functions not necessary currently for ATCAM matches, as the result comes from + // the partition_field_name + stage_tbl["hash_functions"] = json::vector(); + add_pack_format(stage_tbl, format.get(), false); + units.push_back(std::move(tbl)); +} + +bool AlgTcamMatchTable::has_directly_attached_synth2port() const { + auto mt = this; + if (auto a = mt->get_attached()) { + if (a->selector && is_directly_referenced(a->selector)) return true; + for (auto &m : a->meters) { + if (is_directly_referenced(m)) return true; + } + for (auto &s : a->stats) { + if (is_directly_referenced(s)) return true; + } + for (auto &s : a->statefuls) { + if (is_directly_referenced(s)) return true; + } + } + return false; +} + +void AlgTcamMatchTable::gen_alpm_cfg(json::map &tbl) const { + tbl["default_action_handle"] = get_default_action_handle(); + tbl["action_profile"] = action_profile(); + // FIXME -- setting next_table_mask unconditionally only works because we process the + // stage table in stage order (so we'll end up with the value from the last stage table, + // which is what we want.) Should we check in case the ordering ever changes? 
+ tbl["default_next_table_mask"] = next_table_adr_mask; + // FIXME -- the driver currently always assumes this is 0, so we arrange for it to be + // when choosing the action encoding. But we should be able to choose something else + tbl["default_next_table_default"] = 0; + // FIXME-JSON: PD related, check glass examples for false (ALPM) + tbl["is_resource_controllable"] = true; + tbl["uses_range"] = false; + if (p4_table && p4_table->disable_atomic_modify) tbl["disable_atomic_modify"] = true; + tbl["ap_bind_indirect_res_to_match"] = json::vector(); + tbl["static_entries"] = json::vector(); + if (context_json) { + add_json_node_to_table(tbl, "ap_bind_indirect_res_to_match"); + } + LOG1("populate alpm " << name()); + // FIXME-DRIVER + // 'actions' and 'table_refs' on the alpm are redundant as they are + // already present in the atcam table. These should probably be cleaned + // up from the context json and driver parsing. + if (actions) { + actions->gen_tbl_cfg(tbl["actions"]); + } else if (action && action->actions) { + action->actions->gen_tbl_cfg(tbl["actions"]); + } + add_all_reference_tables(tbl); + json::map &alpm_match_attributes = tbl["match_attributes"]; + alpm_match_attributes["max_subtrees_per_partition"] = max_subtrees_per_partition; + alpm_match_attributes["partition_field_name"] = get_partition_field_name(); + alpm_match_attributes["lpm_field_name"] = get_lpm_field_name(); + alpm_match_attributes["bins_per_partition"] = bins_per_partition; + alpm_match_attributes["atcam_subset_width"] = atcam_subset_width; + alpm_match_attributes["shift_granularity"] = shift_granularity; + if (context_json) { + add_json_node_to_table(alpm_match_attributes, "excluded_field_msb_bits"); + } + auto pa_hdl = get_partition_action_handle(); + // Throw an error if partition action handle is not set. The alpm + // pre-classifier should have a single action which sets the partition + // handle. 
If no handle is present, it is either not generated by the + // compiler or assembler is not able to find it within actions. In + // either case this is a problem as driver will error out + if (pa_hdl.empty()) + error(lineno, "Cannot find partition action handle for ALPM table %s", name()); + // backward-compatible mode + if (pa_hdl.size() == 1) { + alpm_match_attributes["set_partition_action_handle"] = *pa_hdl.begin(); + } else { + json::vector &action_handles = alpm_match_attributes["set_partition_action_handle"] = + json::vector(); + for (auto hdl : pa_hdl) action_handles.push_back(hdl); + } + alpm_match_attributes["stage_tables"] = json::vector(); +} + +void AlgTcamMatchTable::gen_tbl_cfg(json::vector &out) const { + json::map *atcam_tbl_ptr; + unsigned number_entries = get_number_entries(); + if (is_alpm()) { + // Add ALPM ATCAM config to ALPM table (generated by pre-classifier in + // previous ostage) + json::map *alpm_tbl_ptr = base_tbl_cfg(out, "match", number_entries); + if (!alpm_tbl_ptr) { + error(lineno, "No alpm table generated by alpm pre-classifier"); + return; + } + json::map &alpm_tbl = *alpm_tbl_ptr; + gen_alpm_cfg(alpm_tbl); + json::map &alpm_match_attributes = alpm_tbl["match_attributes"]; + json::map &atcam_tbl = alpm_match_attributes["atcam_table"]; + base_alpm_atcam_tbl_cfg(atcam_tbl, "match", number_entries); + atcam_tbl_ptr = &atcam_tbl; + } else { + atcam_tbl_ptr = base_tbl_cfg(out, "match", number_entries); + } + json::map &tbl = *atcam_tbl_ptr; + common_tbl_cfg(tbl); + json::map &match_attributes = tbl["match_attributes"]; + match_attributes["match_type"] = "algorithmic_tcam"; + if (actions) { + actions->gen_tbl_cfg(tbl["actions"]); + } else if (action && action->actions) { + action->actions->gen_tbl_cfg(tbl["actions"]); + } + json::vector &units = match_attributes["units"]; + gen_unit_cfg(units, number_entries); + match_attributes["number_partitions"] = number_partitions; + match_attributes["partition_field_name"] = 
partition_field_name; + add_all_reference_tables(tbl); + if (units.size() > 1 && has_directly_attached_synth2port()) + error(lineno, + "The ability to split directly addressed counters/meters/stateful " + "resources across multiple logical tables of an algorithmic tcam match table " + "is not currently supported."); + // Empty stage table node in atcam. These are moved inside the + // units->MatchTable->stage_table node + match_attributes["stage_tables"] = json::vector(); +} + +DEFINE_TABLE_TYPE(AlgTcamMatchTable) diff --git a/backends/tofino/bf-asm/attached_table.cpp b/backends/tofino/bf-asm/attached_table.cpp new file mode 100644 index 00000000000..921bd97f4f5 --- /dev/null +++ b/backends/tofino/bf-asm/attached_table.cpp @@ -0,0 +1,524 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "action_bus.h" +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" +#include "misc.h" + +void AttachedTable::pass1() { + if (default_action.empty()) default_action = get_default_action(); + // Per Flow Enable - Validate and Set pfe and address bits + if (per_flow_enable_param == "false") per_flow_enable = false; + + if (!Target::SUPPORT_OVERFLOW_BUS() && stage->overflow_bus_use[7]) + error(layout[0].lineno, "table %s, %s has no overflow bus between logical row 7 and 8", + name(), Target::name()); +} + +unsigned AttachedTable::per_flow_enable_bit(MatchTable *m) const { + if (!per_flow_enable || per_flow_enable_param.empty()) return 0; + unsigned pfe_bit = 0; + if (m) { + auto addr = m->find_address_field(this); + auto address_bits = addr ? addr->size : 0; + if (auto f = m->lookup_field(per_flow_enable_param)) { + // Get pfe bit position from format entry + // This value is then adjusted based on address + if (f->size == 1) + pfe_bit = f->bit(0); + else + error(lineno, "pfe bit %s is not a 1 bit in table %s format", + per_flow_enable_param.c_str(), m->name()); + if (addr) + pfe_bit -= addr->bit(0); + else + pfe_bit = 0; // we use the primary shift to get at the pfe bit + } else if (per_flow_enable_param == "true" && addr) { + pfe_bit = addr->bit(addr->size - 1) - addr->bit(0) + default_pfe_adjust(); + } else { + // FIXME -- should be an error, but the compiler can hit this for a shared attached + // table that is defaulted in one match table and in the overhead in another. We + // should no longer be generating code that tries to set per_flow_enable: in the + // attached table (it should be in the call in the match table) at all, but we still + // have issues? 
Comments in the compiler indicate those should go away + // and this can be an error again. + warning(lineno, "can't find per_flow_enable param %s in format for %s", + per_flow_enable_param.c_str(), m->name()); + } + } else { + for (auto mt : match_tables) { + auto bit = per_flow_enable_bit(mt); + if (bit && pfe_bit && bit != pfe_bit) { + // this should be ok, but the driver can't handle it currently + warning(lineno, + "pfe_bit %s at different locations in different match tables," + " which will cause driver problems", + per_flow_enable_param.c_str()); + } else { + pfe_bit = bit; + } + } + } + return pfe_bit; +} + +// --------------- +// Meter ALU | Row +// Used | +// --------------- +// 0 | 1 +// 1 | 3 +// 2 | 5 +// 3 | 7 +// --------------- +void AttachedTable::add_alu_index(json::map &stage_tbl, std::string alu_index) const { + if (layout.size() <= 0) + error(lineno, "Invalid meter alu setup. A meter ALU should be allocated for table %s", + name()); + stage_tbl[alu_index] = get_alu_index(); +} + +SelectionTable *AttachedTable::get_selector() const { + SelectionTable *rv = nullptr; + for (auto *mtab : match_tables) { + auto *sel = mtab->get_selector(); + if (sel && rv && rv != sel) return nullptr; // inconsistent + if (sel) rv = sel; + } + return rv; +} + +SelectionTable *AttachedTables::get_selector() const { + if (selector) return dynamic_cast(static_cast
(selector)); + return nullptr; +} + +StatefulTable *AttachedTable::get_stateful() const { + StatefulTable *rv = nullptr; + for (auto *mtab : match_tables) { + auto *s = mtab->get_stateful(); + if (s && rv && rv != s) return nullptr; // inconsistent + if (s) rv = s; + } + return rv; +} + +StatefulTable *AttachedTables::get_stateful(std::string name) const { + for (auto &s : statefuls) { + if (name == s->name() || name.empty()) + return dynamic_cast(static_cast
(s)); + } + return nullptr; +} + +MeterTable *AttachedTable::get_meter() const { + MeterTable *rv = nullptr; + for (auto *mtab : match_tables) { + auto *m = mtab->get_meter(); + if (m && rv && rv != m) return nullptr; // inconsistent + if (m) rv = m; + } + return rv; +} + +MeterTable *AttachedTables::get_meter(std::string name) const { + for (auto &s : meters) { + if (name == s->name() || name.empty()) + return dynamic_cast(static_cast
(s)); + } + return nullptr; +} + +Table::Format::Field *AttachedTables::find_address_field(const AttachedTable *tbl) const { + if (selector == tbl && selector.args.size() > 0) return selector.args.at(0).field(); + for (auto &s : stats) + if (s == tbl && s.args.size() > 0) return s.args.at(0).field(); + for (auto &m : meters) + if (m == tbl && m.args.size() > 0) return m.args.at(0).field(); + for (auto &s : statefuls) + if (s == tbl) { + if (s.args.size() > 1) { + return s.args.at(1).field(); + } else if (s.args.size() > 0) { + // this special case is a hack in case we're calling this before + // pass1 has run on the match table with these attached tables + auto *f = s.args.at(0).field(); + if (f && f->size > 3) return f; + } + } + return nullptr; +} + +bool AttachedTables::run_at_eop() { + if (meters.size() > 0) return true; + for (auto &s : stats) + if (s->run_at_eop()) return true; + return false; +} + +bitvec AttachedTables::compute_reachable_tables() const { + bitvec rv; + if (selector) rv |= selector->reachable_tables(); + if (selector_length) rv |= selector->reachable_tables(); + for (auto &t : stats) rv |= t->reachable_tables(); + for (auto &t : meters) rv |= t->reachable_tables(); + for (auto &t : statefuls) rv |= t->reachable_tables(); + return rv; +} + +unsigned AttachedTable::determine_meter_shiftcount(Table::Call &call, int group, int word, + int tcam_shift) const { + if (call.args[0].name() && strcmp(call.args[0].name(), "$DIRECT") == 0) { + return direct_shiftcount() + tcam_shift; + } else if (auto f = call.args[0].field()) { + BUG_CHECK(int(f->by_group[group]->bit(0) / 128U) == word); + return f->by_group[group]->bit(0) % 128U + indirect_shiftcount(); + } else if (auto f = call.args[1].field()) { + return f->by_group[group]->bit(0) % 128U + METER_ADDRESS_ZERO_PAD; + } else if (auto f = call.args[2].field()) { + return f->by_group[group]->bit(0) % 128U + METER_ADDRESS_ZERO_PAD; + } else { + return 0; + } +} + +/** + * In match merge, addresses are 
generated from result buses containing match overhead. + * These buses (83 bits = 64 bits of RAM line + 19 bits of direct address) are sent through + * format and merge to potentially generate addresses for meters, counters, action data, + * etc. + * + * The addresses for meter/selector/stateful alu, counter, idletime, and action data + * have very similar setups, and will described in the section below. But generally + * the address can be formulated in 3 steps. + * + * 1. The 83 bit bus is right shifted to get the bits corresponding to the address. + * 2. This value is ANDed with a mask to pull only the relevant bits + * 3. The value is ORed with a default register to enable certain bits + * + * This is commonly referred to as shift-mask-default, and will happen for all of + * these addresses if necessary. + * + * The addresses are built up of 2 or 3 general pieces. + * + * 1. The RAM line location - which RAM/RAM line to look up the address. This will + * potentially contain a RAM line, a VPN, and Huffman bits. + * 2. A per flow enable bit - a bit to enable the associated table to run or not + * 3. A meter type - Specifically only for the meter_adr users (selectors, stateful, + * meter). Will indicate to the meter alu what particular instruction to run. + * + * The following portion will describe the registers required to build these addresses: + * + * 1. *_payload_shifter_en - will enable the address to be generated if set to true, i.e. + * if a match table does not have a counter, then the associated stats_payload_shifter_en + * will not be enabled. + * + * 2. *_exact/_tcam_shiftcount - the right shift per tind/exact match result bus. + * Addresses themselves can have a certain number of bits appended to the lsb, so + * the number of appended bits has to appear in the shiftcount + * + * 3. *_mask - the post shift AND mask of the relevant address bits from match overhead + * + * 4. *_default - the post mask OR value. 
Potentially useful for per flow enable bits/ + * meter types that are identical for every action + * + * 5. *_per_entry_mux_ctl - the post shift position of the per flow enable bit, if that + * bit is contained in overhead. This is always ORed in, separate from default + * + * 6. _type_position - only relevant for meter address users. This is the lsb of the + * meter type position if the meter position is in overhead. Note that if this register + * is used, then the meter type must be included in the mask. + * + * The purpose of the function of the determine_merge_regs is to look at the arguments of the + * call for an attached table, and use those to determine the values of these registers. + */ +void AttachedTable::determine_meter_merge_regs(MatchTable *match, int type, int bus, + const std::vector &args, + METER_ACCESS_TYPE default_type, unsigned &adr_mask, + unsigned &per_entry_en_mux_ctl, + unsigned &adr_default, + unsigned &meter_type_position) { + adr_mask = 0; + per_entry_en_mux_ctl = 0; + adr_default = 0; + meter_type_position = 0; + + int max_ptr_bits = EXACT_VPN_BITS + EXACT_WORD_BITS; + if (match->to()) max_ptr_bits = TCAM_VPN_BITS + TCAM_WORD_BITS; + + unsigned max_address = (1U << METER_ADDRESS_BITS) - 1; + BUG_CHECK((args.size() == 2 && default_type == METER_COLOR_ACCESS) || args.size() == 3, + "wrong size for meter args"); + if (args[0] == "$DIRECT") { + adr_mask |= (((1U << max_ptr_bits) - 1) << address_shift()) & max_address; + } else if (auto addr = args[0].field()) { + adr_mask |= (((1U << addr->size) - 1) << address_shift()) & max_address; + } + + if (args[1].name() && strcmp(args[1].name(), "$DEFAULT") == 0) { + adr_default |= (1 << METER_PER_FLOW_ENABLE_START_BIT); + } else if (auto pfe_field = args[1].field()) { + if (auto addr_field = args[0].field()) { + per_entry_en_mux_ctl = pfe_field->bit(0) - addr_field->bit(0) + address_shift(); + } else if (args[0].hash_dist() || args[0].count_mode()) { + per_entry_en_mux_ctl = 0; + } + } + + if 
(default_type == METER_COLOR_ACCESS) { + // meter color access -- has no meter type + } else if (args[2].name() && strcmp(args[2].name(), "$DEFAULT") == 0) { + adr_default |= default_type << METER_TYPE_START_BIT; + } else if (auto type_field = args[2].field()) { + if (auto addr_field = args[0].field()) { + meter_type_position = type_field->bit(0) - addr_field->bit(0) + address_shift(); + } else if (args[0].hash_dist() || args[0].count_mode()) { + if (auto pfe_field = args[1].field()) { + meter_type_position = type_field->bit(0) - pfe_field->bit(0); + } + } else { + meter_type_position = 0; + } + adr_mask |= ((1 << METER_TYPE_BITS) - 1) << METER_TYPE_START_BIT; + } +} + +const Table::Call *AttachedTables::get_call(const Table *tbl) const { + if (selector == tbl) return &selector; + for (auto &s : stats) + if (s == tbl) return &s; + for (auto &m : meters) + if (m == tbl) return &m; + for (auto &s : statefuls) + if (s == tbl) return &s; + return nullptr; +} + +/** + * Currently a call for an attached table (currently for counters/meters/stateful alus/selectors) + * is built up of a 2 part address/3 part address consisting of 3 parameters: + * + * 1. The location of the address + * 2. The location of the per flow enable bit + * 3. The location of the meter type (if necessary) + * + * Currently these locations can be: + * - Names that appear in the format of the table + * - For address location, a hash distribution unit + * - For address a $DIRECT keyword for a directly addressed table + * - For pfe and meter type, a $DEFAULT keyword indicating that the value is ORed in through + * the default register + * + * This function is responsible for validating this. 
Perhaps, in the future, we can have arguments + * both contain potential SHIFTs and ORs that can be interpreted by the registers + */ +bool AttachedTable::validate_call(Table::Call &call, MatchTable *self, size_t required_args, + int hash_dist_type, Table::Call &first_call) { + if (!self) return false; + if (call->stage != self->stage) { + error(call.lineno, "%s not in same stage as %s", call->name(), self->name()); + return false; + } else if (call->gress != self->gress) { + if (!(call->to() && + timing_thread(call->gress) == timing_thread(self->gress))) { + error(call.lineno, "%s not in same thread as %s", call->name(), self->name()); + return false; + } + } else if (call.args != first_call.args) { + error(call.lineno, + "All calls for the same address type must be identical, and " + "are not for %s and %s", + call->name(), first_call->name()); + } + + if (call.args.size() != required_args) { + error(call.lineno, "%s requires exactly %zu arguments", call->name(), required_args); + return false; + } + + if (call.args.size() == 0) return true; + if (call.args[0].name()) { + if (strcmp(call.args[0].name(), "$DIRECT") != 0) { + error(call.lineno, "Index %s for %s cannot be found", call.args[0].name(), + call->name()); + return false; + } + } else if (call.args[0].hash_dist()) { + call.args[0].hash_dist()->xbar_use |= hash_dist_type; + } else if (call.args[0].type == Table::Call::Arg::Counter) { + auto *salu = call->to(); + if (salu == nullptr) { + error(call.lineno, + "Index for %s cannot be a stateful counter, as it is not a " + "stateful alu", + call->name()); + return false; + } + salu->set_counter_mode(call.args[0].count_mode()); + + } else if (!call.args[0].field()) { + error(call.lineno, "Index for %s cannot be understood", call->name()); + } + + if (call.args.size() == 1) return true; + + if (call.args[1].name()) { + if (strcmp(call.args[1].name(), "$DEFAULT") != 0) { + error(call.lineno, "Per flow enable %s for %s cannot be found", call.args[1].name(), + 
call->name()); + return false; + } + } else if (!call.args[1].field()) { + error(call.lineno, "Per flow enable for %s cannot be understood", call->name()); + return false; + } + + if (call.args.size() == 2) return true; + + if (call.args[2].name()) { + if (strcmp(call.args[2].name(), "$DEFAULT") != 0) { + error(call.lineno, "Meter type %s for %s cannot be found", call.args[2].name(), + call->name()); + return false; + } + } else if (!call.args[2].field()) { + error(call.lineno, "Meter type for %s cannot be understood", call->name()); + return false; + } + return true; +} + +void AttachedTables::pass0(MatchTable *self) { + if (selector.check() && selector->set_match_table(self, true) != Table::SELECTION) + error(selector.lineno, "%s is not a selection table", selector->name()); + for (auto &s : stats) { + bool direct = false; + if (s.check() && s->set_match_table(self, !s.is_direct_call()) != Table::COUNTER) + error(s.lineno, "%s is not a counter table", s->name()); + } + for (auto &m : meters) + if (m.check()) { + auto type = m->set_match_table(self, !m.is_direct_call() > 0); + if (type != Table::METER && type != Table::STATEFUL) + error(m.lineno, "%s is not a meter table", m->name()); + } + for (auto &s : statefuls) { + if (!s.check()) continue; + if (s->set_match_table(self, !s.is_direct_call()) != Table::STATEFUL) + error(s.lineno, "%s is not a stateful table", s->name()); + } +} + +void AttachedTables::pass1(MatchTable *self) { + if (selector) { + selector->validate_call(selector, self, 3, HashDistribution::METER_ADDRESS, selector); + if (selector_length && selector_length->name() == selector->name()) { + selector_length->to()->validate_length_call(selector_length); + } else { + error(selector.lineno, + "Must provide selector length information when a selector " + "is called"); + } + } + for (auto &s : stats) { + if (s) { + s->validate_call(s, self, 2, HashDistribution::STATISTICS_ADDRESS, stats[0]); + } + } + + bool color_mapram_req = false; + for (auto &m : 
meters) { + if (m) { + m->validate_call(m, self, 3, HashDistribution::METER_ADDRESS, meters[0]); + if (m->uses_colormaprams()) color_mapram_req = true; + } + } + + if (color_mapram_req) { + if (meter_color) { + meter_color->validate_call(meter_color, self, 2, HashDistribution::STATISTICS_ADDRESS, + meter_color); + } else { + error(meters[0].lineno, + "Must provide a meter color mapram call when a meter " + "required color maprams is called"); + } + } + + for (auto &s : statefuls) { + if (s) { + s->validate_call(s, self, 3, HashDistribution::METER_ADDRESS, statefuls[0]); + } + } +} + +int AttachedTable::json_memunit(const MemUnit &r) const { + if (r.stage >= 0) { + return r.stage * Target::SRAM_STRIDE_STAGE() + r.row * Target::SRAM_STRIDE_ROW() + + r.col * Target::SRAM_STRIDE_COLUMN(); + } else if (r.row >= 0) { + // per-stage logical sram + return r.row * Target::SRAM_LOGICAL_UNITS_PER_ROW() + r.col; + } else { + // lamb + return r.col; + } +} + +template +void AttachedTables::write_merge_regs(REGS ®s, MatchTable *self, int type, int bus) { + for (auto &s : stats) s->write_merge_regs(regs, self, type, bus, s.args); + for (auto &m : meters) { + m->write_merge_regs(regs, self, type, bus, m.args); + if (m->uses_colormaprams()) { + if (meter_color) + m->to()->write_color_regs(regs, self, type, bus, meter_color.args); + else + m->to()->write_color_regs(regs, self, type, bus, m.args); + } + } + for (auto &s : statefuls) s->write_merge_regs(regs, self, type, bus, s.args); + if (auto s = get_selector()) s->write_merge_regs(regs, self, type, bus, selector.args); +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void AttachedTables::write_merge_regs, + mau_regs &, MatchTable *, int, int) + +template +void AttachedTables::write_tcam_merge_regs(REGS ®s, MatchTable *self, int bus, int tcam_shift) { + auto &merge = regs.rams.match.merge; + for (auto &st : stats) { + merge.mau_stats_adr_tcam_shiftcount[bus] = st->determine_shiftcount(st, 0, 0, tcam_shift); + break; + } + for 
(auto &m : meters) { + m->to()->setup_tcam_shift(regs, bus, tcam_shift, m, meter_color); + break; /* all must be the same, only config once */ + } + for (auto &s : statefuls) { + merge.mau_meter_adr_tcam_shiftcount[bus] = s->determine_shiftcount(s, 0, 0, tcam_shift); + break; /* all must be the same, only config once */ + } +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void AttachedTables::write_tcam_merge_regs, + mau_regs &, MatchTable *, int, int) diff --git a/backends/tofino/bf-asm/b2j.cpp b/backends/tofino/bf-asm/b2j.cpp new file mode 100644 index 00000000000..b6b345aa107 --- /dev/null +++ b/backends/tofino/bf-asm/b2j.cpp @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include "bson.h" + +int main(int ac, char **av) { + if (ac != 3) { + std::cerr << "usage " << av[0] << " " << std::endl; + return 1; + } + std::ifstream in(av[1]); + if (!in) { + std::cerr << "failed to open " << av[1] << std::endl; + return 1; + } + json::obj *data = nullptr; + if (!(in >> json::binary(data))) { + std::cerr << "failed to read bson" << std::endl; + return 1; + } + std::ofstream out(av[2]); + if (!out) { + std::cerr << "failed to open " << av[2] << std::endl; + return 1; + } + if (!(out << data)) { + std::cerr << "failed to write json" << std::endl; + return 1; + } + return 0; +} diff --git a/backends/tofino/bf-asm/bfas.cpp b/backends/tofino/bf-asm/bfas.cpp new file mode 100644 index 00000000000..94bd4476ed6 --- /dev/null +++ b/backends/tofino/bf-asm/bfas.cpp @@ -0,0 +1,623 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "bfas.h" + +#include + +#include + +#include +#include +#include +#include + +#include "backends/tofino/bf-asm/target.h" +#include "backends/tofino/bf-p4c/git_sha_version.h" // for BF_P4C_GIT_SHA +#include "backends/tofino/bf-p4c/version.h" +#include "constants.h" +#include "lib/indent.h" +#include "misc.h" +#include "parser-tofino-jbay.h" +#include "sections.h" +#include "top_level.h" + +#define MAJOR_VERSION 1 +#define MINOR_VERSION 0 + +const std::string SCHEMA_VERSION = CONTEXT_SCHEMA_VERSION; // NOLINT(runtime/string) + +option_t options = { + .binary = PIPE0, + .condense_json = true, + .debug_info = false, + .disable_egress_latency_padding = false, + .disable_gfm_parity = true, + .disable_long_branch = false, + .disable_power_gating = false, + .gen_json = false, + .high_availability_enabled = true, + .match_compiler = false, + .multi_parsers = true, // TODO Remove option after testing + .partial_input = false, + .singlewrite = true, + .stage_dependency_pattern = "", + .target = NO_TARGET, + .tof2lab44_workaround = false, + .version = CONFIG_OLD, + .werror = false, + .nowarn = false, + .log_hashes = false, + .output_dir = ".", + .num_stages_override = 0, + .tof1_egr_parse_depth_checks_disabled = false, +}; + +std::string asmfile_name; // NOLINT(runtime/string) +std::string asmfile_dir; // NOLINT(runtime/string) +std::string gfm_log_file_name = "mau.gfm.log"; // NOLINT(runtime/string) + +std::unique_ptr gfm_out; + +int log_error = 0; +extern char *program_name; + +/** + * @brief Maximum handle offset which can be used for table and parser handles. + * + * Selected bits in parser and table handles are dedicated to distinguish handles + * for different pipes. + * See comments in bf-asm/parser.h and bf-asm/p4_table.cpp to get more information + * about format of parser and table handles. + * Currently 4 bits are dedicated for pipe id. 
+ */ +#define MAX_HANDLE_OFFSET 16 + +/** + * @brief Value OR-ed with table and parser handles to create unique handles. + * + * See comments in bf-asm/parser.h and bf-asm/p4_table.cpp to get more information + * about format of parser and table handles. + */ +unsigned unique_table_offset = 0; + +BaseAsmParser *asm_parser = nullptr; + +// Create target-specific section for parser +void createSingleAsmParser() { + if (asm_parser != nullptr) { + return; + } + asm_parser = new AsmParser; +} + +std::unique_ptr open_output(const char *name, ...) { + char namebuf[1024], *p = namebuf, *end = namebuf + sizeof(namebuf); + va_list args; + if (!options.output_dir.empty()) p += snprintf(p, end - p, "%s/", options.output_dir.c_str()); + va_start(args, name); + if (p < end) p += vsnprintf(p, end - p, name, args); + va_end(args); + if (p >= end) { + std::cerr << "File name too long: " << namebuf << "..." << std::endl; + snprintf(namebuf, sizeof(namebuf), "/dev/null"); + } + auto rv = std::unique_ptr(new std::ofstream(namebuf)); + if (!*rv) { + std::cerr << "Failed to open " << namebuf << " for writing: " << strerror(errno) + << std::endl; + } + return rv; +} + +std::string usage(std::string tfas) { + std::string u = "usage: "; + u.append(tfas); + u.append(" [-l:Mo:gqtvh] file..."); + return u; +} + +void output_all() { + auto targetName = "unknown"; + switch (options.target) { +#define SET_TOP_LEVEL(TARGET) \ + case Target::TARGET::tag: \ + new TopLevelRegs; \ + targetName = Target::TARGET::name; \ + break; + FOR_ALL_TARGETS(SET_TOP_LEVEL) + default: + std::cerr << "No target set" << std::endl; + error_count++; + return; + } + json::map ctxtJson; + const time_t now = time(NULL); + char build_date[1024]; + struct tm lt; + localtime_r(&now, <); + BUG_CHECK(<); + strftime(build_date, 1024, "%c", <); + ctxtJson["build_date"] = build_date; + ctxtJson["schema_version"] = SCHEMA_VERSION; + ctxtJson["compiler_version"] = BF_P4C_VERSION " (" BF_P4C_GIT_SHA ")"; + ctxtJson["target"] = 
targetName; + ctxtJson["program_name"] = asmfile_name; + ctxtJson["learn_quanta"] = json::vector(); + ctxtJson["parser"] = json::map(); + ctxtJson["phv_allocation"] = json::vector(); + ctxtJson["tables"] = json::vector(); + ctxtJson["mau_stage_characteristics"] = json::vector(); + ctxtJson["configuration_cache"] = json::vector(); + + Section::output_all(ctxtJson); + TopLevel::output_all(ctxtJson); + + json::map driver_options; + driver_options["hash_parity_enabled"] = !options.disable_gfm_parity; + driver_options["high_availability_enabled"] = options.high_availability_enabled; + if (options.target == TOFINO) + driver_options["tof1_egr_parse_depth_checks_disabled"] = + options.tof1_egr_parse_depth_checks_disabled; + ctxtJson["driver_options"] = std::move(driver_options); + + auto json_out = open_output("context.json"); + *json_out << &ctxtJson; + + delete TopLevel::all; +} + +void check_target_pipes(int pipe_id) { + if (pipe_id >= 0) { + if (pipe_id >= MAX_PIPE_COUNT) { + std::cerr << "Pipe number (" << pipe_id << ") exceeds implementation limit of pipes (" + << MAX_PIPE_COUNT << ")." << std::endl; + error_count++; + } else if (pipe_id < Target::NUM_PIPES()) { + options.binary = static_cast(PIPE0 + pipe_id); + } else { + std::cerr << "Pipe number (" << pipe_id << ") exceeds maximum number of pipes (" + << Target::NUM_PIPES() << ") for target " << Target::name() << "." + << std::endl; + error_count++; + } + } +} + +#define MATCH_TARGET_OPTION(TARGET, OPT) \ + if (!strcasecmp(OPT, Target::TARGET::name)) /* NOLINT(readability/braces) */ \ + options.target = Target::TARGET::tag; \ + else +#define OUTPUT_TARGET(TARGET) << " " << Target::TARGET::name + +// Do not build main() when BUILDING_FOR_GTEST. 
+#ifndef BUILDING_FOR_GTEST +int main(int ac, char **av) { + int srcfiles = 0; + const char *firstsrc = 0; + struct stat st; + bool asmfile = false; + bool disable_clog = true; + int pipe_id = -1; + extern void register_exit_signals(); + register_exit_signals(); + program_name = av[0]; + std::vector arguments(av, av + ac); + static std::set valid_noop_fill = {"and", "or", "alu_a", "alu_b", + "minu", "mins", "maxu", "maxs"}; + if (auto opt = getenv("BFAS_OPTIONS")) { + int add_at = 1; + while (auto p = strsep(&opt, " \t\r\n")) { + if (!*p) continue; + arguments.insert(arguments.begin() + add_at++, p); + } + av = &arguments[0]; + ac = arguments.size(); + } + for (int i = 1; i < ac; i++) { + int val, len; + if (av[i][0] == '-' && av[i][1] == 0) { + asm_parse_file("", stdin); + } else if (!strcmp(av[i], "--allpipes")) { + options.binary = FOUR_PIPE; + } else if (!strcmp(av[i], "--disable-egress-latency-padding")) { + options.disable_egress_latency_padding = true; + } else if (!strcmp(av[i], "--log-hashes")) { + options.log_hashes = true; + } else if (!strcmp(av[i], "--disable-longbranch")) { + options.disable_long_branch = true; + } else if (!strcmp(av[i], "--enable-longbranch")) { + if (options.target && Target::LONG_BRANCH_TAGS() == 0) { + error(-1, "target %s does not support --enable-longbranch", Target::name()); + options.disable_long_branch = true; + } else { + options.disable_long_branch = false; + } + } else if (!strcmp(av[i], "--gen_json")) { + options.gen_json = true; + options.binary = NO_BINARY; + } else if (!strcmp(av[i], "--high_availability_disabled")) { + options.high_availability_enabled = false; + } else if (!strcmp(av[i], "--no-condense")) { + options.condense_json = false; + } else if (!strcmp(av[i], "--no-bin")) { + options.binary = NO_BINARY; + } else if (!strcmp(av[i], "--no-warn")) { + options.nowarn = true; + } else if (!strcmp(av[i], "--old_json")) { + std::cerr << "Old context json is no longer supported" << std::endl; + error_count++; + } 
else if (!strcmp(av[i], "--partial")) { + options.partial_input = true; + } else if (sscanf(av[i], "--pipe%d%n", &val, &len) > 0 && !av[i][len] && val >= 0) { + pipe_id = val; + } else if (!strcmp(av[i], "--singlepipe")) { + options.binary = ONE_PIPE; + } else if (!strcmp(av[i], "--singlewrite")) { + options.singlewrite = true; + } else if (!strcmp(av[i], "--multi-parsers")) { + options.multi_parsers = true; + } else if (!strcmp(av[i], "--disable-tof2lab44-workaround")) { + options.tof2lab44_workaround = false; + } else if (!strcmp(av[i], "--tof2lab44-workaround")) { + options.tof2lab44_workaround = true; + } else if (!strcmp(av[i], "--stage_dependency_pattern")) { + ++i; + if (!av[i]) { + std::cerr << "No stage dependency pattern specified " << std::endl; + error_count++; + break; + } + options.stage_dependency_pattern = av[i]; + } else if (!strcmp(av[i], "--noop-fill-instruction")) { + ++i; + if (!av[i] || !valid_noop_fill.count(av[i])) { + std::cerr << "invalid fill instruction " << av[i] << std::endl; + } else { + options.fill_noop_slot = av[i]; + } + } else if (val = 0, sscanf(av[i], "--noop-fill-instruction=%n", &val), val > 0) { + if (!valid_noop_fill.count(av[i] + val)) { + std::cerr << "invalid fill instruction " << (av[i] + val) << std::endl; + } else { + options.fill_noop_slot = av[i] + val; + } + } else if (sscanf(av[i], "--table-handle-offset%d", &val) > 0 && val >= 0 && + val < MAX_HANDLE_OFFSET) { + unique_table_offset = val; + } else if (sscanf(av[i], "--num-stages-override%d", &val) > 0 && val >= 0) { + options.num_stages_override = val; + } else if (!strcmp(av[i], "--target")) { + ++i; + if (!av[i]) { + std::cerr << "No target specified '--target '" << std::endl; + error_count++; + break; + } + if (options.target != NO_TARGET) { + std::cerr << "Multiple target options" << std::endl; + error_count++; + break; + } + FOR_ALL_TARGETS(MATCH_TARGET_OPTION, av[i]) { + std::cerr << "Unknown target " << av[i] << std::endl; + error_count++; + std::cerr << 
"Supported targets:" FOR_ALL_TARGETS(OUTPUT_TARGET) << std::endl; + } + } else if (av[i][0] == '-' && av[i][1] == '-') { + FOR_ALL_TARGETS(MATCH_TARGET_OPTION, av[i] + 2) { + std::cerr << "Unrecognized option " << av[i] << std::endl; + error_count++; + } + } else if (av[i][0] == '-' || av[i][0] == '+') { + bool flag = av[i][0] == '+'; + for (char *arg = av[i] + 1; *arg;) switch (*arg++) { + case 'a': + options.binary = FOUR_PIPE; + break; + case 'C': + options.condense_json = true; + break; + case 'G': + options.gen_json = true; + options.binary = NO_BINARY; + break; + case 'g': + options.debug_info = true; + break; + case 'h': + std::cout << usage(av[0]) << std::endl; + return 0; + break; + case 'l': + ++i; + if (!av[i]) { + std::cerr << "No log file specified '-l '" << std::endl; + error_count++; + break; + } + disable_clog = false; + if (auto *tmp = new std::ofstream(av[i])) { + if (*tmp) { + /* FIXME -- tmp leaks, but if we delete it, the log + * redirect fails, and we crash on exit */ + std::clog.rdbuf(tmp->rdbuf()); + } else { + std::cerr << "Can't open " << av[i] << " for writing" << std::endl; + delete tmp; + } + } + break; + case 'M': + options.match_compiler = true; + options.condense_json = false; + break; + case 'o': + ++i; + if (!av[i]) { + std::cerr << "No output directory specified '-o '" + << std::endl; + error_count++; + break; + } + if (stat(av[i], &st)) { + if (mkdir(av[i], 0777) < 0) { + std::cerr << "Can't create output dir " << av[i] << ": " + << strerror(errno) << std::endl; + error_count++; + } + } else if (!S_ISDIR(st.st_mode)) { + std::cerr << av[i] << " exists and is not a directory" << std::endl; + error_count++; + } + options.output_dir = av[i]; + break; + case 'p': + options.disable_power_gating = true; + break; + case 'q': + std::clog.setstate(std::ios::failbit); + break; + case 's': + options.binary = ONE_PIPE; + break; + case 'T': + disable_clog = false; + if (*arg) { + Log::addDebugSpec(arg); + arg += strlen(arg); + } else if (++i 
< ac) { + Log::addDebugSpec(av[i]); + } + break; + case 't': + ++i; + if (!av[i]) { + std::cerr << "No target specified '-t '" << std::endl; + error_count++; + break; + } + if (options.target != NO_TARGET) { + std::cerr << "Multiple target options" << std::endl; + error_count++; + break; + } + FOR_ALL_TARGETS(MATCH_TARGET_OPTION, av[i]) { + std::cerr << "Unknown target " << av[i]; + error_count++; + } + break; + case 'v': + disable_clog = false; + Log::increaseVerbosity(); + break; + case 'W': + if (strcmp(arg, "error")) + options.werror = true; + else + std::cout << "Unknown warning option -W" << arg << std::endl; + arg += strlen(arg); + break; + default: + std::cerr << "Unknown option " << (flag ? '+' : '-') << arg[-1] + << std::endl; + error_count++; + } + } else if (FILE *fp = fopen(av[i], "r")) { + // asm_parse_file needs to know correct number of stages + if (options.num_stages_override) { + Target::OVERRIDE_NUM_MAU_STAGES(options.num_stages_override); + } + + createSingleAsmParser(); + + if (!srcfiles++) firstsrc = av[i]; + error_count += asm_parse_file(av[i], fp); + if (error_count > 0) return 1; + fclose(fp); + asmfile = true; + asmfile_name = get_filename(av[i]); + asmfile_dir = get_directory(av[i]); + } else { + std::cerr << "Can't read " << av[i] << ": " << strerror(errno) << std::endl; + error_count++; + } + } + + check_target_pipes(pipe_id); + + if (disable_clog) std::clog.setstate(std::ios_base::failbit); + if (!asmfile) { + std::cerr << "No assembly file specified" << std::endl; + error_count++; + } + if (error_count > 0) std::cerr << usage(av[0]) << std::endl; + + if (Log::verbosity() > 0) { + gfm_out = open_output("mau.gfm.log"); + } + + if (error_count == 0 && !options.partial_input) { + // Check if file has no sections + no_sections_error_exit(); + // Check if mandatory sections are present in assembly + bool no_section = false; + no_section |= no_section_error("deparser"); + no_section |= no_section_error("parser"); + no_section |= 
no_section_error("phv"); + no_section |= no_section_error("stage"); + if (no_section) exit(1); + } + if (error_count == 0) { + Section::process_all(); + } + if (error_count == 0) { + if (srcfiles == 1 && options.output_dir.empty()) { + if (const char *p = strrchr(firstsrc, '/')) + options.output_dir = p + 1; + else if (const char *p = strrchr(firstsrc, '\\')) + options.output_dir = p + 1; + else + options.output_dir = firstsrc; + if (const char *e = strrchr(&options.output_dir[0], '.')) + options.output_dir.resize(e - &options.output_dir[0]); + options.output_dir += ".out"; + if (stat(options.output_dir.c_str(), &st) ? mkdir(options.output_dir.c_str(), 0777) + : !S_ISDIR(st.st_mode)) + options.output_dir.clear(); + } + output_all(); + } + if (log_error > 0) warning(0, "%d config errors in log file", log_error); + return error_count > 0 || (options.werror && warn_count > 0) ? 1 : 0; +} +#endif /* !BUILDING_FOR_GTEST */ + +std::string toString(target_t target) { + switch (target) { + case TOFINO: + return "Tofino"; + case TOFINO2: + return "Tofino2"; + case TOFINO2H: + return "Tofino2H"; + case TOFINO2U: + return "Tofino2U"; + case TOFINO2M: + return "Tofino2M"; + case TOFINO2A0: + return "Tofino2A0"; + default: + BUG("Unexpected target value: 0x%x", target); + return ""; + } +} + +std::ostream &operator<<(std::ostream &out, target_t target) { return out << toString(target); } + +void no_sections_error_exit() { + if (Section::no_sections_in_assembly()) { + std::cerr << "No valid sections found in assembly file" << std::endl; + exit(1); + } +} + +bool no_section_error(const char *name) { + if (!Section::section_in_assembly(name)) { + std::cerr << "No '" << name << "' section found in assembly file" << std::endl; + return true; + } + return false; +} + +class Version : public Section { + Version() : Section("version") {} + + void input(VECTOR(value_t) args, value_t data) { + if (data.type == tINT || data.type == tVEC) { // version 1.0.0 + parse_version(data); + } else 
if (data.type == tMAP) { // version 1.0.1 + for (auto &kv : MapIterChecked(data.map, true)) { + if (kv.key == "version" && (kv.value.type == tVEC || kv.value.type == tINT)) { + parse_version(kv.value); + } else if (kv.key == "run_id" && kv.value.type == tSTR) { + _run_id = kv.value.s; + } else if (kv.key == "compiler") { + if (kv.value.type == tSTR) { + _compiler = kv.value.s; + } else if (kv.value.type == tINT) { + _compiler = std::to_string(kv.value.i); + } else if (kv.value.type == tVEC) { + const char *sep = ""; + for (auto &el : kv.value.vec) { + _compiler += sep; + if (el.type == tSTR) + _compiler += el.s; + else if (el.type == tINT) + _compiler += std::to_string(el.i); + else + error(el.lineno, "can't understand compiler version"); + sep = "."; + } + } + } else if (kv.key == "target") { + if (kv.value.type == tSTR) { + auto old = options.target; + FOR_ALL_TARGETS(MATCH_TARGET_OPTION, kv.value.s) { + error(kv.value.lineno, "Unknown target %s", kv.value.s); + } + if (old != NO_TARGET && old != options.target) { + options.target = old; + error(kv.value.lineno, "Inconsistent target %s (previously set to %s)", + kv.value.s, Target::name()); + } + createSingleAsmParser(); + } else { + error(kv.value.lineno, "Invalid target %s", value_desc(kv.value)); + } + } else { + warning(kv.key.lineno, "ignoring unknown item %s in version", + value_desc(kv.key)); + } + } + } else { + error(data.lineno, "Invalid version section"); + } + } + + void output(json::map &ctx_json) { + if (!_compiler.empty()) ctx_json["compiler_version"] = _compiler; + ctx_json["run_id"] = _run_id; + } + + private: + void parse_version(value_t data) { + if (data.type == tINT) { + if (data.i != MAJOR_VERSION) + error(data.lineno, "Version %" PRId64 " not supported", data.i); + } else if (data.vec.size >= 2) { + if (CHECKTYPE(data[0], tINT) && CHECKTYPE(data[1], tINT) && + (data[0].i != MAJOR_VERSION || data[1].i > MINOR_VERSION)) + error(data.lineno, "Version %" PRId64 ".%" PRId64 " not supported", 
data[0].i, + data[1].i); + } else { + error(data.lineno, "Version not understood"); + } + } + + std::string _run_id, _compiler; + static Version singleton_version; +} Version::singleton_version; diff --git a/backends/tofino/bf-asm/bfas.h b/backends/tofino/bf-asm/bfas.h new file mode 100644 index 00000000000..ed13b05d7b7 --- /dev/null +++ b/backends/tofino/bf-asm/bfas.h @@ -0,0 +1,182 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_BFAS_H_ +#define BACKENDS_TOFINO_BF_ASM_BFAS_H_ + +#include +#include +#include + +#include +#include +#include + +enum config_version_t { CONFIG_OLD = 1, CONFIG_NEW = 2, CONFIG_BOTH = 3 }; +enum target_t { + NO_TARGET = 0, + TOFINO, + TOFINO2, + JBAY = TOFINO2, + TOFINO2H, + TOFINO2U, + TOFINO2M, + TOFINO2A0, + TARGET_INDEX_LIMIT +}; +enum binary_type_t { + NO_BINARY = -3, + FOUR_PIPE = -2, // binary replicating to all 4 pipes + ONE_PIPE = -1, // binary for one pipe with pipe offset addresses + PIPE0 = 0, // binary with data just in pipe 0 + PIPE1, // binary with data just in pipe 1 + PIPE2, // binary with data just in pipe 2 + PIPE3, // binary with data just in pipe 3 + MAX_PIPE_COUNT, // Maximum number of pipes which bfas can create binary for +}; + +extern struct option_t { + binary_type_t binary; + bool condense_json; + bool debug_info; + bool disable_egress_latency_padding; + bool 
disable_gfm_parity; + bool disable_long_branch; + bool disable_power_gating; + bool gen_json; + bool high_availability_enabled; + bool match_compiler; + bool multi_parsers; + bool partial_input; + bool singlewrite; + std::string stage_dependency_pattern; + target_t target; + bool tof2lab44_workaround; + config_version_t version; + bool werror; + bool nowarn; + bool log_hashes; + std::string output_dir; + int num_stages_override; + bool tof1_egr_parse_depth_checks_disabled; + const char *fill_noop_slot; +} options; + +extern unsigned unique_action_handle; +struct value_t; + +extern std::string asmfile_name; +extern std::string asmfile_dir; +extern std::unique_ptr gfm_out; + +class BaseAsmParser; +extern BaseAsmParser *asm_parser; +void createSingleAsmParser(); + +std::string toString(target_t target); +std::ostream &operator<<(std::ostream &out, target_t target); + +int asm_parse_file(const char *name, FILE *in); +int asm_parse_string(const char *in); + +void no_sections_error_exit(); +bool no_section_error(const char *name); + +extern int error_count, warn_count; +extern void error(int lineno, const char *fmt, va_list); +void error(int lineno, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +inline void error(int lineno, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + error(lineno, fmt, args); + va_end(args); +} +extern void warning(int lineno, const char *fmt, va_list); +void warning(int lineno, const char *fmt, ...) __attribute__((format(printf, 2, 3))); +inline void warning(int lineno, const char *fmt, ...) { +#ifdef BAREFOOT_INTERNAL + if (!options.nowarn) { + va_list args; + va_start(args, fmt); + warning(lineno, fmt, args); + va_end(args); + } +#endif /* BAREFOOT_INTERNAL */ +} + +inline const char *strip_prefix(const char *str, const char *pfx) { + if (const char *p = strstr(str, pfx)) return p + strlen(pfx); + return str; +} +void bug(const char *, int, const char * = 0, ...) 
__attribute__((format(printf, 3, 4))) +__attribute__((noreturn)); +inline void bug(const char *fname, int lineno, const char *fmt, ...) { +#ifdef NDEBUG + fprintf(stderr, "Assembler BUG"); +#else + fprintf(stderr, "%s:%d: Assembler BUG: ", fname, lineno); + if (fmt) { + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } +#endif /* !NDEBUG */ + fprintf(stderr, "\n"); + fflush(stderr); + std::terminate(); +} + +extern std::unique_ptr open_output(const char *, ...) + __attribute__((format(printf, 1, 2))); + +#define SRCFILE strip_prefix(__FILE__, "bf-asm/") +#define BUG(...) \ + do { \ + bug(SRCFILE, __LINE__, ##__VA_ARGS__); \ + } while (0) +#define BUG_CHECK(e, ...) \ + do { \ + if (!(e)) BUG(__VA_ARGS__); \ + } while (0) + +class VersionIter { + unsigned left, bit; + void check() { + while (left && !(left & 1)) { + ++bit; + left >>= 1; + } + } + VersionIter() : left(0), bit(0) {} + + public: + explicit VersionIter(config_version_t v) : left(v), bit(0) { check(); } + VersionIter begin() { return *this; } + VersionIter end() { return VersionIter(); } + int operator*() const { return bit; } + bool operator==(VersionIter &a) { return (left << bit) == (a.left << a.bit); } + VersionIter &operator++() { + left &= ~1; + check(); + return *this; + } +}; + +extern unsigned unique_table_offset; + +#endif /* BACKENDS_TOFINO_BF_ASM_BFAS_H_ */ diff --git a/backends/tofino/bf-asm/bfdis.cpp b/backends/tofino/bf-asm/bfdis.cpp new file mode 100644 index 00000000000..bb675fb0d65 --- /dev/null +++ b/backends/tofino/bf-asm/bfdis.cpp @@ -0,0 +1,185 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include +#include + +#include "bson.h" +#include "disasm.h" +#include "fdstream.h" + +Disasm *disasm = nullptr; + +int read_bin(std::istream &in) { + uint32_t atom_typ = 0; + while (in.read((char *)&atom_typ, 4)) { + if ((atom_typ >> 24) == 'H') { + json::map hdr; + if (!(in >> json::binary(hdr))) return -1; + if (auto target = hdr["target"]) { + disasm = Disasm::create(target.to()); + } else { + std::cerr << "no target specified in the binary" << std::endl; + delete disasm; + disasm = nullptr; + } + } else if ((atom_typ >> 24) == 'C') { + // future context json embedding in binary + std::unique_ptr ctxt_json; + if (!(in >> json::binary(ctxt_json))) return -1; + } else if ((atom_typ >> 24) == 'P') { + uint32_t prsr_hdl = 0; + if (!in.read((char *)&prsr_hdl, 4)) return -1; + } else if ((atom_typ >> 24) == 'R') { + // R block -- writing a single 32-bit register via 32-bit PCIe address + uint32_t reg_addr = 0, reg_data = 0; + if (!in.read((char *)®_addr, 4)) return -1; + if (!in.read((char *)®_data, 4)) return -1; + if (disasm) disasm->input_binary(reg_addr, 'R', ®_data, 1); + } else if ((atom_typ >> 24) == 'B') { + // B block -- write a range of 32-bit registers via 64-bit PCIe address + // size of the range is specified as count * width (in bits), which must + // always be a multiple of 32 + + uint64_t addr = 0; + uint32_t count = 0; + uint32_t width = 0; + + if (!in.read((char *)&addr, 8)) return -1; + if (!in.read((char *)&width, 4)) return -1; + if 
(!in.read((char *)&count, 4)) return -1; + // printf("B%08" PRIx64 ": %xx%x", addr, width, count); + count = (uint64_t)count * width / 32; + std::vector data(count); + if (!in.read((char *)&data[0], count * 4)) return -1; + if (disasm) disasm->input_binary(addr, 'B', &data[0], count); + } else if ((atom_typ >> 24) == 'D') { + // D block -- write a range of 128-bit memory via 64-bit chip address + // size of the range is specified as count * width (in bits), which must + // always be a multiple of 64 + + uint64_t addr = 0; + uint32_t count = 0; + uint32_t width = 0; + + if (!in.read((char *)&addr, 8)) return -1; + if (!in.read((char *)&width, 4)) return -1; + if (!in.read((char *)&count, 4)) return -1; + // printf("D%011" PRIx64 ": %xx%x", addr, width, count); + width /= 8; + std::vector data(count * width / 4); + if (!in.read((char *)&data[0], count * width)) return -1; + if (disasm) disasm->input_binary(addr, 'D', &data[0], count * width / 4); + } else if ((atom_typ >> 24) == 'S') { + // S block -- 'scanset' writing multiple data to a single 32-bit PCIE address + uint64_t sel_addr = 0, reg_addr = 0; + uint32_t sel_data = 0, width = 0, count = 0; + + if (!in.read((char *)&sel_addr, 8)) return -1; + if (!in.read((char *)&sel_data, 4)) return -1; + if (!in.read((char *)®_addr, 8)) return -1; + if (!in.read((char *)&width, 4)) return -1; + if (!in.read((char *)&count, 4)) return -1; + count = (uint64_t)count * width / 32; + std::vector data(count); + if (!in.read((char *)&data[0], count * 4)) return -1; + if (disasm) disasm->input_binary(reg_addr, 'S', &data[0], count); + } else { + fprintf(stderr, "\n"); + fprintf(stderr, "Parse error: atom_typ=%x (%c)\n", atom_typ, atom_typ >> 24); + fprintf(stderr, "fpos=%" PRIu64 " <%" PRIx64 "h>\n", (uint64_t)in.tellg(), + (uint64_t)in.tellg()); + fprintf(stderr, "\n"); + + return -1; + } + } + + return in.eof() ? 
0 : -1; +} + +int main(int ac, char **av) { + int error = 0; + for (int i = 1; i < ac; ++i) { + if (*av[i] == '-') { + for (char *arg = av[i] + 1; *arg;) switch (*arg++) { + case 'l': + ++i; + if (!av[i]) { + std::cerr << "No log file specified '-l '" << std::endl; + error_count++; + break; + } + if (auto *tmp = new std::ofstream(av[i])) { + if (*tmp) { + /* FIXME -- tmp leaks, but if we delete it, the log + * redirect fails, and we crash on exit */ + std::clog.rdbuf(tmp->rdbuf()); + } else { + std::cerr << "Can't open " << av[i] << " for writing" << std::endl; + delete tmp; + } + } + break; + case 'v': + Log::increaseVerbosity(); + break; + case 'T': + if (*arg) { + Log::addDebugSpec(arg); + arg += strlen(arg); + } else if (++i < ac) { + Log::addDebugSpec(av[i]); + } + break; + default: + fprintf(stderr, "ignoring argument -%c\n", *arg); + error = 1; + } + } else { + std::ifstream in(av[i], std::ios::binary); + if (!in) { + fprintf(stderr, "failed to open %s\n", av[i]); + error = 1; + continue; + } + unsigned char magic[4] = {}; + in.read((char *)magic, 4); + if (magic[0] == 0 && magic[3] && strchr("RDBH", magic[3])) { + in.seekg(0); + error |= read_bin(in); + } else if (magic[0] == 0x1f && magic[1] == 0x8b) { + if (auto *pipe = popen((std::string("zcat < ") + av[i]).c_str(), "r")) { + fdstream in(fileno(pipe)); + error |= read_bin(in); + pclose(pipe); + } else { + fprintf(stderr, "%s: Cannot open pipe to read\n", av[i]); + } + } else { + fprintf(stderr, "%s: Unknown file format\n", av[i]); + } + } + } + if (error == 1) fprintf(stderr, "usage: %s \n", av[0]); + return error; +} diff --git a/backends/tofino/bf-asm/bfdumpbin.cpp b/backends/tofino/bf-asm/bfdumpbin.cpp new file mode 100644 index 00000000000..078b9288758 --- /dev/null +++ b/backends/tofino/bf-asm/bfdumpbin.cpp @@ -0,0 +1,228 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the 
License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include +#include + +#include "bson.h" +#include "fdstream.h" + +struct { + bool oneLine; + bool noHeader; + bool noCtxtJson; +} options; + +int dump_bin(std::istream &in) { + uint32_t atom_typ = 0; + while (in.read((char *)&atom_typ, 4)) { + if ((atom_typ >> 24) == 'H') { + json::map hdr; + if (!(in >> json::binary(hdr))) return -1; + if (!options.noHeader) + for (auto &el : hdr) std::cout << el.first << " = " << el.second << std::endl; + } else if ((atom_typ >> 24) == 'C') { + // future context json embedding in binary + std::unique_ptr ctxt_json; + if (!(in >> json::binary(ctxt_json))) return -1; + if (!options.noCtxtJson) std::cout << ctxt_json; + } else if ((atom_typ >> 24) == 'P') { + uint32_t prsr_hdl = 0; + if (!in.read((char *)&prsr_hdl, 4)) return -1; + printf("P: %08x (parser handle)\n", prsr_hdl); + } else if ((atom_typ >> 24) == 'R') { + // R block -- writing a single 32-bit register via 32-bit PCIe address + uint32_t reg_addr = 0, reg_data = 0; + if (!in.read((char *)®_addr, 4)) return -1; + if (!in.read((char *)®_data, 4)) return -1; + printf("R%08x: %08x\n", reg_addr, reg_data); + } else if ((atom_typ >> 24) == 'B') { + // B block -- write a range of 32-bit registers via 64-bit PCIe address + // size of the range is specified as count * width (in bits), which must + // always be a multiple of 32 + + uint64_t addr = 0; + uint32_t count = 0; + uint32_t width = 0; + + if (!in.read((char *)&addr, 8)) return -1; + if (!in.read((char 
*)&width, 4)) return -1; + if (!in.read((char *)&count, 4)) return -1; + printf("B%08" PRIx64 ": %xx%x", addr, width, count); + if ((uint64_t)count * width % 32 != 0) printf(" (not a multiple of 32 bits!)"); + count = (uint64_t)count * width / 32; + uint32_t data, prev; + int repeat = 0, col = 0; + for (unsigned i = 0; i < count; ++i) { + if (!in.read((char *)&data, 4)) return -1; + if (i != 0 && data == prev) { + repeat++; + continue; + } + if (repeat > 0) { + printf(" x%-7d", repeat + 1); + if (++col > 8) col = 0; + } + repeat = 0; + if (!options.oneLine && col++ % 8 == 0) printf("\n "); + printf(" %08x", prev = data); + } + if (repeat > 0) printf(" x%d", repeat + 1); + printf("\n"); + } else if ((atom_typ >> 24) == 'D') { + // D block -- write a range of 128-bit memory via 64-bit chip address + // size of the range is specified as count * width (in bits), which must + // always be a multiple of 64 + + uint64_t addr = 0; + uint32_t count = 0; + uint32_t width = 0; + + if (!in.read((char *)&addr, 8)) return -1; + if (!in.read((char *)&width, 4)) return -1; + if (!in.read((char *)&count, 4)) return -1; + printf("D%011" PRIx64 ": %xx%x", addr, width, count); + if ((uint64_t)count * width % 64 != 0) printf(" (not a multiple of 64 bits!)"); + + width /= 8; + + uint64_t chunk[2], prev_chunk[2]; + int repeat = 0, col = 0; + for (unsigned i = 0; i < count * width; i += 16) { + if (!in.read((char *)chunk, 16)) return -1; + if (i != 0 && chunk[0] == prev_chunk[0] && chunk[1] == prev_chunk[1]) { + repeat++; + continue; + } + if (repeat > 0) { + printf(" x%d", repeat + 1); + col = 0; + } + repeat = 0; + if (!options.oneLine && col++ % 2 == 0) printf("\n "); + printf(" %016" PRIx64 "%016" PRIx64, prev_chunk[1] = chunk[1], + prev_chunk[0] = chunk[0]); + } + + if (repeat > 0) { + printf(" x%d", repeat + 1); + col = 0; + } + + if (count * width % 16 == 8) { + if (!in.read((char *)chunk, 8)) return -1; + if (!options.oneLine && col % 2 == 0) printf("\n "); + printf(" %016" 
PRIx64, chunk[0]); + } + printf("\n"); + } else if ((atom_typ >> 24) == 'S') { + // S block -- 'scanset' writing multiple data to a single 32-bit PCIE address + uint64_t sel_addr = 0, reg_addr = 0; + uint32_t sel_data = 0, width = 0, count = 0; + + if (!in.read((char *)&sel_addr, 8)) return -1; + if (!in.read((char *)&sel_data, 4)) return -1; + if (!in.read((char *)®_addr, 8)) return -1; + if (!in.read((char *)&width, 4)) return -1; + if (!in.read((char *)&count, 4)) return -1; + printf("S%011" PRIx64 ": %x, %011" PRIx64 ": %xx%x", sel_addr, sel_data, reg_addr, + width, count); + if (width % 32 != 0) printf(" (not a multiple of 32 bits!)"); + count = (uint64_t)count * width / 32; + uint32_t data, prev; + int repeat = 0, col = 0; + for (unsigned i = 0; i < count; ++i) { + if (!in.read((char *)&data, 4)) return -1; + if (i != 0 && data == prev) { + repeat++; + continue; + } + if (repeat > 0) { + printf(" x%-7d", repeat + 1); + if (++col > 8) col = 0; + } + repeat = 0; + if (!options.oneLine && col++ % 8 == 0) printf("\n "); + printf(" %08x", prev = data); + } + if (repeat > 0) printf(" x%d", repeat + 1); + printf("\n"); + } else { + fprintf(stderr, "\n"); + fprintf(stderr, "Parse error: atom_typ=%x (%c)\n", atom_typ, atom_typ >> 24); + fprintf(stderr, "fpos=%" PRIu64 " <%" PRIx64 "h>\n", (uint64_t)in.tellg(), + (uint64_t)in.tellg()); + fprintf(stderr, "\n"); + + return -1; + } + } + + return in.eof() ? 
0 : -1; +} + +int main(int ac, char **av) { + int error = 0; + for (int i = 1; i < ac; ++i) { + if (*av[i] == '-') { + for (char *arg = av[i] + 1; *arg;) switch (*arg++) { + case 'C': + options.noCtxtJson = true; + break; + case 'H': + options.noHeader = true; + break; + case 'L': + options.oneLine = true; + break; + default: + fprintf(stderr, "ignoring argument -%c\n", *arg); + error = 1; + } + } else { + std::ifstream in(av[i], std::ios::binary); + if (!in) { + fprintf(stderr, "failed to open %s\n", av[i]); + error = 1; + continue; + } + unsigned char magic[4] = {}; + in.read((char *)magic, 4); + if (magic[0] == 0 && magic[3] && strchr("RDBH", magic[3])) { + in.seekg(0); + error |= dump_bin(in); + } else if (magic[0] == 0x1f && magic[1] == 0x8b) { + if (auto *pipe = popen((std::string("zcat < ") + av[i]).c_str(), "r")) { + fdstream in(fileno(pipe)); + error |= dump_bin(in); + pclose(pipe); + } else { + fprintf(stderr, "%s: Cannot open pipe to read\n", av[i]); + } + } else { + fprintf(stderr, "%s: Unknown file format\n", av[i]); + } + } + } + if (error == 1) fprintf(stderr, "usage: %s \n", av[0]); + return error; +} diff --git a/backends/tofino/bf-asm/bflink b/backends/tofino/bf-asm/bflink new file mode 100755 index 00000000000..3195414ba5c --- /dev/null +++ b/backends/tofino/bf-asm/bflink @@ -0,0 +1,182 @@ +#!/bin/sh + +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# +# SPDX-License-Identifier: Apache-2.0 + +WALLE="" +CHIP="" +OUT="" +objs="" +object_files="" +base_program="" +debug_info=false +tmpdir="" +pipe_args="" +READLINK_COMMAND=$(which greadlink || which readlink) +execdir=$(dirname $($READLINK_COMMAND -f $0)) + +if [ x"$BFAS_OPTIONS" = x"-g" ]; then + debug_info=true +fi + +tempfile() { + file=$(basename $1 $2) + orig=file + ctr=1 + while [ -r $tmpdir/$file ]; do + file=$ctr-$file + ctr=$((ctr + 1)) + done + echo $file +} + +while [ $# -gt 0 ]; do + case $1 in + -b) + base_program="$2" + shift ; shift;; + -g) + debug_info=true + shift;; + -o) + OUT="$2" + shift ; shift ;; + --walle|-w) + WALLE="$2" + shift ; shift ;; + --target|-t) + CHIP="$2" + OUT="$2.bin" + shift ; shift ;; + --singlepipe|-s) + pipe_args="--top memories.pipe --top regs.pipe" + shift ;; + --allpipes|-a) + pipe_args="" + shift ;; + *.json.Z) + if [ -z "$tmpdir" ]; then + tmpdir=$(mktemp -d) + fi + file=$(tempfile $1 .Z) + gunzip -c $1 >$tmpdir/$file + objs="$objs $tmpdir/$file" + object_files="$object_files $1" + shift ;; + *.json.gz) + if [ -z "$tmpdir" ]; then + tmpdir=$(mktemp -d) + fi + file=$(tempfile $1 .gz) + gunzip -c $1 >$tmpdir/$file + objs="$objs $tmpdir/$file" + object_files="$object_files $1" + shift ;; + *.json.bz) + if [ -z "$tmpdir" ]; then + tmpdir=$(mktemp -d) + fi + file=$(tempfile $1 .bz) + bzcat $1 >$tmpdir/$file + objs="$objs $tmpdir/$file" + object_files="$object_files $1" + shift ;; + *.json.bz2) + if [ -z "$tmpdir" ]; then + tmpdir=$(mktemp -d) + fi + file=$(tempfile $1 .bz2) + bzcat $1 >$tmpdir/$file + objs="$objs $tmpdir/$file" + object_files="$object_files $1" + shift ;; + *.json) + objs="$objs $1" + object_files="$object_files $1" + shift ;; + *) + echo >&2 "Unknown argument $1" + shift ;; + esac +done + +if [ ! 
-x "$WALLE" ]; then + if [ -f $execdir/walle -a -x $execdir/walle ]; then + WALLE=$execdir/walle + elif [ -x $execdir/walle.py ]; then + WALLE=$execdir/walle.py + elif [ -x $execdir/walle/walle.py ]; then + WALLE=$execdir/walle/walle.py + elif [ -e "$WALLE" ]; then + echo "$WALLE must be executable" + exit 1 + else + echo "4: $WALLE" + echo >&2 "Can't find walle" + exit 1 + fi +fi + +if [ -z "$CHIP" ]; then + for jf in $objs; do + if [ $(basename $jf) = regs.top.cfg.json ]; then + CHIP=$(grep '"_type"' $jf | sed -e 's/.*"regs\.//' -e 's/[_"].*//') + break + fi + done + if [ -z "$CHIP" ]; then + echo >&2 "Can't find target, assuming tofino" + CHIP=tofino + fi + if [ -z "$OUT" ]; then + OUT=$CHIP.bin + fi +fi + +schema_arg="" +if [ -r $CHIP/chip.schema ]; then + schema_arg="--schema $CHIP/chip.schema" +elif [ -r $execdir/$CHIP/chip.schema ]; then + schema_arg="--schema $execdir/$CHIP/chip.schema" +fi + +#echo "$WALLE --target $CHIP $schema_arg -o $OUT $objs $pipe_args" +$WALLE --target $CHIP $schema_arg -o $OUT $objs $pipe_args +rc=$? + +# cleanup +output_dir=$(dirname $OUT) +if [ -z "$output_dir" ]; then + output_dir="./" +fi +if ! $debug_info; then + rm -f $object_files +fi +if [ ! -z "$base_program" ] ; then + pp=$output_dir/${base_program}.p4i + if ! $debug_info && test -e $pp ; then rm -f $pp; fi +fi +if ! $debug_info && test -e $output_dir/bfas.config.log ; then + rm -f $output_dir/bfas.config.log +fi +# if we uncompressed, remove the directory +if [ -d "$tmpdir" ]; then + rm -rf $tmpdir +fi + +# exit with a return code if walle failed +exit $rc diff --git a/backends/tofino/bf-asm/binary_output.h b/backends/tofino/bf-asm/binary_output.h new file mode 100644 index 00000000000..23d56608a8d --- /dev/null +++ b/backends/tofino/bf-asm/binary_output.h @@ -0,0 +1,72 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_BINARY_OUTPUT_H_ +#define BACKENDS_TOFINO_BF_ASM_BINARY_OUTPUT_H_ + +#include +#include + +namespace binout { + +class tag { + char data[4] = {0, 0, 0, 0}; + + public: + tag(char ch) { data[3] = ch; } // NOLINT(runtime/explicit) + friend std::ostream &operator<<(std::ostream &out, const tag &e) { + return out.write(e.data, 4); + } +}; + +class byte4 { + char data[4]; + + public: + byte4(uint32_t v) { // NOLINT(runtime/explicit) + data[0] = v & 0xff; + data[1] = (v >> 8) & 0xff; + data[2] = (v >> 16) & 0xff; + data[3] = (v >> 24) & 0xff; + } + friend std::ostream &operator<<(std::ostream &out, const byte4 &e) { + return out.write(e.data, 4); + } +}; + +class byte8 { + char data[8]; + + public: + byte8(uint64_t v) { // NOLINT(runtime/explicit) + data[0] = v & 0xff; + data[1] = (v >> 8) & 0xff; + data[2] = (v >> 16) & 0xff; + data[3] = (v >> 24) & 0xff; + data[4] = (v >> 32) & 0xff; + data[5] = (v >> 40) & 0xff; + data[6] = (v >> 48) & 0xff; + data[7] = (v >> 56) & 0xff; + } + friend std::ostream &operator<<(std::ostream &out, const byte8 &e) { + return out.write(e.data, 8); + } +}; + +} // end namespace binout + +#endif /* BACKENDS_TOFINO_BF_ASM_BINARY_OUTPUT_H_ */ diff --git a/backends/tofino/bf-asm/bson.cpp b/backends/tofino/bf-asm/bson.cpp new file mode 100644 index 00000000000..b6f0d8d261a --- /dev/null +++ b/backends/tofino/bf-asm/bson.cpp @@ -0,0 +1,320 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "bson.h" + +#include + +#include "lib/hex.h" + +namespace { +uint8_t get8(std::istream &in) { + char data; + in.read(&data, sizeof(data)); + return data & 0xffU; +} + +int32_t get32(std::istream &in) { + char data[4]; + in.read(data, sizeof(data)); + return (data[0] & 0xffU) | ((data[1] & 0xffU) << 8) | ((data[2] & 0xffU) << 16) | + ((data[3] & 0xffU) << 24); +} +int64_t get64(std::istream &in) { + char data[8]; + in.read(data, sizeof(data)); + return (data[0] & 0xffULL) | ((data[1] & 0xffULL) << 8) | ((data[2] & 0xffULL) << 16) | + ((data[3] & 0xffULL) << 24) | ((data[4] & 0xffULL) << 32) | ((data[5] & 0xffULL) << 40) | + ((data[6] & 0xffULL) << 48) | ((data[7] & 0xffULL) << 56); +} + +std::string out32(int32_t val) { + char data[4]; + data[0] = val & 0xff; + data[1] = (val >> 8) & 0xff; + data[2] = (val >> 16) & 0xff; + data[3] = (val >> 24) & 0xff; + return std::string(data, sizeof(data)); +} + +std::string out64(int64_t val) { + char data[8]; + data[0] = val & 0xff; + data[1] = (val >> 8) & 0xff; + data[2] = (val >> 16) & 0xff; + data[3] = (val >> 24) & 0xff; + data[4] = (val >> 32) & 0xff; + data[5] = (val >> 40) & 0xff; + data[6] = (val >> 48) & 0xff; + data[7] = (val >> 56) & 0xff; + return std::string(data, sizeof(data)); +} + +} // end anonymous namespace + +namespace json { + +std::istream &operator>>(std::istream &in, bson_wrap o) { + json::vector &out = o.o; + std::streamoff start = in.tellg(); + 
std::streamoff end = start + get32(in); + out.clear(); + while (uint8_t type = get8(in)) { + if (!in) break; + if (in.tellg() >= end) { + std::cerr << "truncated array" << std::endl; + in.setstate(std::ios::failbit); + break; + } + std::string key; + getline(in, key, '\0'); + if (key != std::to_string(out.size())) std::cerr << "incorrect key in array" << std::endl; + switch (type) { + case 0x02: { + uint32_t len = get32(in) - 1; + std::string val; + val.resize(len); + in.read(&val[0], len); + out.push_back(val.c_str()); + if (in.get() != 0) { + std::cerr << "missing NUL in bson string" << std::endl; + in.setstate(std::ios::failbit); + } + break; + } + case 0x03: { + json::map obj; + in >> binary(obj); + out.push_back(std::move(obj)); + break; + } + case 0x04: { + json::vector obj; + in >> binary(obj); + out.push_back(std::move(obj)); + break; + } + case 0x08: + switch (get8(in)) { + case 0: + out.push_back(false); + break; + case 1: + out.push_back(true); + break; + default: + std::cerr << "invalid boolean value" << std::endl; + in.setstate(std::ios::failbit); + break; + } + break; + case 0x0a: + out.push_back(nullptr); + break; + case 0x10: + out.push_back(get32(in)); + break; + case 0x12: + out.push_back(get64(in)); + break; + case 0x7f: + case 0xff: + break; + default: + std::cerr << "unhandled bson tag " << hex(type) << std::endl; + break; + } + } + if (start != -1 && in && in.tellg() != end) { + std::cerr << "incorrect length for object" << std::endl; + } + return in; +} + +std::istream &operator>>(std::istream &in, bson_wrap o) { + json::map &out = o.o; + std::streamoff start = in.tellg(); + std::streamoff end = start + get32(in); + out.clear(); + while (uint8_t type = get8(in)) { + if (!in) break; + if (in.tellg() >= end) { + std::cerr << "truncated object" << std::endl; + in.setstate(std::ios::failbit); + break; + } + std::string key; + getline(in, key, '\0'); + if (out.count(key.c_str())) std::cerr << "duplicate key in map" << std::endl; + switch (type) { 
+ case 0x02: { + uint32_t len = get32(in) - 1; + std::string val; + val.resize(len); + in.read(&val[0], len); + out[key] = val; + if (in.get() != 0) { + std::cerr << "missing NUL in bson string" << std::endl; + in.setstate(std::ios::failbit); + } + break; + } + case 0x03: { + json::map obj; + in >> binary(obj); + out[key] = mkuniq(std::move(obj)); + break; + } + case 0x04: { + json::vector obj; + in >> binary(obj); + out[key] = mkuniq(std::move(obj)); + break; + } + case 0x08: + switch (get8(in)) { + case 0: + out[key] = mkuniq(False()); + break; + case 1: + out[key] = mkuniq(True()); + break; + default: + std::cerr << "invalid boolean value" << std::endl; + in.setstate(std::ios::failbit); + break; + } + break; + case 0x0a: + out[key] = std::unique_ptr(); + break; + case 0x10: + out[key] = get32(in); + break; + case 0x12: + out[key] = get64(in); + break; + case 0x7f: + case 0xff: + break; + default: + std::cerr << "unhandled bson tag " << hex(type) << std::endl; + break; + } + } + if (start != -1 && in && in.tellg() != end) { + std::cerr << "incorrect length for object" << std::endl; + } + return in; +} + +static std::unique_ptr map_is_vector(json::map &m) { + int idx = 0; + for (auto &el : m) { + if (*el.first != std::to_string(idx).c_str()) return nullptr; + ++idx; + } + if (idx == 0) return nullptr; + auto rv = mkuniq(); + for (auto &el : m) rv->push_back(std::move(el.second)); + // return std::move(rv); + return rv; +} + +std::istream &operator>>(std::istream &in, bson_wrap> json) { + json::map rv; + in >> binary(rv); + if (auto asvec = map_is_vector(rv)) + json.o = std::move(asvec); + else + json.o = mkuniq(std::move(rv)); + return in; +} + +std::string bson_encode(const json::vector &v); +std::string bson_encode(const json::map &m); + +std::string bson_encode_element(const std::string &key, const json::obj *o) { + if (!o) return '\x0A' + key + '\0'; + if (o->is()) return '\x08' + key + '\0' + '\1'; + if (o->is()) return '\x08' + key + '\0' + '\0'; + if 
(o->is()) { + auto &n = o->to(); + if (static_cast(n.val) == n.val) + return '\x10' + key + '\0' + out32(n.val); + else + return '\x12' + key + '\0' + out64(n.val); + } + if (o->is()) { + auto &s = o->to(); + return '\x02' + key + '\0' + out32(s.size() + 1) + s + '\0'; + } + if (o->is()) { + auto doc = bson_encode(o->to()); + return '\x04' + key + '\0' + out32(doc.size() + 4) + doc; + } + if (o->is()) { + auto doc = bson_encode(o->to()); + return '\x03' + key + '\0' + out32(doc.size() + 4) + doc; + } + assert(0); + return ""; // quiet warning +} + +std::string bson_encode(const json::vector &v) { + std::string rv; + int idx = 0; + for (auto &el : v) { + rv += bson_encode_element(std::to_string(idx), el.get()); + ++idx; + } + rv += '\0'; + return rv; +} +std::string bson_encode(const json::map &m) { + std::string rv; + for (auto &el : m) { + if (auto key = el.first->as_string()) + rv += bson_encode_element(*key, el.second.get()); + else + std::cerr << "Can't encode non-string key in bson object" << std::endl; + } + rv += '\0'; + return rv; +} + +std::ostream &operator<<(std::ostream &out, bson_wrap v) { + auto data = bson_encode(v.o); + out.write(out32(data.size() + 4).c_str(), 4); + out.write(data.data(), data.size()); + return out; +} +std::ostream &operator<<(std::ostream &out, bson_wrap m) { + auto data = bson_encode(m.o); + out.write(out32(data.size() + 4).c_str(), 4); + out.write(data.data(), data.size()); + return out; +} + +std::ostream &operator<<(std::ostream &out, bson_wrap json) { + if (auto m = json.o.as_map()) return out << binary(*m); + if (auto v = json.o.as_vector()) return out << binary(*v); + std::cerr << "object not map or vector can't be output as bson" << std::endl; + return out; +} + +} // end namespace json diff --git a/backends/tofino/bf-asm/bson.h b/backends/tofino/bf-asm/bson.h new file mode 100644 index 00000000000..d193123c322 --- /dev/null +++ b/backends/tofino/bf-asm/bson.h @@ -0,0 +1,74 @@ +/** + * Copyright (C) 2024 Intel Corporation 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_BSON_H_ +#define BACKENDS_TOFINO_BF_ASM_BSON_H_ + +#include + +#include "backends/tofino/bf-asm/json.h" + +namespace json { + +template +struct bson_wrap { + T &o; + bson_wrap(T &o) : o(o) {} // NOLINT(runtime/explicit) + template + bson_wrap(U &o) : o(o) {} // NOLINT(runtime/explicit) +}; + +template +bson_wrap binary(T &o) { + return bson_wrap(o); +} + +std::istream &operator>>(std::istream &in, bson_wrap> json); +std::istream &operator>>(std::istream &in, bson_wrap json); +std::istream &operator>>(std::istream &in, bson_wrap json); +inline std::istream &operator>>(std::istream &in, bson_wrap json) { + std::unique_ptr p; + in >> binary(p); + if (in) json.o = p.release(); + return in; +} + +std::ostream &operator<<(std::ostream &out, bson_wrap); +std::ostream &operator<<(std::ostream &out, bson_wrap); +std::ostream &operator<<(std::ostream &out, bson_wrap json); +inline std::ostream &operator<<(std::ostream &out, bson_wrap json) { + return operator<<(out, bson_wrap(json.o)); +} +inline std::ostream &operator<<(std::ostream &out, bson_wrap json) { + return operator<<(out, bson_wrap(json.o)); +} +inline std::ostream &operator<<(std::ostream &out, bson_wrap json) { + return operator<<(out, bson_wrap(json.o)); +} +inline std::ostream &operator<<(std::ostream &out, bson_wrap json) { + return out << binary(*json.o); +} +inline 
std::ostream &operator<<(std::ostream &out, bson_wrap json) { + return out << binary(*json.o); +} +inline std::ostream &operator<<(std::ostream &out, bson_wrap> json) { + return out << binary(*json.o.get()); +} + +} // end namespace json + +#endif /* BACKENDS_TOFINO_BF_ASM_BSON_H_ */ diff --git a/backends/tofino/bf-asm/checked_array.h b/backends/tofino/bf-asm/checked_array.h new file mode 100644 index 00000000000..0c047fd0474 --- /dev/null +++ b/backends/tofino/bf-asm/checked_array.h @@ -0,0 +1,146 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_CHECKED_ARRAY_H_ +#define BACKENDS_TOFINO_BF_ASM_CHECKED_ARRAY_H_ + +#include + +#include "bfas.h" // to get at the options +#include "lib/log.h" + +void print_regname(std::ostream &out, const void *addr, const void *end); + +template +class checked_array; +template +std::ostream &operator<<(std::ostream &out, checked_array *arr); + +template +class checked_array_base { + public: + virtual T &operator[](size_t) = 0; + virtual const T &operator[](size_t) const = 0; + virtual size_t size() const = 0; + virtual T *begin() = 0; + virtual T *end() = 0; + virtual bool modified() const = 0; + virtual void set_modified(bool v = true) = 0; + virtual bool disabled() const = 0; + virtual bool disable() = 0; + virtual bool disable_if_zero() = 0; + virtual void enable() = 0; +}; + +template +class checked_array : public checked_array_base { + bool disabled_; + T data[S]; + + public: + checked_array() : disabled_(false) {} + template + explicit checked_array(U v) : disabled_(false) { + for (auto &e : data) new (&e) T(v); + } + template + checked_array(const std::initializer_list &v) : disabled_(false) { + auto it = v.begin(); + for (auto &e : data) { + if (it == v.end()) break; + new (&e) T(*it++); + } + } + T &operator[](size_t idx) { + if (idx >= S) { + LOG1("ERROR: array index " << idx << " out of bounds " << this); + BUG("array index %zu out of bounds (%zu)", idx, S); + } + return data[idx]; + } + const T &operator[](size_t idx) const { + if (idx >= S) { + LOG1("ERROR: array index " << idx << " out of bounds " << this); + BUG("array index %zu out of bounds (%zu)", idx, S); + } + return data[idx]; + } + size_t size() const { return S; } + T *begin() { return data; } + T *end() { return data + S; } + bool modified() const { + for (size_t i = 0; i < S; i++) + if (data[i].modified()) return true; + return false; + } + void set_modified(bool v = true) { + for (size_t i = 0; i < S; i++) 
data[i].set_modified(v); + } + bool disabled() const { return disabled_; } + bool disable() { + bool rv = true; + for (size_t i = 0; i < S; i++) + if (!data[i].disable()) rv = false; + if (rv) disabled_ = true; + return rv; + } + void enable() { + disabled_ = false; + for (size_t i = 0; i < S; i++) data[i].enable(); + } + bool disable_if_unmodified() { + bool rv = true; + for (size_t i = 0; i < S; i++) + if (!data[i].disable_if_unmodified()) rv = false; + if (rv && !options.gen_json) { + /* Can't actually disable arrays when generating json, as walle doesn't like it, + * but allow containing object to be disabled */ + disabled_ = true; + } + return rv; + } + bool disable_if_zero() { + bool rv = true; + for (size_t i = 0; i < S; i++) + if (!data[i].disable_if_zero()) rv = false; + if (rv && !options.gen_json) { + /* Can't actually disable arrays when generating json, as walle doesn't like it, + * but allow containing object to be disabled */ + disabled_ = true; + } + return rv; + } + bool disable_if_reset_value() { + bool rv = true; + for (size_t i = 0; i < S; i++) + if (!data[i].disable_if_reset_value()) rv = false; + if (rv && !options.gen_json) { + /* Can't actually disable arrays when generating json, as walle doesn't like it, + * but allow containing object to be disabled */ + disabled_ = true; + } + return rv; + } +}; + +template +inline std::ostream &operator<<(std::ostream &out, checked_array *arr) { + print_regname(out, arr, arr + 1); + return out; +} + +#endif /* BACKENDS_TOFINO_BF_ASM_CHECKED_ARRAY_H_ */ diff --git a/backends/tofino/bf-asm/cmake/config.h.cmake b/backends/tofino/bf-asm/cmake/config.h.cmake new file mode 100644 index 00000000000..3e8ae79d73f --- /dev/null +++ b/backends/tofino/bf-asm/cmake/config.h.cmake @@ -0,0 +1,17 @@ +#ifndef __BFASM_CONFIG_H__ +#define __BFASM_CONFIG_H__ + +/* Define to 1 if you have the execinfo.h header */ +#cmakedefine HAVE_EXECINFO_H @HAVE_EXECINFO_H@ + +/* Define to 1 if you have the ucontext.h header */ 
+#cmakedefine HAVE_UCONTEXT_H @HAVE_UCONTEXT_H@ + +/* Schema version */ +#cmakedefine CONTEXT_SCHEMA_VERSION "@CONTEXT_SCHEMA_VERSION@" + +/* define the version */ +#cmakedefine TFAS_VERSION "@BFN_P4C_VERSION@" + + +#endif // __BFASM_CONFIG_H__ diff --git a/backends/tofino/bf-asm/constants.h b/backends/tofino/bf-asm/constants.h new file mode 100644 index 00000000000..374ef837287 --- /dev/null +++ b/backends/tofino/bf-asm/constants.h @@ -0,0 +1,242 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef CONSTANTS_H_ +#define CONSTANTS_H_ + +enum { + /* global constants related to MAU stage */ + LOGICAL_TABLES_PER_STAGE = 16, + PHYSICAL_TABLES_PER_STAGE = 16, + TCAM_TABLES_PER_STAGE = 8, + SRAM_ROWS = 8, + LOGICAL_SRAM_ROWS = 16, + SRAM_UNITS_PER_ROW = 12, + MAPRAM_UNITS_PER_ROW = 6, + MEM_WORD_WIDTH = 128, + SRAM_DEPTH_BITS = 10, + SRAM_DEPTH = 1 << SRAM_DEPTH_BITS, + LAMB_DEPTH_BITS = 6, + LAMB_DEPTH = 1 << LAMB_DEPTH_BITS, + TCAM_ROWS = 12, + TCAM_UNITS_PER_ROW = 2, + TCAM_XBAR_GROUPS = 12, + TCAM_XBAR_GROUP_SIZE = 44, + TCAM_XBAR_INPUT_BYTES = 68, + TCAM_VPN_BITS = 6, + TCAM_WORD_BITS = 9, + TCAM_FORMAT_WIDTH = 47, + TCAM_PAYLOAD_BITS = 1, + TCAM_PAYLOAD_BITS_START = 0, + TCAM_MATCH_BITS_START = TCAM_PAYLOAD_BITS_START + TCAM_PAYLOAD_BITS, + TCAM_PARITY_BITS = 2, + TCAM_PARITY_BITS_START = 45, + TCAM_VERSION_BITS = 2, + TCAM_VERSION_BITS_START = 43, + EXACT_XBAR_GROUPS = 8, + EXACT_XBAR_GROUP_SIZE = 128, + BYTE_XBAR_GROUPS = 8, + BYTE_XBAR_GROUP_SIZE = 8, + GALOIS_FIELD_MATRIX_COLUMNS = 52, + EXACT_HASH_GROUP_SIZE = 52, + EXACT_HASH_ADR_BITS = 10, + EXACT_HASH_ADR_GROUPS = 5, + EXACT_HASH_SELECT_BITS = 12, + EXACT_HASH_FIRST_SELECT_BIT = EXACT_HASH_GROUP_SIZE - EXACT_HASH_SELECT_BITS, + EXACT_VPN_BITS = 9, + EXACT_WORD_BITS = 10, + NEXT_TABLE_MAX_RAM_EXTRACT_BITS = 8, + MAX_LONGBRANCH_TAGS = 8, + MAX_IMMED_ACTION_DATA = 32, + ACTION_DATA_8B_SLOTS = 16, + ACTION_DATA_16B_SLOTS = 24, + ACTION_DATA_32B_SLOTS = 16, + ACTION_DATA_BUS_SLOTS = ACTION_DATA_8B_SLOTS + ACTION_DATA_16B_SLOTS + ACTION_DATA_32B_SLOTS, + ACTION_DATA_BUS_BYTES = + ACTION_DATA_8B_SLOTS + 2 * ACTION_DATA_16B_SLOTS + 4 * ACTION_DATA_32B_SLOTS, + ACTION_HV_XBAR_SLICES = 8, + ACTION_HV_XBAR_SLICE_SIZE = 16, + ACTION_INSTRUCTION_SUCCESSOR_TABLE_DEPTH = 8, + ACTION_INSTRUCTION_ADR_ENABLE = 0x40, + ACTION_IMEM_SLOTS = 32, + ACTION_IMEM_COLORS = 2, + ACTION_IMEM_ADDR_MAX = ACTION_IMEM_SLOTS * ACTION_IMEM_COLORS, + 
ACTION_ALWAYS_RUN_IMEM_ADDR = 63, + SELECTOR_PORTS_PER_WORD = 120, + STATEFUL_PREDICATION_ENCODE_NOOP = 0, + STATEFUL_PREDICATION_ENCODE_NOTCMPHI = 3, + STATEFUL_PREDICATION_ENCODE_NOTCMPLO = 5, + STATEFUL_PREDICATION_ENCODE_CMPLO = 0xaaaa, + STATEFUL_PREDICATION_ENCODE_CMPHI = 0xcccc, + STATEFUL_PREDICATION_ENCODE_CMP0 = 0xaaaa, + STATEFUL_PREDICATION_ENCODE_CMP1 = 0xcccc, + STATEFUL_PREDICATION_ENCODE_CMP2 = 0xf0f0, + STATEFUL_PREDICATION_ENCODE_CMP3 = 0xff00, + STATEFUL_PREDICATION_ENCODE_UNCOND = 0xffff, + STATEFUL_PREDICATION_OUTPUT = 6, + // See bf-drivers/include/pipe_mgr/pipe_mgr_intf.h for the definitions + TYPE_ENUM_SHIFT = 24, + PIPE_ID_SHIFT = 28, + REGISTER_PARAM_HANDLE_START = (0x08 << TYPE_ENUM_SHIFT), + ACTION_HANDLE_START = (0x20 << TYPE_ENUM_SHIFT), + FIELD_HANDLE_START = (0x9 << TYPE_ENUM_SHIFT), + PER_FLOW_ENABLE_BITS = 1, + METER_TYPE_BITS = 3, + // Order is METER_TYPE, METER_PFE, METER_ADDRESS + METER_TYPE_START_BIT = 24, + METER_LOWER_HUFFMAN_BITS = 7, + METER_ADDRESS_BITS = 23, + METER_FULL_ADDRESS_BITS = METER_ADDRESS_BITS + PER_FLOW_ENABLE_BITS + METER_TYPE_BITS, + METER_ADDRESS_ZERO_PAD = 23, + METER_PER_FLOW_ENABLE_START_BIT = 23, + IDLETIME_BUSSES = 20, + IDLETIME_BUSSES_PER_HALF = IDLETIME_BUSSES / 2, + IDLETIME_ADDRESS_PER_FLOW_ENABLE_START_BIT = 20, + IDLETIME_ADDRESS_BITS = 20, + IDLETIME_FULL_ADDRESS_BITS = IDLETIME_ADDRESS_BITS + PER_FLOW_ENABLE_BITS, + IDLETIME_ADDRESS_ZERO_PAD = 4, + IDLETIME_HUFFMAN_BITS = 4, + SELECTOR_METER_TYPE_START_BIT = METER_TYPE_START_BIT, + SELECTOR_LOWER_HUFFMAN_BITS = METER_LOWER_HUFFMAN_BITS, + SELECTOR_METER_ADDRESS_BITS = METER_ADDRESS_BITS, + SELECTOR_PER_FLOW_ENABLE_START_BIT = METER_PER_FLOW_ENABLE_START_BIT, + SELECTOR_VHXBAR_HASH_BUS_INDEX = 3, + SELECTOR_LENGTH_MOD_BITS = 5, + STAT_ADDRESS_BITS = 19, + STAT_FULL_ADDRESS_BITS = STAT_ADDRESS_BITS + PER_FLOW_ENABLE_BITS, + STAT_ADDRESS_ZERO_PAD = 7, + STAT_METER_COLOR_LOWER_HUFFMAN_BITS = 3, + STATISTICS_PER_FLOW_ENABLE_START_BIT = 19, + 
STATISTICS_PER_FLOW_SHIFT_COUNT = 7, + ACTION_ADDRESS_ZERO_PAD = 5, + ACTION_ADDRESS_BITS = 22, + ACTION_FULL_ADDRESS_BITS = 23, + ACTION_DATA_PER_FLOW_ENABLE_START_BIT = ACTION_ADDRESS_BITS, + ACTION_DATA_LOWER_HUFFMAN_BITS = 5, + ACTION_DATA_UPPER_HUFFMAN_BITS = 2, + ACTION_DATA_HUFFMAN_BITS = ACTION_DATA_LOWER_HUFFMAN_BITS + ACTION_DATA_UPPER_HUFFMAN_BITS, + ACTION_DATA_HUFFMAN_DIFFERENCE = 10, + MAX_PORTS = 288, + MAX_LRT_ENTRIES = 3, + UPPER_MATCH_CENTRAL_FIRST_ROW = SRAM_ROWS / 2, + UPPER_MATCH_CENTRAL_FIRST_LOGICAL_ROW = UPPER_MATCH_CENTRAL_FIRST_ROW * 2, + CHECKSUM_ENGINE_PHVID_TOFINO_LOW = 224, + CHECKSUM_ENGINE_PHVID_TOFINO_HIGH = 235, + CHECKSUM_ENGINE_PHVID_TOFINO_PER_GRESS = 6, + CONSTANTS_PHVID_JBAY_LOW = 224, + CONSTANTS_PHVID_JBAY_HIGH = 232, +}; + +enum METER_ACCESS_TYPE { + NOP = 0, + METER_LPF_COLOR_BLIND = 2, + METER_SELECTOR = 4, + METER_COLOR_AWARE = 6, + STATEFUL_INSTRUCTION_0 = 1, + STATEFUL_INSTRUCTION_1 = 3, + STATEFUL_INSTRUCTION_2 = 5, + STATEFUL_INSTRUCTION_3 = 7, + METER_COLOR_ACCESS = -1 // special for color mapram access +}; + +/* constants for various config params */ +#include +#undef OVERFLOW /* get rid of global preproc define from math.h */ +namespace UnitRam { +enum { + MATCH = 1, + ACTION = 2, + STATISTICS = 3, + METER = 4, + STATEFUL = 5, + TERNARY_INDIRECTION = 6, + SELECTOR = 7, + HASH_ACTION = 8, +}; +namespace DataMux { +enum { + STATISTICS = 0, + METER = 1, + OVERFLOW = 2, + OVERFLOW2 = 3, + ACTION = 4, + NONE = 7, +}; +} // namespace DataMux +namespace AdrMux { +enum { + ACTION = 1, + TERNARY_INDIRECTION = 2, + OVERFLOW = 4, + STATS_METERS = 5, + SELECTOR_ALU = 6, + SELECTOR_OVERFLOW = 7, + SELECTOR_ACTION_OVERFLOW = 8, +}; +} // namespace AdrMux +} // namespace UnitRam +namespace AdrDist { +enum { + ACTION = 0, + STATISTICS = 1, + METER = 2, + OVERFLOW = 3, +}; +} // namespace AdrDist +namespace MapRam { +enum { + STATISTICS = 1, + METER = 2, + STATEFUL = 3, + IDLETIME = 4, + COLOR = 5, + SELECTOR_SIZE = 6, +}; 
+namespace Mux { +enum { + SYSTEM = 0, + SYNTHETIC_TWO_PORT = 1, + IDLETIME = 2, + COLOR = 3, +}; +} // namespace Mux +namespace ColorBus { +enum { + NONE = 0, + COLOR = 1, + OVERFLOW = 2, + OVERFLOW_2 = 3, +}; +} // namespace ColorBus +} // namespace MapRam +namespace BusHashGroup { +enum { + SELECTOR_MOD = 0, + METER_ADDRESS = 1, + STATISTICS_ADDRESS = 2, + ACTION_DATA_ADDRESS = 3, + IMMEDIATE_DATA = 4, +}; +} // namespace BusHashGroup +namespace MoveReg { +enum { + STATS = 0, + METER = 1, + IDLE = 2, +}; +} // namespace MoveReg +#endif /* CONSTANTS_H_ */ diff --git a/backends/tofino/bf-asm/counter.cpp b/backends/tofino/bf-asm/counter.cpp new file mode 100644 index 00000000000..04c497f990c --- /dev/null +++ b/backends/tofino/bf-asm/counter.cpp @@ -0,0 +1,404 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "data_switchbox.h" +#include "input_xbar.h" +#include "lib/algorithm.h" +#include "misc.h" + +// target specific template specializations +#include "jbay/counter.h" +#include "tofino/counter.h" + +void CounterTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::Statistics); + if (!format) error(lineno, "No format specified in table %s", name()); + for (auto &kv : MapIterChecked(data, true)) { + if (common_setup(kv, data, P4Table::Statistics)) { + } else if (kv.key == "count") { + if (kv.value == "bytes") + type = BYTES; + else if (kv.value == "packets") + type = PACKETS; + else if (kv.value == "both" || kv.value == "packets_and_bytes") + type = BOTH; + else + error(kv.value.lineno, "Unknown counter type %s", value_desc(kv.value)); + } else if (kv.key == "teop") { + if (gress != EGRESS) error(kv.value.lineno, "tEOP can only be used in EGRESS"); + if (!Target::SUPPORT_TRUE_EOP()) + error(kv.value.lineno, "tEOP is not available on device"); + if (CHECKTYPE(kv.value, tINT)) { + teop = kv.value.i; + if (teop < 0 || teop > 3) + error(kv.value.lineno, "Invalid tEOP bus %d, valid values are 0-3", teop); + BUG_CHECK(!stage->teop[teop].first, + "previously used tEOP bus %d used again in stage %d", teop, + stage->stageno); + stage->teop[teop] = {true, stage->stageno}; + } + } else if (kv.key == "lrt") { + if (!CHECKTYPE2(kv.value, tVEC, tMAP)) continue; + collapse_list_of_maps(kv.value, true); + if (kv.value.type == tVEC) { + for (auto &el : kv.value.vec) lrt.emplace_back(el); + } else if (kv.value.map.size >= 1 && kv.value.map[0].key.type == tSTR) { + lrt.emplace_back(kv.value); + } else { + for (auto &el : kv.value.map) { + if (CHECKTYPE2(el.key, tINT, tBIGINT) && CHECKTYPE(el.value, tINT)) { + lrt.emplace_back(el.key.lineno, + get_int64(el.key, 64, "Threshold too large"), el.value.i); + } + } + } 
+ } else if (kv.key == "bytecount_adjust") { + if (CHECKTYPE(kv.value, tINT)) { + bytecount_adjust = kv.value.i; + } + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (teop >= 0 && type != BYTES && type != BOTH) + error(lineno, "tEOP bus can only used when counting bytes"); + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_srams(); + else + alloc_rams(true, stage->sram_use); +} + +CounterTable::lrt_params::lrt_params(const value_t &m) + : lineno(m.lineno), threshold(-1), interval(-1) { + if (CHECKTYPE(m, tMAP)) { + for (auto &kv : MapIterChecked(m.map, true)) { + if (kv.key == "threshold") { + if (CHECKTYPE2(kv.value, tINT, tBIGINT)) + threshold = get_int64(kv.value, 64, "Threshold too large"); + } else if (kv.key == "interval") { + if (CHECKTYPE(kv.value, tINT)) interval = kv.value.i; + } else { + warning(kv.key.lineno, "ignoring unknown item %s in lrt params", + value_desc(kv.key)); + } + } + if (threshold < 0) error(m.lineno, "No threshold in lrt params"); + if (interval < 0) error(m.lineno, "No interval in lrt params"); + } +} + +void CounterTable::pass1() { + LOG1("### Counter table " << name() << " pass1 " << loc()); + if (!p4_table) + p4_table = P4Table::alloc(P4Table::Statistics, this); + else + p4_table->check(this); + alloc_vpns(); + alloc_maprams(); + std::sort(layout.begin(), layout.end(), + [](const Layout &a, const Layout &b) -> bool { return a.row > b.row; }); + // stage->table_use[timing_thread(gress)] |= Stage::USE_SELECTOR; + int prev_row = -1; + for (auto &row : layout) { + if (home_rows.count(row.row)) prev_row = -1; + + if (prev_row >= 0) + need_bus(lineno, stage->overflow_bus_use, row.row, "Overflow"); + else + need_bus(lineno, stage->stats_bus_use, row.row, "Statistics data"); + for (int r = (row.row + 1) | 1; r < prev_row; r += 2) + need_bus(lineno, stage->overflow_bus_use, r, "Overflow"); + prev_row = row.row; + } + Synth2Port::pass1(); + int update_interval_bits = 29; + 
// Tofino didn't have enough bits to cover all possible values of + // the update interval. The compiler should have saturated it to + // the max value. Check that has been done here. + if (options.target == TOFINO) update_interval_bits = 28; + for (auto &l : lrt) { + if (l.interval >= (1 << update_interval_bits)) + error(l.lineno, "lrt update interval too large"); + } + if (lrt.size() > MAX_LRT_ENTRIES) + error(lrt[0].lineno, "Too many lrt entries (max %d)", MAX_LRT_ENTRIES); +} + +void CounterTable::pass2() { + LOG1("### Counter table " << name() << " pass2 " << loc()); + if (logical_id < 0) warning(lineno, "counter %s appears unused by any table", name()); +} + +void CounterTable::pass3() { LOG1("### Counter table " << name() << " pass3 " << loc()); } + +static int counter_size[] = {0, 0, 1, 2, 3, 0, 4}; +static int counter_masks[] = {0, 7, 3, 4, 1, 0, 0}; +static int counter_shifts[] = {0, 3, 2, 3, 1, 0, 2}; +static int counter_hole_swizzle[] = {0, 0, 0, 1, 0, 0, 2}; + +int CounterTable::direct_shiftcount() const { + return 64 + STAT_ADDRESS_ZERO_PAD - counter_shifts[format->groups()]; +} + +int CounterTable::indirect_shiftcount() const { + return STAT_ADDRESS_ZERO_PAD - counter_shifts[format->groups()]; +} + +int CounterTable::address_shift() const { return counter_shifts[format->groups()]; } + +unsigned CounterTable::determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) const { + if (call.args[0].name() && strcmp(call.args[0].name(), "$DIRECT") == 0) { + return direct_shiftcount() + tcam_shift; + } else if (call.args[0].field()) { + BUG_CHECK(unsigned(call.args[0].field()->by_group[group]->bit(0) / 128) == word); + return call.args[0].field()->by_group[group]->bit(0) % 128 + indirect_shiftcount(); + } else if (call.args[1].field()) { + return call.args[1].field()->by_group[group]->bit(0) % 128 + STAT_ADDRESS_ZERO_PAD; + } + return 0; +} + +template +void CounterTable::write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int 
bus, + const std::vector &args) { + auto &merge = regs.rams.match.merge; + unsigned adr_mask = 0; + unsigned per_entry_en_mux_ctl = 0; + unsigned adr_default = 0; + + if (args[0].type == Table::Call::Arg::Name && args[0].name() != nullptr && + strcmp(args[0].name(), "$DIRECT") == 0) { + adr_mask |= ((1U << STAT_ADDRESS_BITS) - 1) & ~counter_masks[format->groups()]; + } else if (args[0].type == Table::Call::Arg::Field && args[0].field() != nullptr) { + auto addr = args[0].field(); + auto address_bits = addr->size; + adr_mask |= ((1U << address_bits) - 1) << (counter_shifts[format->groups()]); + } + + if (args[1].type == Table::Call::Arg::Name && args[1].name() != nullptr && + strcmp(args[1].name(), "$DEFAULT") == 0) { + adr_default = (1U << STATISTICS_PER_FLOW_ENABLE_START_BIT); + } else if (args[1].type == Table::Call::Arg::Field) { + if (args[0].type == Table::Call::Arg::Field) { + per_entry_en_mux_ctl = args[1].field()->bit(0) - args[0].field()->bit(0); + per_entry_en_mux_ctl += counter_shifts[format->groups()]; + } else if (args[0].type == Table::Call::Arg::HashDist) { + per_entry_en_mux_ctl = 0; + } + } + + merge.mau_stats_adr_mask[type][bus] = adr_mask; + merge.mau_stats_adr_default[type][bus] = adr_default; + merge.mau_stats_adr_per_entry_en_mux_ctl[type][bus] = per_entry_en_mux_ctl; + merge.mau_stats_adr_hole_swizzle_mode[type][bus] = counter_hole_swizzle[format->groups()]; +} + +template +void CounterTable::write_regs_vt(REGS ®s) { + LOG1("### Counter table " << name() << " write_regs " << loc()); + // FIXME -- factor common AttachedTable::write_regs + // FIXME -- factor common Synth2Port::write_regs + // FIXME -- factor common MeterTable::write_regs + Layout *home = nullptr; + bool push_on_overflow = false; + auto &map_alu = regs.rams.map_alu; + auto &adrdist = regs.rams.match.adrdist; + DataSwitchboxSetup *swbox = nullptr; + std::vector stats_groups; + int minvpn, maxvpn; + + layout_vpn_bounds(minvpn, maxvpn, true); + for (Layout &logical_row : layout) { 
+ unsigned row = logical_row.row / 2U; + unsigned side = logical_row.row & 1; /* 0 == left 1 == right */ + BUG_CHECK(side == 1); /* no map rams or alus on left side anymore */ + /* FIXME factor vpn/mapram stuff with selection.cpp */ + auto vpn = logical_row.vpns.begin(); + auto mapram = logical_row.maprams.begin(); + auto &map_alu_row = map_alu.row[row]; + auto home_it = home_rows.find(logical_row.row); + if (home_it != home_rows.end()) { + home = &logical_row; + swbox = new DataSwitchboxSetup(regs, this, logical_row.row, + (++home_it == home_rows.end()) ? -1 : *home_it); + + stats_groups.push_back(swbox->get_home_row() / 2); + + if (swbox->get_home_row() != row) swbox->setup_row(swbox->get_home_row()); + } + BUG_CHECK(home != nullptr); + LOG2("# DataSwitchbox.setup(" << row << ") home=" << home->row / 2U); + swbox->setup_row(row); + for (auto &memunit : logical_row.memunits) { + int logical_col = memunit.col; + unsigned col = logical_col + 6 * side; + swbox->setup_row_col(row, col, *vpn); + write_mapram_regs(regs, row, *mapram, *vpn, MapRam::STATISTICS); + if (gress) regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row); + ++mapram, ++vpn; + } + if (&logical_row == home) { + int stats_group_index = swbox->get_home_row() / 2; + auto &stats = map_alu.stats_wrap[stats_group_index].stats; + auto &stat_ctl = stats.statistics_ctl; + stat_ctl.stats_entries_per_word = format->groups(); + if (type & BYTES) stat_ctl.stats_process_bytes = 1; + if (type & PACKETS) stat_ctl.stats_process_packets = 1; + // The configuration values for threshold and interval are passed + // in directly to the assembler. Any adjustment required based + // on the counter type has already been done. 
+ if (lrt.size() > 0) { + stat_ctl.lrt_enable = 1; + int idx = 0; + for (auto &l : lrt) { + stats.lrt_threshold[idx] = l.threshold; + stats.lrt_update_interval[idx] = l.interval; + ++idx; + } + } + stat_ctl.stats_alu_egress = timing_thread(gress); + if (type == BYTES || type == BOTH) { + auto stats_bytecount_adjust_size = stat_ctl.stats_bytecount_adjust.size(); + auto stats_bytecount_adjust_mask = ((1U << stats_bytecount_adjust_size) - 1); + int bytecount_adjust_max = (1U << (stats_bytecount_adjust_size - 1)) - 1; + int bytecount_adjust_min = -1 * (1U << (stats_bytecount_adjust_size - 1)); + if (bytecount_adjust > bytecount_adjust_max || + bytecount_adjust < bytecount_adjust_min) { + error(lineno, + "The bytecount adjust value of %d on counter %s " + "does not fit within allowed range for %d bits - { %d, %d }", + bytecount_adjust, name(), stats_bytecount_adjust_size, + bytecount_adjust_min, bytecount_adjust_max); + } + stat_ctl.stats_bytecount_adjust = bytecount_adjust & stats_bytecount_adjust_mask; + } + stat_ctl.stats_alu_error_enable = 0; // TODO + if (logical_id >= 0) regs.cfg_regs.mau_cfg_stats_alu_lt[stats_group_index] = logical_id; + // setup_muxctl(adrdist.stats_alu_phys_to_logical_ixbar_ctl[row/2], logical_id); + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_base = minvpn; + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_limit = maxvpn; + } else { + auto &adr_ctl = map_alu_row.vh_xbars.adr_dist_oflo_adr_xbar_ctl[side]; + if (swbox->get_home_row_logical() >= 8 && logical_row.row < 8) { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = 0; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::OVERFLOW; + push_on_overflow = true; + BUG_CHECK(options.target == TOFINO); + } else { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = swbox->get_home_row_logical() % 8; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::STATISTICS; + } + adr_ctl.adr_dist_oflo_adr_xbar_enable = 1; + } + } + bool run_at_eop = this->run_at_eop(); + if 
(home_rows.size() > 1) write_alu_vpn_range(regs); + + BUG_CHECK(stats_groups.size() == home_rows.size()); + bool first_stats_group = true; + for (int &idx : stats_groups) { + auto &movereg_stats_ctl = adrdist.movereg_stats_ctl[idx]; + for (MatchTable *m : match_tables) { + run_at_eop = run_at_eop || m->run_at_eop(); + adrdist.adr_dist_stats_adr_icxbar_ctl[m->logical_id] |= 1U << idx; + auto &dump_ctl = regs.cfg_regs.stats_dump_ctl[m->logical_id]; + dump_ctl.stats_dump_entries_per_word = format->groups(); + if (type == BYTES || type == BOTH) dump_ctl.stats_dump_has_bytes = 1; + if (type == PACKETS || type == BOTH) dump_ctl.stats_dump_has_packets = 1; + dump_ctl.stats_dump_offset = minvpn; + dump_ctl.stats_dump_size = maxvpn; + if (direct) { + adrdist.movereg_ad_direct[MoveReg::STATS] |= 1U << m->logical_id; + if (m->is_ternary()) movereg_stats_ctl.movereg_stats_ctl_tcam = 1; + } + movereg_stats_ctl.movereg_stats_ctl_lt = m->logical_id; + // The first ALU will drive this xbar register + if (first_stats_group) { + adrdist.movereg_ad_stats_alu_to_logical_xbar_ctl[m->logical_id / 8U].set_subfield( + 4 + idx, 3 * (m->logical_id % 8U), 3); + } + adrdist.mau_ad_stats_virt_lt[idx] |= 1U << m->logical_id; + } + movereg_stats_ctl.movereg_stats_ctl_size = counter_size[format->groups()]; + movereg_stats_ctl.movereg_stats_ctl_direct = direct; + if (run_at_eop) { + if (teop >= 0) { + setup_teop_regs(regs, idx); + } else { + adrdist.deferred_ram_ctl[MoveReg::STATS][idx].deferred_ram_en = 1; + adrdist.deferred_ram_ctl[MoveReg::STATS][idx].deferred_ram_thread = gress; + if (gress) regs.cfg_regs.mau_cfg_dram_thread |= 1 << idx; + movereg_stats_ctl.movereg_stats_ctl_deferred = 1; + } + adrdist.stats_bubble_req[timing_thread(gress)].bubble_req_1x_class_en |= 1 << (4 + idx); + } else { + adrdist.packet_action_at_headertime[0][idx] = 1; + adrdist.stats_bubble_req[timing_thread(gress)].bubble_req_1x_class_en |= 1 << idx; + } + if (push_on_overflow) { + adrdist.deferred_oflo_ctl = 1 << 
((home->row - 8) / 2U); + adrdist.oflo_adr_user[0] = adrdist.oflo_adr_user[1] = AdrDist::STATISTICS; + } + first_stats_group = false; + } +} + +void CounterTable::gen_tbl_cfg(json::vector &out) const { + // FIXME -- factor common Synth2Port stuff + auto spare_mems = determine_spare_bank_memory_units(); + int size = (layout_size() - spare_mems.size()) * SRAM_DEPTH * format->groups(); + json::map &tbl = *base_tbl_cfg(out, "statistics", size); + json::map &stage_tbl = *add_stage_tbl_cfg(tbl, "statistics", size); + if (home_rows.size() > 1) + add_alu_indexes(stage_tbl, "stats_alu_index"); + else + add_alu_index(stage_tbl, "stats_alu_index"); + tbl["enable_pfe"] = per_flow_enable; + tbl["pfe_bit_position"] = per_flow_enable_bit(); + if (auto *f = lookup_field("bytes")) + tbl["byte_counter_resolution"] = f->size; + else + tbl["byte_counter_resolution"] = INT64_C(0); + if (auto *f = lookup_field("packets")) + tbl["packet_counter_resolution"] = f->size; + else + tbl["packet_counter_resolution"] = INT64_C(0); + switch (type) { + case PACKETS: + tbl["statistics_type"] = "packets"; + break; + case BYTES: + tbl["statistics_type"] = "bytes"; + break; + case BOTH: + tbl["statistics_type"] = "packets_and_bytes"; + break; + default: + break; + } + if (context_json) stage_tbl.merge(*context_json); +} + +DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(CounterTable, TARGET_CLASS) +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void CounterTable::write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + { write_merge_regs_vt(regs, match, type, bus, args); }) diff --git a/backends/tofino/bf-asm/crash.cpp b/backends/tofino/bf-asm/crash.cpp new file mode 100644 index 00000000000..a092bce2696 --- /dev/null +++ b/backends/tofino/bf-asm/crash.cpp @@ -0,0 +1,281 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "backends/tofino/bf-asm/config.h" +#if HAVE_EXECINFO_H +#include +#endif +#include +#include +#include +#include + +#include +#if HAVE_UCONTEXT_H +#include +#endif +#include + +#include + +#include "bfas.h" +#include "exename.h" +#include "lib/hex.h" +#include "lib/log.h" + +using namespace P4; + +static const char *signames[] = { + "NONE", "HUP", "INT", "QUIT", "ILL", "TRAP", "ABRT", "BUS", "FPE", "KILL", "USR1", + "SEGV", "USR2", "PIPE", "ALRM", "TERM", "STKFLT", "CHLD", "CONT", "STOP", "TSTP", "TTIN", + "TTOU", "URG", "XCPU", "XFSZ", "VTALRM", "PROF", "WINCH", "POLL", "PWR", "SYS"}; + +char *program_name = nullptr; + +#ifdef MULTITHREAD +#include + +#include +std::vector thread_ids; +__thread int my_id; + +void register_thread() { + static std::mutex lock; + std::lock_guard acquire(lock); + my_id = thread_ids.size(); + thread_ids.push_back(pthread_self()); +} +#define MTONLY(...) __VA_ARGS__ +#else +#define MTONLY(...) 
+#endif // MULTITHREAD + +static MTONLY(__thread) int shutdown_loop = 0; // avoid infinite loop if shutdown crashes + +static void sigint_shutdown(int sig, siginfo_t *, void *) { + if (shutdown_loop++) _exit(-1); + LOG1("Exiting with SIG" << signames[sig]); + _exit(sig + 0x80); +} + +/* + * call external program addr2line WITHOUT using malloc or stdio or anything + * else that might be problematic if there's memory corruption or exhaustion + */ +const char *addr2line(void *addr, const char *text) { + MTONLY(static std::mutex lock; std::lock_guard acquire(lock);) + static pid_t child = 0; + static int to_child, from_child; + static char binary[PATH_MAX]; + static char buffer[PATH_MAX]; + const char *t; + + if (!text || !(t = strchr(text, '('))) { + text = exename(program_name); + t = text + strlen(text); + } + memcpy(buffer, text, t - text); + buffer[t - text] = 0; + if (child && strcmp(binary, buffer)) { + child = 0; + close(to_child); + close(from_child); + } + memcpy(binary, buffer, (t - text) + 1); + text = binary; + if (!child) { + int pfd1[2], pfd2[2]; + char *p = buffer; + const char *argv[4] = {"/bin/sh", "-c", buffer, 0}; + strcpy(p, "addr2line "); // NOLINT + p += strlen(p); + strcpy(p, " -Cfspe "); // NOLINT + p += strlen(p); + t = text + strlen(text); + if (!memchr(text, '/', t - text)) { + strcpy(p, "$(which "); // NOLINT + p += strlen(p); + } + memcpy(p, text, t - text); + p += t - text; + if (!memchr(text, '/', t - text)) *p++ = ')'; + *p = 0; + child = -1; +#if HAVE_PIPE2 + if (pipe2(pfd1, O_CLOEXEC) < 0) return 0; + if (pipe2(pfd2, O_CLOEXEC) < 0) return 0; +#else + if (pipe(pfd1) < 0) return 0; + if (pipe(pfd2) < 0) return 0; + fcntl(pfd1[0], F_SETFD, FD_CLOEXEC | fcntl(pfd1[0], F_GETFL)); + fcntl(pfd1[1], F_SETFD, FD_CLOEXEC | fcntl(pfd1[1], F_GETFL)); + fcntl(pfd2[0], F_SETFD, FD_CLOEXEC | fcntl(pfd2[0], F_GETFL)); + fcntl(pfd2[1], F_SETFD, FD_CLOEXEC | fcntl(pfd2[1], F_GETFL)); +#endif + while ((child = fork()) == -1 && errno == EAGAIN) { + } + 
if (child == -1) return 0; + if (child == 0) { + dup2(pfd1[1], 1); + dup2(pfd1[1], 2); + dup2(pfd2[0], 0); + execvp(argv[0], (char *const *)argv); + _exit(-1); + } + close(pfd1[1]); + from_child = pfd1[0]; + close(pfd2[0]); + to_child = pfd2[1]; + } + if (child == -1) return 0; + char *p = buffer; + uintptr_t a = (uintptr_t)addr; + int shift = (CHAR_BIT * sizeof(uintptr_t) - 1) & ~3; + while (shift > 0 && (a >> shift) == 0) shift -= 4; + while (shift >= 0) { + *p++ = "0123456789abcdef"[(a >> shift) & 0xf]; + shift -= 4; + } + *p++ = '\n'; + auto _unused = write(to_child, buffer, p - buffer); + (void)_unused; + p = buffer; + int len; + while (p < buffer + sizeof(buffer) - 1 && + (len = read(from_child, p, buffer + sizeof(buffer) - p - 1)) > 0 && (p += len) && + !memchr(p - len, '\n', len)) { + } + *p = 0; + if ((p = strchr(buffer, '\n'))) *p = 0; + if (buffer[0] == 0 || buffer[0] == '?') return 0; + return buffer; +} + +#if HAVE_UCONTEXT_H +static void dumpregs(mcontext_t *mctxt) { +#if defined(REG_EAX) + LOG1(" eax=" << P4::hex(mctxt->gregs[REG_EAX], 8, '0') + << " ebx=" << P4::hex(mctxt->gregs[REG_EBX], 8, '0') + << " ecx=" << P4::hex(mctxt->gregs[REG_ECX], 8, '0') + << " edx=" << P4::hex(mctxt->gregs[REG_EDX], 8, '0')); + LOG1(" edi=" << P4::hex(mctxt->gregs[REG_EDI], 8, '0') + << " esi=" << P4::hex(mctxt->gregs[REG_ESI], 8, '0') + << " ebp=" << P4::hex(mctxt->gregs[REG_EBP], 8, '0') + << " esp=" << P4::hex(mctxt->gregs[REG_ESP], 8, '0')); +#elif defined(REG_RAX) + LOG1(" rax=" << P4::hex(mctxt->gregs[REG_RAX], 16, '0') + << " rbx=" << P4::hex(mctxt->gregs[REG_RBX], 16, '0') + << " rcx=" << P4::hex(mctxt->gregs[REG_RCX], 16, '0')); + LOG1(" rdx=" << P4::hex(mctxt->gregs[REG_RDX], 16, '0') + << " rdi=" << P4::hex(mctxt->gregs[REG_RDI], 16, '0') + << " rsi=" << P4::hex(mctxt->gregs[REG_RSI], 16, '0')); + LOG1(" rbp=" << P4::hex(mctxt->gregs[REG_RBP], 16, '0') + << " rsp=" << P4::hex(mctxt->gregs[REG_RSP], 16, '0') + << " r8=" << P4::hex(mctxt->gregs[REG_R8], 16, 
'0')); + LOG1(" r9=" << P4::hex(mctxt->gregs[REG_R9], 16, '0') + << " r10=" << P4::hex(mctxt->gregs[REG_R10], 16, '0') + << " r11=" << P4::hex(mctxt->gregs[REG_R11], 16, '0')); + LOG1(" r12=" << P4::hex(mctxt->gregs[REG_R12], 16, '0') + << " r13=" << P4::hex(mctxt->gregs[REG_R13], 16, '0') + << " r14=" << P4::hex(mctxt->gregs[REG_R14], 16, '0')); + LOG1(" r15=" << P4::hex(mctxt->gregs[REG_R15], 16, '0')); +#elif defined(__i386__) + LOG1(" eax=" << P4::hex(mctxt->mc_eax, 8, '0') << " ebx=" << P4::hex(mctxt->mc_ebx, 8, '0') + << " ecx=" << P4::hex(mctxt->mc_ecx, 8, '0') + << " edx=" << P4::hex(mctxt->mc_edx, 8, '0')); + LOG1(" edi=" << P4::hex(mctxt->mc_edi, 8, '0') << " esi=" << P4::hex(mctxt->mc_esi, 8, '0') + << " ebp=" << P4::hex(mctxt->mc_ebp, 8, '0') + << " esp=" << P4::hex(mctxt->mc_esp, 8, '0')); +#elif defined(__amd64__) + LOG1(" rax=" << P4::hex(mctxt->mc_rax, 16, '0') << " rbx=" << P4::hex(mctxt->mc_rbx, 16, '0') + << " rcx=" << P4::hex(mctxt->mc_rcx, 16, '0')); + LOG1(" rdx=" << P4::hex(mctxt->mc_rdx, 16, '0') << " rdi=" << P4::hex(mctxt->mc_rdi, 16, '0') + << " rsi=" << P4::hex(mctxt->mc_rsi, 16, '0')); + LOG1(" rbp=" << P4::hex(mctxt->mc_rbp, 16, '0') << " rsp=" << P4::hex(mctxt->mc_rsp, 16, '0') + << " r8=" << P4::hex(mctxt->mc_r8, 16, '0')); + LOG1(" r9=" << P4::hex(mctxt->mc_r9, 16, '0') << " r10=" << P4::hex(mctxt->mc_r10, 16, '0') + << " r11=" << P4::hex(mctxt->mc_r11, 16, '0')); + LOG1(" r12=" << P4::hex(mctxt->mc_r12, 16, '0') << " r13=" << P4::hex(mctxt->mc_r13, 16, '0') + << " r14=" << P4::hex(mctxt->mc_r14, 16, '0')); + LOG1(" r15=" << P4::hex(mctxt->mc_r15, 16, '0')); +#else +#warning "unknown machine type" +#endif +} +#endif + +static void crash_shutdown(int sig, siginfo_t *info, void *uctxt) { + if (shutdown_loop++) _exit(-1); + MTONLY(static std::recursive_mutex lock; static int threads_dumped = 0; + static bool killed_all_threads = false; lock.lock(); if (!killed_all_threads) { + killed_all_threads = true; + for (int i = 0; i < 
int(thread_ids.size()); i++) + if (i != my_id - 1) { + pthread_kill(thread_ids[i], SIGABRT); + } + }) + LOG1(MTONLY("Thread #" << my_id << " " <<) "exiting with SIG" << signames[sig] << ", trace:"); + if (sig == SIGILL || sig == SIGFPE || sig == SIGSEGV || sig == SIGBUS || sig == SIGTRAP) + LOG1(" address = " << hex(info->si_addr)); +#if HAVE_UCONTEXT_H + dumpregs(&(static_cast(uctxt)->uc_mcontext)); +#else + (void)uctxt; // Suppress unused parameter warning. +#endif +#if HAVE_EXECINFO_H + if (LOGGING(1)) { + static void *buffer[64]; + int size = backtrace(buffer, 64); + char **strings = backtrace_symbols(buffer, size); + for (int i = 1; i < size; i++) { + if (strings) LOG1(" " << strings[i]); + if (const char *line = addr2line(buffer[i], strings ? strings[i] : 0)) + LOG1(" " << line); + } + if (size < 1) LOG1("backtrace failed"); + free(strings); + } +#endif + MTONLY( + if (++threads_dumped < int(thread_ids.size())) { + lock.unlock(); + pthread_exit(0); + } else { lock.unlock(); }) + if (sig != SIGABRT) BUG("Exiting with SIG%s", signames[sig]); + _exit(sig + 0x80); +} + +void register_exit_signals() { + struct sigaction sigact; + sigact.sa_sigaction = sigint_shutdown; + sigact.sa_flags = SA_SIGINFO; + sigemptyset(&sigact.sa_mask); + sigaction(SIGHUP, &sigact, 0); + sigaction(SIGINT, &sigact, 0); + sigaction(SIGQUIT, &sigact, 0); + sigaction(SIGTERM, &sigact, 0); + sigact.sa_sigaction = crash_shutdown; + sigaction(SIGILL, &sigact, 0); + sigaction(SIGABRT, &sigact, 0); + sigaction(SIGFPE, &sigact, 0); + sigaction(SIGSEGV, &sigact, 0); + sigaction(SIGBUS, &sigact, 0); + sigaction(SIGTRAP, &sigact, 0); + signal(SIGPIPE, SIG_IGN); +} diff --git a/backends/tofino/bf-asm/data_switchbox.h b/backends/tofino/bf-asm/data_switchbox.h new file mode 100644 index 00000000000..d69d952f899 --- /dev/null +++ b/backends/tofino/bf-asm/data_switchbox.h @@ -0,0 +1,168 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the 
"License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_DATA_SWITCHBOX_H_ +#define BACKENDS_TOFINO_BF_ASM_DATA_SWITCHBOX_H_ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" + +/* + * Code to handle programming of the Ram Data Bus Horizontal/Vertical Switchbox + * see section 6.2.4.4 of the MAU uArch docs + */ + +template +class DataSwitchboxSetup { + REGS ®s; + Table *tbl; + unsigned home_row, home_row_logical, prev_row, top_ram_row, bottom_ram_row; + + public: + unsigned get_home_row() { return home_row; } + unsigned get_home_row_logical() { return home_row_logical; } + DataSwitchboxSetup(REGS ®s, Table *t, int home = -1, int next_home = -1) + : regs(regs), tbl(t) { + if (home >= 0) + top_ram_row = prev_row = home_row = home / 2U; + else + top_ram_row = prev_row = home_row = tbl->layout[0].row / 2U; + bottom_ram_row = tbl->layout.back().row / 2U; + if (next_home >= 0) { + for (auto it = tbl->layout.rbegin(); it != tbl->layout.rend(); ++it) { + if (it->row > next_home) { + bottom_ram_row = it->row / 2U; + break; + } + } + } + + // Counter ALU's are on even rows on right side of RAM array. Set + // home_row to the correct ALU + if (tbl->table_type() == Table::COUNTER) + prev_row = home_row = prev_row % 2 ? prev_row + 1 : prev_row; + // Stateful/Selection/Meter ALU's are on odd rows on right side of RAM + // array. 
Set home_row to the correct ALU + else if (tbl->table_type() == Table::STATEFUL || tbl->table_type() == Table::SELECTION || + tbl->table_type() == Table::METER) + prev_row = home_row = prev_row % 2 ? prev_row : prev_row + 1; + home_row_logical = home_row * 2 + 1; + } + /** + * Responsible for the data hv switch box per row, as well as the fabric_ctl. At a high + * level, the fabric ctl is an optimized version of the fabric_ctl in order to manage + * some of the timing issues. + * + * Operates under the assumption that all rows in the layout are numerically highest to lowest. + * Information has to flow up to the home row, and flow down to the lowest row. Should not + * flow above the homerow and below the lowest row + */ + void setup_row(unsigned row) { + auto &map_alu = regs.rams.map_alu; + auto &swbox = regs.rams.array.switchbox.row; + auto &map_alu_row = map_alu.row[row]; + int side = 1; // always -- currently no maprams on left side + auto &syn2port_ctl = map_alu_row.i2portctl.synth2port_fabric_ctl[0][side]; + map_alu_row.i2portctl.synth2port_ctl.synth2port_enable = 1; + while (prev_row != row) { + auto &prev_syn2port_ctl = map_alu.row[prev_row].i2portctl.synth2port_fabric_ctl[0]; + if (prev_row == home_row) { + swbox[prev_row].ctl.r_stats_alu_o_mux_select.r_stats_alu_o_sel_oflo_rd_b_i = 1; + swbox[prev_row].ctl.b_oflo_wr_o_mux_select.b_oflo_wr_o_sel_stats_wr_r_i = 1; + prev_syn2port_ctl[side].stats_to_vbus_below = 1; + } else { + // If a row is in the middle of possible rows, must program the switchbox + // to have data pass through the bottom of the switch box to the top of + // the switchbox + swbox[prev_row].ctl.t_oflo_rd_o_mux_select.t_oflo_rd_o_sel_oflo_rd_b_i = 1; + swbox[prev_row].ctl.b_oflo_wr_o_mux_select.b_oflo_wr_o_sel_oflo_wr_t_i = 1; + // below2above only means that there is no synth2port RAMs on this row, but + // the signal needs to pass between the rows + prev_syn2port_ctl[side].synth2port_connect_below2above = 1; + /* need to also program left 
side below2above connections + * see ram_bus_path.py:254 -- 'Mike F.' comment */ + prev_syn2port_ctl[0].synth2port_connect_below2above = 1; + prev_syn2port_ctl[side].oflo_to_vbus_below = 1; + } + auto &next_syn2port_ctl = + map_alu.row[prev_row - 1].i2portctl.synth2port_fabric_ctl[0][side]; + // From RTL, it only appears that oflo_to_vbus_below/above should be programmed + // when RAMs appear on the RAM line, but the model asserts if these are not enabled. + // Keeping this, as it is what is DV'ed against + next_syn2port_ctl.oflo_to_vbus_above = 1; + prev_row--; + } + // FIXME: Should this be top_ram_row? + if (row == home_row) { + swbox[row].ctl.r_stats_alu_o_mux_select.r_stats_alu_o_sel_stats_rd_r_i = 1; + } else { + // The oflo signal of this row must go through the overflow bus + swbox[row].ctl.t_oflo_rd_o_mux_select.t_oflo_rd_o_sel_oflo_rd_r_i = 1; + swbox[row].ctl.r_oflo_wr_o_mux_select = 1; + syn2port_ctl.synth2port_connect_above = 1; + } + + if (row != bottom_ram_row) { + // To determine whether data flows back down. Doesn't flow down on the lowest row + syn2port_ctl.synth2port_connect_below = 1; + } + } + void setup_row_col(unsigned row, unsigned col, int vpn) { + int side = col >= 6; + unsigned logical_col = col % 6U; + auto &ram = regs.rams.array.row[row].ram[col]; + auto &map_alu = regs.rams.map_alu; + auto &map_alu_row = map_alu.row[prev_row]; + auto &unitram_config = map_alu_row.adrmux.unitram_config[side][logical_col]; + unitram_config.unitram_type = tbl->unitram_type(); + unitram_config.unitram_logical_table = tbl->logical_id; + if (!options.match_compiler) // FIXME -- compiler doesn't set this? 
+ unitram_config.unitram_vpn = vpn; + if (tbl->gress == INGRESS || tbl->gress == GHOST) + unitram_config.unitram_ingress = 1; + else + unitram_config.unitram_egress = 1; + unitram_config.unitram_enable = 1; + + auto &ram_address_mux_ctl = map_alu_row.adrmux.ram_address_mux_ctl[side][logical_col]; + ram_address_mux_ctl.ram_unitram_adr_mux_select = UnitRam::AdrMux::STATS_METERS; + if (row == home_row) { + ram.unit_ram_ctl.match_ram_write_data_mux_select = UnitRam::DataMux::STATISTICS; + ram.unit_ram_ctl.match_ram_read_data_mux_select = UnitRam::DataMux::STATISTICS; + if (tbl->adr_mux_select_stats()) + ram_address_mux_ctl.ram_stats_meter_adr_mux_select_stats = 1; + else + ram_address_mux_ctl.ram_stats_meter_adr_mux_select_meter = 1; + ram_address_mux_ctl.ram_ofo_stats_mux_select_statsmeter = 1; + ram_address_mux_ctl.synth2port_radr_mux_select_home_row = 1; + } else { + ram.unit_ram_ctl.match_ram_write_data_mux_select = UnitRam::DataMux::OVERFLOW; + ram.unit_ram_ctl.match_ram_read_data_mux_select = UnitRam::DataMux::OVERFLOW; + ram_address_mux_ctl.ram_oflo_adr_mux_select_oflo = 1; + ram_address_mux_ctl.ram_ofo_stats_mux_select_oflo = 1; + ram_address_mux_ctl.synth2port_radr_mux_select_oflo = 1; + } + ram_address_mux_ctl.map_ram_wadr_mux_select = MapRam::Mux::SYNTHETIC_TWO_PORT; + ram_address_mux_ctl.map_ram_wadr_mux_enable = 1; + ram_address_mux_ctl.map_ram_radr_mux_select_smoflo = 1; + int syn2port_bus = prev_row == top_ram_row ? 
0 : 1; + auto &syn2port_members = map_alu_row.i2portctl.synth2port_hbus_members[syn2port_bus][side]; + syn2port_members |= 1U << logical_col; + } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_DATA_SWITCHBOX_H_ */ diff --git a/backends/tofino/bf-asm/deparser.cpp b/backends/tofino/bf-asm/deparser.cpp new file mode 100644 index 00000000000..922b07baf11 --- /dev/null +++ b/backends/tofino/bf-asm/deparser.cpp @@ -0,0 +1,819 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "deparser.h" + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/target.h" +#include "constants.h" +#include "lib/range.h" +#include "misc.h" +#include "parser-tofino-jbay.h" +#include "phv.h" +#include "top_level.h" +#include "ubits.h" + +unsigned Deparser::unique_field_list_handle; +Deparser Deparser::singleton_object; + +Deparser::Deparser() : Section("deparser") {} +Deparser::~Deparser() {} + +struct Deparser::FDEntry { + struct Base { + virtual ~Base() {} + virtual void check(bitvec &phv_use) = 0; + virtual unsigned encode() = 0; + virtual unsigned size() = 0; // size in bytes; + virtual void dbprint(std::ostream &) const = 0; + template + bool is() const { + return dynamic_cast(this) != nullptr; + } + template + T *to() { + return dynamic_cast(this); + } + friend std::ostream &operator<<(std::ostream &out, const Base &b) { + b.dbprint(out); + return out; + } + }; + struct Phv : Base { + ::Phv::Ref val; + Phv(gress_t g, const value_t &v) : val(g, DEPARSER_STAGE, v) {} + void check(bitvec &phv_use) override { + if (val.check()) { + phv_use[val->reg.uid] = 1; + if (val->lo != 0 || val->hi != val->reg.size - 1) + error(val.lineno, + "Can only output full phv registers, not slices, " + "in deparser"); + } + } + unsigned encode() override { return val->reg.deparser_id(); } + unsigned size() override { return val->reg.size / 8; } + const ::Phv::Register *reg() { return &val->reg; } + void dbprint(std::ostream &out) const override { out << val.desc(); } + }; + struct Checksum : Base { + gress_t gress; + int unit; + Checksum(gress_t gr, const value_t &v) : gress(gr) { + if (CHECKTYPE(v, tINT)) { + if ((unit = v.i) < 0 || v.i >= Target::DEPARSER_CHECKSUM_UNITS()) + error(v.lineno, "Invalid deparser checksum unit %" PRId64 "", v.i); + } + } + void check(bitvec &phv_use) override {} + template + unsigned encode(); + unsigned encode() override; + unsigned size() override { return 
2; } + void dbprint(std::ostream &out) const override { out << gress << " checksum " << unit; } + }; + struct Constant : Base { + int lineno; + gress_t gress; + int val; + Constant(gress_t g, const value_t &v) : gress(g), val(v.i) { + lineno = v.lineno; + if (v.i < 0 || v.i >> 8) + error(lineno, + "Invalid deparser constant %" PRId64 ", valid constant range is 0-255", v.i); + bool ok = Deparser::add_constant(gress, val); + if (!ok) error(lineno, "Ran out of deparser constants"); + } + void check(bitvec &phv_use) override {} + template + unsigned encode(); + unsigned encode() override; + unsigned size() override { return 1; } + void dbprint(std::ostream &out) const override { out << val; } + }; + struct Clot : Base { + int lineno; + gress_t gress; + std::string tag; + int length = -1; + std::map phv_replace; + std::map csum_replace; + Clot(gress_t gr, const value_t &tag, const value_t &data, ordered_set<::Phv::Ref> &pov) + : lineno(tag.lineno), gress(gr) { + if (CHECKTYPE2(tag, tINT, tSTR)) { + if (tag.type == tSTR) + this->tag = tag.s; + else + this->tag = std::to_string(tag.i); + } + if (data.type == tMAP) { + for (auto &kv : data.map) { + if (kv.key == "pov") { + pov.emplace(gress, DEPARSER_STAGE, kv.value); + } else if (kv.key == "max_length" || kv.key == "length") { + if (length >= 0) error(kv.value.lineno, "Duplicate length"); + if (CHECKTYPE(kv.value, tINT) && ((length = kv.value.i) < 0 || length > 64)) + error(kv.value.lineno, "Invalid clot length"); + } else if (kv.key.type == tINT) { + if (phv_replace.count(kv.key.i) || csum_replace.count(kv.key.i)) + error(kv.value.lineno, "Duplicate value at offset %" PRId64 "", + kv.key.i); + if (kv.value.type == tCMD && kv.value.vec.size == 2 && + kv.value == "full_checksum") + csum_replace.emplace(kv.key.i, Checksum(gress, kv.value.vec[1])); + else + phv_replace.emplace(kv.key.i, + ::Phv::Ref(gress, DEPARSER_STAGE, kv.value)); + } else { + error(kv.value.lineno, "Unknown key for clot: %s", value_desc(kv.key)); + } + } 
+ } else { + pov.emplace(gress, DEPARSER_STAGE, data); + } + if (pov.size() > Target::DEPARSER_MAX_POV_PER_USE()) + error(data.lineno, "Too many POV bits for CLOT"); + } + void check(bitvec &phv_use) override { + if (length < 0) length = Parser::clot_maxlen(gress, tag); + if (length < 0) error(lineno, "No length for clot %s", tag.c_str()); + if (Parser::clot_tag(gress, tag) < 0) error(lineno, "No tag for clot %s", tag.c_str()); + unsigned next = 0; + ::Phv::Ref *prev = nullptr; + for (auto &r : phv_replace) { + if (r.first < next) { + error(r.second.lineno, "Overlapping phvs in clot"); + error(prev->lineno, "%s and %s", prev->name(), r.second.name()); + } + if (r.second.check()) { + phv_use[r.second->reg.uid] = 1; + if (r.second->lo != 0 || r.second->hi != r.second->reg.size - 1) + error(r.second.lineno, + "Can only output full phv registers, not slices," + " in deparser"); + next = r.first + r.second->reg.size / 8U; + prev = &r.second; + } + } + } + unsigned size() override { return length; } + unsigned encode() override { + BUG(); + return -1; + } + void dbprint(std::ostream &out) const override { + out << "clot " << tag; + if (length > 0) out << " [len " << length << "]"; + } + }; + + int lineno; + std::unique_ptr what; + ordered_set<::Phv::Ref> pov; + FDEntry(gress_t gress, const value_t &v, const value_t &p) { + lineno = v.lineno; + if (v.type == tCMD && v.vec.size == 2 && v == "clot") { + what.reset(new Clot(gress, v.vec[1], p, pov)); + } else if (v.type == tCMD && v.vec.size == 2 && v == "full_checksum") { + what.reset(new Checksum(gress, v.vec[1])); + pov.emplace(gress, DEPARSER_STAGE, p); + } else if (v.type == tINT) { + what.reset(new Constant(gress, v)); + pov.emplace(gress, DEPARSER_STAGE, p); + } else { + what.reset(new Phv(gress, v)); + pov.emplace(gress, DEPARSER_STAGE, p); + } + } + void check(bitvec &phv_use) { what->check(phv_use); } +}; + +struct Deparser::Intrinsic::Type { + target_t target; + gress_t gress; + std::string name; + int max; + 
static std::map all[TARGET_INDEX_LIMIT][2]; + + protected: + Type(target_t t, gress_t gr, const char *n, int m) : target(t), gress(gr), name(n), max(m) { + BUG_CHECK(!all[t][gr].count(name)); + all[target][gress][name] = this; + } + ~Type() { all[target][gress].erase(name); } + + public: +#define VIRTUAL_TARGET_METHODS(TARGET) \ + virtual void setregs(Target::TARGET::deparser_regs ®s, Deparser &deparser, \ + Intrinsic &vals) { \ + BUG_CHECK(!"target mismatch"); \ + } + FOR_ALL_REGISTER_SETS(VIRTUAL_TARGET_METHODS) +#undef VIRTUAL_TARGET_METHODS +}; + +#define DEPARSER_INTRINSIC(TARGET, GR, NAME, MAX) \ + static struct TARGET##INTRIN##GR##NAME : public Deparser::Intrinsic::Type { \ + TARGET##INTRIN##GR##NAME() \ + : Deparser::Intrinsic::Type(Target::TARGET::tag, GR, #NAME, MAX) {} \ + void setregs(Target::TARGET::deparser_regs &, Deparser &, Deparser::Intrinsic &) override; \ + } TARGET##INTRIN##GR##NAME##_singleton; \ + void TARGET##INTRIN##GR##NAME::setregs(Target::TARGET::deparser_regs ®s, \ + Deparser &deparser, Deparser::Intrinsic &intrin) + +std::map + Deparser::Intrinsic::Type::all[TARGET_INDEX_LIMIT][2]; + +Deparser::Digest::Digest(Deparser::Digest::Type *t, int l, VECTOR(pair_t) & data) { + type = t; + lineno = l; + for (auto &l : data) { + if (l.key == "select") { + if (l.value.type == tMAP && l.value.map.size == 1) { + select = Val(t->gress, l.value.map[0].key, l.value.map[0].value); + } else { + select = Val(t->gress, l.value); + } + } else if (t->can_shift && l.key == "shift") { + if (CHECKTYPE(l.value, tINT)) shift = l.value.i; + } else if (l.key == "context_json") { + if (CHECKTYPE(l.value, tMAP)) context_json = toJson(l.value.map); + } else if (!CHECKTYPE(l.key, tINT)) { + continue; + } else if (l.key.i < 0 || l.key.i >= t->count) { + error(l.key.lineno, "%s index %" PRId64 " out of range", t->name.c_str(), l.key.i); + } else if (l.value.type != tVEC) { + layout[l.key.i].emplace_back(t->gress, DEPARSER_STAGE, l.value); + } else { + // TODO : Need an 
empty layout entry if no values are present to + // set the config registers correctly + layout.emplace(l.key.i, std::vector()); + for (auto &v : l.value.vec) layout[l.key.i].emplace_back(t->gress, DEPARSER_STAGE, v); + } + } + if (!select && t->name != "pktgen") error(lineno, "No select key in %s spec", t->name.c_str()); +} + +#define DEPARSER_DIGEST(TARGET, GRESS, NAME, CNT, ...) \ + static struct TARGET##GRESS##NAME##Digest : public Deparser::Digest::Type { \ + TARGET##GRESS##NAME##Digest() \ + : Deparser::Digest::Type(Target::TARGET::tag, GRESS, #NAME, CNT) { \ + __VA_ARGS__ \ + } \ + void setregs(Target::TARGET::deparser_regs &, Deparser &, Deparser::Digest &) override; \ + } TARGET##GRESS##NAME##Digest##_singleton; \ + void TARGET##GRESS##NAME##Digest::setregs(Target::TARGET::deparser_regs ®s, \ + Deparser &deparser, Deparser::Digest &data) + +std::map Deparser::Digest::Type::all[TARGET_INDEX_LIMIT][2]; + +void Deparser::start(int lineno, VECTOR(value_t) args) { + if (args.size == 0) { + this->lineno[INGRESS] = this->lineno[EGRESS] = lineno; + return; + } + if (args.size != 1 || (args[0] != "ingress" && args[0] != "egress")) + error(lineno, "deparser must specify ingress or egress"); + gress_t gress = args[0] == "egress" ? 
EGRESS : INGRESS; + if (!this->lineno[gress]) this->lineno[gress] = lineno; +} + +void Deparser::input(VECTOR(value_t) args, value_t data) { + if (!CHECKTYPE(data, tMAP)) return; + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (args.size > 0) { + if (args[0] == "ingress" && gress != INGRESS) continue; + if (args[0] == "egress" && gress != EGRESS) continue; + } else if (error_count > 0) { + break; + } + for (auto &kv : MapIterChecked(data.map, true)) { + if (kv.key == "dictionary") { + if (kv.value.type == tVEC && kv.value.vec.size == 0) continue; + collapse_list_of_maps(kv.value); + if (!CHECKTYPE(kv.value, tMAP)) continue; + for (auto &ent : kv.value.map) + dictionary[gress].emplace_back(gress, ent.key, ent.value); + } else if (kv.key == "pov") { + if (kv.value.type != tVEC) { + /// The check for correct type is done in Phv::Ref constructor + pov_order[gress].emplace_back(gress, DEPARSER_STAGE, kv.value); + } else { + for (auto &ent : kv.value.vec) + pov_order[gress].emplace_back(gress, DEPARSER_STAGE, ent); + } + } else if (kv.key == "partial_checksum") { + if (kv.key.type != tCMD || kv.key.vec.size != 2 || kv.key[1].type != tINT || + kv.key[1].i < 0 || kv.key[1].i >= Target::DEPARSER_CHECKSUM_UNITS()) { + error(kv.key.lineno, "Invalid deparser checksum unit number"); + } else if (CHECKTYPE2(kv.value, tVEC, tMAP)) { + collapse_list_of_maps(kv.value); + int unit = kv.key[1].i; + if (unit < 0) error(kv.key.lineno, "Invalid checksum unit %d", unit); + for (auto &ent : kv.value.map) { + checksum_entries[gress][unit].emplace_back(gress, ent.key, ent.value); + } + } + } else if (kv.key == "full_checksum") { + if (kv.key.type != tCMD || kv.key.vec.size != 2 || kv.key[1].type != tINT || + kv.key[1].i < 0 || kv.key[1].i >= Target::DEPARSER_CHECKSUM_UNITS()) { + error(kv.key.lineno, "Invalid deparser checksum unit number"); + } else if (CHECKTYPE2(kv.value, tVEC, tMAP)) { + collapse_list_of_maps(kv.value); + int unit = kv.key[1].i; + if (unit < 0) 
error(kv.key.lineno, "Invalid checksum unit %d", unit); + for (auto &ent : kv.value.map) { + if (ent.key == "partial_checksum") { + full_checksum_unit[gress][unit].entries[ent.key[1].i] = + checksum_entries[gress][ent.key[1].i]; + collapse_list_of_maps(ent.value); + for (auto &a : ent.value.map) { + if (a.key == "pov") { + full_checksum_unit[gress][unit].pov[ent.key[1].i] = + ::Phv::Ref(gress, DEPARSER_STAGE, a.value); + } else if (a.key == "invert") { + full_checksum_unit[gress][unit].checksum_unit_invert.insert( + ent.key[1].i); + } + } + } else if (ent.key == "clot") { + collapse_list_of_maps(ent.value); + for (auto &a : ent.value.map) { + if (a.key == "pov") { + full_checksum_unit[gress][unit].clot_entries.emplace_back( + gress, ent.key[1].i, a.value); + } else if (a.key == "invert") { + full_checksum_unit[gress][unit].clot_tag_invert.insert( + a.value.i); + } + } + } else if (ent.key == "zeros_as_ones") { + full_checksum_unit[gress][unit].zeros_as_ones_en = ent.value.i; + } + } + } + } else if (auto *itype = ::get(Intrinsic::Type::all[Target::register_set()][gress], + value_desc(&kv.key))) { + intrinsics.emplace_back(itype, kv.key.lineno); + auto &intrin = intrinsics.back(); + collapse_list_of_maps(kv.value); + if (kv.value.type == tVEC) { + for (auto &val : kv.value.vec) intrin.vals.emplace_back(gress, val); + } else if (kv.value.type == tMAP) { + for (auto &el : kv.value.map) intrin.vals.emplace_back(gress, el.key, el.value); + } else { + intrin.vals.emplace_back(gress, kv.value); + } + } else if (auto *digest = ::get(Digest::Type::all[Target::register_set()][gress], + value_desc(&kv.key))) { + if (CHECKTYPE(kv.value, tMAP)) + digests.emplace_back(digest, kv.value.lineno, kv.value.map); + } else { + error(kv.key.lineno, "Unknown deparser tag %s", value_desc(&kv.key)); + } + } + } +} + +template +static void write_checksum_entry(ENTRIES &entry, unsigned mask, int swap, int id, + const char *name = "entry") { + BUG_CHECK(swap == 0 || swap == 1); + 
BUG_CHECK(mask == 0 || mask & 3); + if (entry.modified()) error(1, "%s appears multiple times in checksum %d", name, id); + entry.swap = swap; + // CSR: The order of operation: data is swapped or not and then zeroed or not + if (swap) mask = (mask & 0x2) >> 1 | (mask & 0x1) << 1; + switch (mask) { + case 0: + entry.zero_m_s_b = 1; + entry.zero_l_s_b = 1; + break; + case 1: + entry.zero_m_s_b = 1; + entry.zero_l_s_b = 0; + break; + case 2: + entry.zero_m_s_b = 0; + entry.zero_l_s_b = 1; + break; + case 3: + entry.zero_m_s_b = 0; + entry.zero_l_s_b = 0; + break; + default: + break; + } +} + +// Used for field dictionary logging and deparser resoureces. +// Using fd entry and pov, a json::map is filled with appropriate field names +void write_field_name_in_json(const Phv::Register *phv, const Phv::Register *pov, int povBit, + json::map &chunk_byte, json::map &fd_entry_chunk_byte, int stageno, + gress_t gress) { + auto povName_ = Phv::get_pov_name(pov->mau_id(), povBit); + std::string povName = povName_; + std::string headerName; + size_t pos = 0; + if ((pos = povName.find("$valid")) != std::string::npos) { + headerName = povName.substr(0, pos); + } + std::string fieldNames; + auto allFields = Phv::aliases(phv, stageno); + for (auto fieldName : allFields) { + if (fieldName.find(headerName) != std::string::npos) fieldNames += (fieldName + ", "); + } + fd_entry_chunk_byte["phv_container"] = phv->uid; + chunk_byte["PHV"] = phv->uid; + chunk_byte["Field"] = fieldNames; + return; +} + +void write_pov_resources_in_json(ordered_map &pov, + json::map &pov_resources) { + unsigned pov_size = 0; + json::vector pov_bits; + // ent will be tuple of (register ref, pov position start) + for (auto const &ent : pov) { + // Go through all the bits + unsigned used_bits = 0; + for (unsigned i = 0; i < ent.first->size; i++) { + json::map pov_bit; + std::string pov_name = Phv::get_pov_name(ent.first->uid, i); + // Check if this POV bit is used + if (pov_name.compare(" ") != 0) { + 
pov_bit["pov_bit"] = ent.second + i; + pov_bit["phv_container"] = ent.first->uid; + pov_bit["phv_container_bit"] = i; + pov_bit["pov_name"] = pov_name; + pov_bits.push_back(std::move(pov_bit)); + used_bits++; + } + } + if (pov_size < (ent.second + used_bits)) pov_size = ent.second + used_bits; + } + pov_resources["size"] = pov_size; + pov_resources["pov_bits"] = std::move(pov_bits); +} + +// Used for field dictionary logging. Using fd entry and pov, a json::map +// is filled with appropriate checksum or constant +void write_csum_const_in_json(int deparserPhvIdx, json::map &chunk_byte, + json::map &fd_entry_chunk_byte, gress_t gress) { + if (options.target == Target::Tofino::tag) { + if (deparserPhvIdx >= CHECKSUM_ENGINE_PHVID_TOFINO_LOW && + deparserPhvIdx <= CHECKSUM_ENGINE_PHVID_TOFINO_HIGH) { + auto csum_id = deparserPhvIdx - CHECKSUM_ENGINE_PHVID_TOFINO_LOW - + (gress * CHECKSUM_ENGINE_PHVID_TOFINO_PER_GRESS); + chunk_byte["Checksum"] = csum_id; + fd_entry_chunk_byte["csum_engine"] = csum_id; + } + } else if (options.target == Target::JBay::tag) { + if (deparserPhvIdx > CONSTANTS_PHVID_JBAY_LOW && + deparserPhvIdx < CONSTANTS_PHVID_JBAY_HIGH) { + chunk_byte["Constant"] = + Deparser::get_constant(gress, deparserPhvIdx - CONSTANTS_PHVID_JBAY_LOW); + fd_entry_chunk_byte["phv_container"] = deparserPhvIdx; + } else { + auto csum_id = deparserPhvIdx - CONSTANTS_PHVID_JBAY_HIGH; + chunk_byte["Checksum"] = csum_id; + fd_entry_chunk_byte["csum_engine"] = csum_id; + } + } + return; +} + +/// Get JSON for deparser resources from digest of deparser table +/// @param tab_digest Digest for the deparser table, nullptr if the table does not exist +/// @return JSON node representation of the table for deparser resources +json::map deparser_table_digest_to_json(Deparser::Digest *tab_digest) { + json::map dep_table; + json::vector table_phv; + + // nullptr means the table is not used, create JSON node for empty table + // and return it + if (tab_digest == nullptr) { + 
dep_table["nTables"] = 0; + dep_table["maxBytes"] = 0; + dep_table["table_phv"] = std::move(table_phv); + return dep_table; + } + + unsigned int max_bytes = 0; + // Prepare tables of the deparser table type + for (auto &set : tab_digest->layout) { + json::map table; + table["table_id"] = set.first; + // TODO: field_list_name? + json::vector bytes; + unsigned byte_n = 0; + for (auto ® : set.second) { + json::map byte; + byte["byte_number"] = byte_n++; + byte["phv_container"] = reg->reg.uid; + bytes.push_back(std::move(byte)); + } + if (byte_n > max_bytes) max_bytes = byte_n; + table["bytes"] = std::move(bytes); + table_phv.push_back(std::move(table)); + } + dep_table["nTables"] = tab_digest->layout.size(); + dep_table["maxBytes"] = max_bytes; + dep_table["index_phv"] = tab_digest->select->reg.uid; + dep_table["table_phv"] = std::move(table_phv); + // Now we have a digest + return dep_table; +} + +/// Create resources_deparser.json with the deparser node +/// for resources.json +/// @param fde_entries_i JSON vector of field dictionary entries from Ingress +/// @param fde_entries_e JSON vector of field dictionary entries from Egress +void Deparser::report_resources_deparser_json(json::vector &fde_entries_i, + json::vector &fde_entries_e) { + json::map resources_deparser_ingress; + json::map resources_deparser_egress; + // Set gress property + resources_deparser_ingress["gress"] = "ingress"; + resources_deparser_egress["gress"] = "egress"; + // Fill out POV resource information for ingress + json::map pov_resources; + write_pov_resources_in_json(pov[INGRESS], pov_resources); + resources_deparser_ingress["pov"] = std::move(pov_resources); + // Fill out POV resoure information for egress + write_pov_resources_in_json(pov[EGRESS], pov_resources); + resources_deparser_egress["pov"] = std::move(pov_resources); + // Fill out field dictionaries + unsigned n_fde_entries = Target::DEPARSER_MAX_FD_ENTRIES(); + resources_deparser_ingress["nFdeEntries"] = n_fde_entries; + 
resources_deparser_ingress["fde_entries"] = std::move(fde_entries_i); + resources_deparser_egress["nFdeEntries"] = n_fde_entries; + resources_deparser_egress["fde_entries"] = std::move(fde_entries_e); + // Fill deparser tables + Digest *learning_table[2] = {nullptr, nullptr}; + Digest *resubmit_table[2] = {nullptr, nullptr}; + Digest *mirror_table[2] = {nullptr, nullptr}; + for (auto &digest : digests) { + // Check if this is egress/ingress + if (digest.type->gress != INGRESS && digest.type->gress != EGRESS) continue; + if (digest.type->name == "learning") + learning_table[digest.type->gress] = &digest; + else if (digest.type->name == "resubmit" || + digest.type->name == "resubmit_preserving_field_list") + resubmit_table[digest.type->gress] = &digest; + else if (digest.type->name == "mirror") + mirror_table[digest.type->gress] = &digest; + } + resources_deparser_ingress["mirror_table"] = + deparser_table_digest_to_json(mirror_table[INGRESS]); + resources_deparser_egress["mirror_table"] = deparser_table_digest_to_json(mirror_table[EGRESS]); + resources_deparser_ingress["resubmit_table"] = + deparser_table_digest_to_json(resubmit_table[INGRESS]); + resources_deparser_egress["resubmit_table"] = + deparser_table_digest_to_json(resubmit_table[EGRESS]); + resources_deparser_ingress["learning_table"] = + deparser_table_digest_to_json(learning_table[INGRESS]); + resources_deparser_egress["learning_table"] = + deparser_table_digest_to_json(learning_table[EGRESS]); + + // Create the main deparser resources node + json::vector resources_deparser; + resources_deparser.push_back(std::move(resources_deparser_ingress)); + resources_deparser.push_back(std::move(resources_deparser_egress)); + // Dump resources to file + auto deparser_json_dump = open_output("logs/resources_deparser.json"); + *deparser_json_dump << &resources_deparser; +} + +#include "jbay/deparser.cpp" // NOLINT(build/include) +#include "tofino/deparser.cpp" // NOLINT(build/include) + +std::vector 
Deparser::merge_csum_entries( + const std::vector &entries, int id) { + std::vector rv; + ordered_map merged_entries; + + for (auto &entry : entries) { + if (entry.is_clot()) { + rv.push_back(entry); + continue; + } + auto name = entry.val.name(); + int hi = entry.val.hibit(); + int lo = entry.val.lobit(); + bool is_hi = hi >= 16; + bool is_lo = lo < 16; + + if (!merged_entries.count(name)) { + auto reg = Phv::reg(name); + auto new_entry(entry); + if (lo != 0 && hi != reg->size - 1) { + new_entry.val = Phv::Ref(*reg, entry.val.gress(), 0, reg->size - 1); + } + merged_entries.emplace(name, new_entry); + } else { + auto &rv_entry = merged_entries[name]; + if (rv_entry.mask & entry.mask) + error(entry.lineno, "bytes within %s appear multiple times in checksum %d", name, + id); + if (is_hi) { + if ((rv_entry.mask & 0xc) && (rv_entry.swap & 2) != (entry.swap & 2)) + error(entry.lineno, "incompatible swap values for %s in checksum %d", name, id); + rv_entry.mask |= entry.mask & 0xc; + rv_entry.swap |= entry.swap & 2; + } + if (is_lo) { + if ((rv_entry.mask & 0x3) && (rv_entry.swap & 1) != (entry.swap & 1)) + error(entry.lineno, "incompatible swap values for %s in checksum %d", name, id); + rv_entry.mask |= entry.mask & 0x3; + rv_entry.swap |= entry.swap & 1; + } + } + } + + for (auto &[_, entry] : merged_entries) rv.push_back(entry); + + return rv; +} + +/* The following uses of specialized templates must be after the specialization... 
*/ +void Deparser::process() { + bitvec pov_use[2]; + for (gress_t gress : Range(INGRESS, EGRESS)) { + for (auto &ent : pov_order[gress]) + if (ent.check()) { + pov_use[gress][ent->reg.uid] = 1; + phv_use[gress][ent->reg.uid] = 1; + } + for (auto &ent : dictionary[gress]) { + ent.check(phv_use[gress]); + for (auto &pov : ent.pov) { + if (!pov.check()) continue; + phv_use[gress][pov->reg.uid] = 1; + if (pov->lo != pov->hi) error(pov.lineno, "POV bits should be single bits"); + if (!pov_use[gress][pov->reg.uid]) { + pov_order[gress].emplace_back(pov->reg, gress); + pov_use[gress][pov->reg.uid] = 1; + } + } + } + for (int i = 0; i < MAX_DEPARSER_CHECKSUM_UNITS; i++) + for (auto &ent : full_checksum_unit[gress][i].entries) { + for (const auto &entry : ent.second) { + if (!entry.check()) error(entry.lineno, "Invalid checksum entry"); + } + ent.second = merge_csum_entries(ent.second, i); + } + } + for (auto &intrin : intrinsics) { + for (auto &el : intrin.vals) { + if (el.check()) phv_use[intrin.type->gress][el->reg.uid] = 1; + for (auto &pov : el.pov) { + if (pov.check()) { + phv_use[intrin.type->gress][pov->reg.uid] = 1; + if (pov->lo != pov->hi) error(pov.lineno, "POV bits should be single bits"); + if (!pov_use[intrin.type->gress][pov->reg.uid]) { + pov_order[intrin.type->gress].emplace_back(pov->reg, intrin.type->gress); + pov_use[intrin.type->gress][pov->reg.uid] = 1; + } + } + } + } + if (intrin.vals.size() > (size_t)intrin.type->max) + error(intrin.lineno, "Too many values for %s", intrin.type->name.c_str()); + } + if (phv_use[INGRESS].intersects(phv_use[EGRESS])) + error(lineno[INGRESS], "Registers used in both ingress and egress in deparser: %s", + Phv::db_regset(phv_use[INGRESS] & phv_use[EGRESS]).c_str()); + for (auto &digest : digests) { + if (digest.select.check()) { + phv_use[digest.type->gress][digest.select->reg.uid] = 1; + if (digest.select->lo > 0 && !digest.type->can_shift) + error(digest.select.lineno, "%s digest selector must be in bottom bits of 
phv", + digest.type->name.c_str()); + } + for (auto &pov : digest.select.pov) { + if (pov.check()) { + phv_use[digest.type->gress][pov->reg.uid] = 1; + if (pov->lo != pov->hi) error(pov.lineno, "POV bits should be single bits"); + if (!pov_use[digest.type->gress][pov->reg.uid]) { + pov_order[digest.type->gress].emplace_back(pov->reg, digest.type->gress); + pov_use[digest.type->gress][pov->reg.uid] = 1; + } + } + } + for (auto &set : digest.layout) + for (auto ® : set.second) + if (reg.check()) phv_use[digest.type->gress][reg->reg.uid] = 1; + } + SWITCH_FOREACH_REGISTER_SET(Target::register_set(), TARGET *t = nullptr; + // process(t); + process((TARGET *)nullptr);) + + if (options.match_compiler || 1) { /* FIXME -- need proper liveness analysis */ + Phv::setuse(INGRESS, phv_use[INGRESS]); + Phv::setuse(EGRESS, phv_use[EGRESS]); + } + for (gress_t gress : Range(INGRESS, EGRESS)) { + int pov_byte = 0, pov_size = 0; + for (auto &ent : pov_order[gress]) + if (pov[gress].count(&ent->reg) == 0) { + pov[gress][&ent->reg] = pov_size; + pov_size += ent->reg.size; + } + if (pov_size > 8 * Target::DEPARSER_MAX_POV_BYTES()) + error(lineno[gress], "Ran out of space in POV in deparser"); + } +} + +/* The following uses of specialized templates must be after the specialization... 
*/ +void Deparser::output(json::map &map) { + SWITCH_FOREACH_TARGET(options.target, auto *regs = new TARGET::deparser_regs; + declare_registers(regs); write_config(*regs); + gen_learn_quanta(*regs, map["learn_quanta"]); return;) + error(__LINE__, "Unsupported target %d", options.target); +} + +/* this is a bit complicated since the output from compiler digest is as follows: + context_json: + 0: [ [ ipv4.ihl, 0, 4, 0], [ ipv4.protocol, 0, 8, 1], [ ipv4.srcAddr, 0, 32, 2], [ + ethernet.srcAddr, 0, 48, 6], [ ethernet.dstAddr, 0, 48, 12], [ ipv4.fragOffset, 0, 13, 18 ], [ + ipv4.identification, 0, 16, 20], [ routing_metadata.learn_meta_1, 0, 20, 22], [ + routing_metadata.learn_meta_4, 0, 10, 26] ] 1: [ [ ipv4.ihl, 0, 4, 0], [ ipv4.identification, 0, + 16, 1], [ ipv4.protocol, 0, 8, 3], [ ipv4.srcAddr, 0, 32, 4], [ ethernet.srcAddr, 0, 48, 8], [ + ethernet.dstAddr, 0, 48, 14], [ ipv4.fragOffset, 0, 13, 20], [ routing_metadata.learn_meta_2, + 0, 24, 22], [ routing_metadata.learn_meta_3, 0, 25, 26] ] name: [ learn_1, learn_2 ] +*/ +template +void Deparser::gen_learn_quanta(REGS ®s, json::vector &learn_quanta) { + for (auto &digest : digests) { + if (digest.type->name != "learning") continue; + BUG_CHECK(digest.context_json); + auto namevec = (*(digest.context_json))["name"]; + auto &names = *(namevec->as_vector()); + auto digentry = digest.context_json->begin(); + // Iterate on names. 
for each name, get the corresponding digest entry and fill in + for (auto &tname : names) { + BUG_CHECK(digentry != digest.context_json->end()); + json::map quanta; + quanta["name"] = (*tname).c_str(); + quanta["lq_cfg_type"] = digentry->first->as_number()->val; + quanta["handle"] = next_handle(); + auto *digfields = digentry->second->as_vector(); + if (digfields) { + auto &digfields_vec = *digfields; + json::vector &fields = quanta["fields"]; + for (auto &tup : digfields_vec) { + auto &one = *(tup->as_vector()); + BUG_CHECK(one.size() == 5); + json::map anon; + anon["field_name"] = (*(one[0])).clone(); + anon["start_byte"] = (*(one[1])).clone(); + anon["field_width"] = (*(one[2])).clone(); + anon["start_bit"] = (*(one[3])).clone(); + anon["phv_offset"] = (*(one[4])).clone(); + fields.push_back(std::move(anon)); + } + } + digentry++; + learn_quanta.push_back(std::move(quanta)); + } + } +} + +unsigned Deparser::FDEntry::Checksum::encode() { + SWITCH_FOREACH_TARGET(options.target, return encode();); + return -1; +} + +unsigned Deparser::FDEntry::Constant::encode() { + SWITCH_FOREACH_TARGET(options.target, return encode();); + return -1; +} + +void Deparser::gtest_clear() { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < MAX_DEPARSER_CHECKSUM_UNITS; j++) checksum_entries[i][j].clear(); + dictionary[i].clear(); + pov_order[i].clear(); + pov[i].clear(); + phv_use[i].clear(); + constants[i].clear(); + } + intrinsics.clear(); + digests.clear(); +} diff --git a/backends/tofino/bf-asm/deparser.h b/backends/tofino/bf-asm/deparser.h new file mode 100644 index 00000000000..c958f3dd428 --- /dev/null +++ b/backends/tofino/bf-asm/deparser.h @@ -0,0 +1,286 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef DEPARSER_H_ +#define DEPARSER_H_ + +#include + +#include + +#include "constants.h" +#include "lib/bitops.h" +#include "lib/ordered_set.h" +#include "phv.h" +#include "sections.h" + +enum { + // limits over all targets + MAX_DEPARSER_CHECKSUM_UNITS = 8, + DEPARSER_STAGE = INT_MAX, // greater than the number of stages +}; + +/** + * \ingroup parde + */ +class Deparser : public Section { + static Deparser singleton_object; + + public: + struct Val { + /* a phv or clot reference with optional associated POV phv reference */ + Phv::Ref val; + int tag = -1; + ordered_set pov; + std::reference_wrapper lineno = val.lineno; + Val() = default; + Val(const Val &) = default; + Val(Val &&) = default; + Val &operator=(Val &&) = default; + virtual ~Val() {} + Val(gress_t gr, const value_t &v) : val(gr, DEPARSER_STAGE, v) {} + Val(gress_t gr, const value_t &v, const value_t &p) : val(gr, DEPARSER_STAGE, v) { + pov.emplace(gr, DEPARSER_STAGE, p); + } + Val(gress_t gr, int tag, const value_t &p) : tag(tag) { + pov.emplace(gr, DEPARSER_STAGE, p); + } + Val &operator=(const Val &a) { + val = a.val; + tag = a.tag; + pov = a.pov; + return *this; + } + explicit operator bool() const { return is_phv() || is_clot(); } + Phv::Slice operator*() const { return *val; } + Phv::Slice operator->() const { return *val; } + bool is_phv() const { return bool(val); } + bool is_clot() const { return tag >= 0; } + virtual bool check() const { + if (is_phv() && is_clot()) { + error(lineno, "Reference cannot be phv and clot at the 
same time"); + return false; + } + if (is_phv()) { + return val.check(); + } else if (is_clot()) { + if (pov.empty()) { + error(lineno, "Clot requires a pov bit"); + return false; + } + } else { + error(lineno, "Unknown val"); + return false; + } + return true; + } + }; + + struct ChecksumVal : public Val { + int mask = 0; + int swap = 0; + ChecksumVal(gress_t gr, const value_t &v, const value_t &m) : Val(gr, v) { + if ((val->lo % 8 != 0) || (val->hi % 8 != 7)) + error(lineno, "Can only do checksums on byte-aligned container slices"); + mask = ((1 << (val->hi + 1) / 8) - 1) ^ ((1 << val->lo / 8) - 1); + + if (CHECKTYPE(m, tMAP)) { + for (auto &kv : m.map) { + if (kv.key == "pov") { + if (!pov.empty()) error(kv.value.lineno, "Duplicate POV"); + pov.emplace_back(gr, DEPARSER_STAGE, kv.value); + } else if (kv.key == "swap" && CHECKTYPE(kv.value, tINT)) { + swap = kv.value.i; + } else { + error(m.lineno, "Unknown key for checksum: %s", value_desc(kv.key)); + } + } + } + } + ChecksumVal(gress_t gr, int tag, const value_t &p) : Val(gr, tag, p) {} + ChecksumVal &operator=(const ChecksumVal &a) { + Val::operator=(a); + mask = a.mask; + swap = a.swap; + return *this; + } + ChecksumVal(const ChecksumVal &a) : Val(a) { + mask = a.mask; + swap = a.swap; + }; + ChecksumVal() : Val() {} + ChecksumVal(ChecksumVal &&) = default; + ChecksumVal &operator=(ChecksumVal &&) = default; + bool check() const override { + if (is_phv()) { + if (mask == 0) error(lineno, "mask is 0 for phv checkum value?"); + if (swap < 0 || swap > 3) error(lineno, "Invalid swap for phv checksum"); + } + return Val::check(); + } + }; + + struct FullChecksumUnit { + std::map> entries; + std::map pov; + std::set checksum_unit_invert; + std::set clot_tag_invert; + std::vector clot_entries; + bool zeros_as_ones_en = false; + }; + + struct FDEntry; + std::vector checksum_entries[2][MAX_DEPARSER_CHECKSUM_UNITS]; + FullChecksumUnit full_checksum_unit[2][MAX_DEPARSER_CHECKSUM_UNITS]; + int lineno[2]; + std::vector 
dictionary[2]; + std::vector pov_order[2]; + ordered_map pov[2]; + bitvec phv_use[2]; + std::set constants[2]; + + struct Intrinsic { + struct Type; + Type *type; + int lineno; + std::vector vals; + Intrinsic(Type *t, int l) : type(t), lineno(l) {} + }; + std::vector intrinsics; + struct Digest { + struct Type { + target_t target; + gress_t gress; + std::string name; + int count; + bool can_shift = false; + static std::map all[TARGET_INDEX_LIMIT][2]; + + protected: + Type(target_t t, gress_t gr, const char *n, int cnt) + : target(t), gress(gr), name(n), count(cnt) { + BUG_CHECK(!all[target][gress].count(name)); + all[target][gress][name] = this; + } + ~Type() { all[target][gress].erase(name); } + + public: +#define VIRTUAL_TARGET_METHODS(TARGET) \ + virtual void setregs(Target::TARGET::deparser_regs ®s, Deparser &deparser, \ + Deparser::Digest &data) { \ + BUG_CHECK(!"target mismatch"); \ + } + FOR_ALL_REGISTER_SETS(VIRTUAL_TARGET_METHODS) +#undef VIRTUAL_TARGET_METHODS + }; + + Type *type; + int lineno; + Val select; + int shift = 0; + std::map> layout; + std::unique_ptr context_json; + Digest(Type *t, int lineno, VECTOR(pair_t) & data); + }; + std::vector digests; + Deparser(); + ~Deparser(); + void start(int lineno, VECTOR(value_t) args); + void input(VECTOR(value_t) args, value_t data); + void process(); + std::vector merge_csum_entries(const std::vector &, int); + template + void process(TARGET *); + void output(json::map &); + template + void gen_learn_quanta(REGS &, json::vector &); + template + void write_config(REGS &); + + static const bitvec &PhvUse(gress_t gr) { return singleton_object.phv_use[gr]; } + + static bool add_constant(gress_t gr, int c) { + if (!singleton_object.constants[gr].count(c)) { + singleton_object.constants[gr].insert(c); + if (int(singleton_object.constants[gr].size()) > Target::DEPARSER_CONSTANTS()) + return false; + } + return true; + } + + static int constant_idx(gress_t gr, int c) { + if (singleton_object.constants[gr].count(c)) 
+ return std::distance(singleton_object.constants[gr].begin(), + singleton_object.constants[gr].find(c)); + return -1; + } + + // @return constant value that will be deparsed + static int get_constant(gress_t gr, int phv_idx) { + int i = 0; + for (auto constant : singleton_object.constants[gr]) { + if ((phv_idx - 224) == i) { + return constant; + } else { + i++; + } + } + return -1; + } + + // Writes POV information in json used for field dictionary logging + // and deparser resources + static void write_pov_in_json(json::map &fd, json::map &fd_entry, const Phv::Register *phv, + int bit, int offset) { + auto povName = Phv::get_pov_name(phv->uid, offset); + // Field dictionary logging + fd["POV PHV"] = phv->uid; + fd["POV Field bit"] = bit; + fd["POV Field Name"] = povName; + // Deparser resources + fd_entry["pov_bit"] = bit; + fd_entry["pov_name"] = povName; + return; + } + + // Digest Handle Setup + // ------------------------------------------------------ + // | Pipe ID | Field Type | Field List Handle | + // 31 ... 
28 24 0 + // Field List Handle = 24 bits + // Field List Type = 4 bits (Field list is 0x9) + // Pipe ID = 4 bits + static unsigned unique_field_list_handle; + static unsigned next_handle() { + return unique_table_offset << PIPE_ID_SHIFT | FIELD_HANDLE_START | + unique_field_list_handle++; + } + + // gtest methods + + /// @brief Get the singleton object for use in gtest + static Deparser *gtest_get_deparser() { return &singleton_object; } + + /// @brief Clear/reset the deparser object + void gtest_clear(); + + private: + // Report deparser resources to JSON file + void report_resources_deparser_json(json::vector &fde_entries_i, json::vector &fde_entries_e); +}; + +#endif /* DEPARSER_H_ */ diff --git a/backends/tofino/bf-asm/depositfield.cpp b/backends/tofino/bf-asm/depositfield.cpp new file mode 100644 index 00000000000..15f2b991bb4 --- /dev/null +++ b/backends/tofino/bf-asm/depositfield.cpp @@ -0,0 +1,39 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "depositfield.h"
+
+namespace DepositField {
+
+RotateConstant discoverRotation(int32_t val, int containerSize, int32_t tooLarge,
+                                int32_t tooSmall) {
+    int32_t containerMask = ~(UINT64_MAX << containerSize);
+    int32_t signBit = 1U << (containerSize - 1);
+    unsigned rotate = 0;
+    for (/*rotate*/; rotate < unsigned(containerSize); ++rotate) {  // unsigned compare: fixes -Wsign-compare
+        if (val > tooSmall && val < tooLarge) break;  // val fits the immediate range: encoding found
+        // Reverse the rotate-right to discover encoding; shift in unsigned domain (left-shifting
+        int32_t rotBit = (val >> (containerSize - 1)) & 1;  // a negative signed value is UB pre-C++20).
+        val = int32_t(((uint32_t(val) << 1) | uint32_t(rotBit)) & uint32_t(containerMask));
+        val |= (val & signBit) ? ~containerMask : 0;
+    }
+    // If a solution has not been found, val is back to where it started.
+    rotate %= containerSize;
+    return RotateConstant{rotate, val};
+}
+
+} // namespace DepositField
diff --git a/backends/tofino/bf-asm/depositfield.h b/backends/tofino/bf-asm/depositfield.h
new file mode 100644
index 00000000000..b7519eb2fcb
--- /dev/null
+++ b/backends/tofino/bf-asm/depositfield.h
@@ -0,0 +1,34 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_DEPOSITFIELD_H_
+#define BACKENDS_TOFINO_BF_ASM_DEPOSITFIELD_H_
+
+#include <cstdint>  // int32_t; guard renamed: _DEPOSITFIELD_H_ is a reserved identifier
+
+namespace DepositField {
+
+struct RotateConstant {
+    unsigned rotate;
+    int32_t value;
+};
+
+RotateConstant discoverRotation(int32_t val, int containerSize, int32_t tooLarge, int32_t tooSmall);
+
+}  // namespace DepositField
+
+#endif /* BACKENDS_TOFINO_BF_ASM_DEPOSITFIELD_H_ */
diff --git a/backends/tofino/bf-asm/disasm.cpp b/backends/tofino/bf-asm/disasm.cpp
new file mode 100644
index 00000000000..0a58816fc47
--- /dev/null
+++ b/backends/tofino/bf-asm/disasm.cpp
@@ -0,0 +1,29 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "disasm.h"
+
+#include <iostream>  // std::cerr
+
+Disasm *Disasm::create(std::string target) {
+#define CREATE_TARGET(TARGET, ...) \
+    if (target == Target::TARGET::name) return new Disasm::TARGET;
+    FOR_ALL_TARGETS(CREATE_TARGET);
+#undef CREATE_TARGET
+    std::cerr << "Unsupported target " << target << std::endl;
+    return nullptr;
+}
diff --git a/backends/tofino/bf-asm/disasm.h b/backends/tofino/bf-asm/disasm.h
new file mode 100644
index 00000000000..812ed4af0b5
--- /dev/null
+++ b/backends/tofino/bf-asm/disasm.h
@@ -0,0 +1,51 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef DISASM_H_
+#define DISASM_H_
+
+#include "backends/tofino/bf-asm/target.h"
+
+class Disasm {  // disassembler base; one concrete subclass per chip target is generated below
+  public:
+    FOR_ALL_TARGETS(DECLARE_TARGET_CLASS)
+    virtual ~Disasm() {}
+    virtual void input_binary(uint64_t addr, char tag, uint32_t *data, size_t len) = 0;  // consume one tagged chunk of binary config data at addr
+    static Disasm *create(std::string target);  // factory keyed by target name; returns nullptr if the name matches no target
+};
+
+#define DECLARE_DISASM_TARGET(TARGET, ...)                                                        \
+    class Disasm::TARGET : public Disasm {                                                        \
+      public:                                                                                     \
+        typedef ::Target::TARGET Target;                                                          \
+        Target::top_level_regs regs;                                                              \
+        TARGET() { declare_registers(&regs); }                                                    \
+        ~TARGET() { undeclare_registers(&regs); }                                                 \
+        TARGET(const TARGET &) = delete;                                                          \
+        __VA_ARGS__                                                                               \
+    };
+
+FOR_ALL_TARGETS(
+    DECLARE_DISASM_TARGET, void input_binary(uint64_t addr, char tag, uint32_t *data, size_t len) {
+        if (tag == 'D') {  // 'D'-tagged chunks go to the memory space; all others to registers
+            regs.mem_top.input_binary(addr, tag, data, len);
+        } else {
+            regs.reg_top.input_binary(addr, tag, data, len);
+        }
+    })
+
+#endif /* DISASM_H_ */
diff --git a/backends/tofino/bf-asm/dynhash.cpp b/backends/tofino/bf-asm/dynhash.cpp
new file mode 100644
index 00000000000..c3457c4bb58
--- /dev/null
+++ b/backends/tofino/bf-asm/dynhash.cpp
@@ -0,0 +1,64 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <fstream>
+#include <memory>
+#include <string>
+
+#include "backends/tofino/bf-asm/json.h"
+#include "bfas.h"
+#include "sections.h"
+
+class DynHash : public Section {  // handles the "dynhash" .bfa section: a JSON file merged into context.json
+    int lineno = -1;  // line of the section value, for diagnostics
+    std::unique_ptr _dynhash = nullptr;  // NOTE(review): template argument lost in extraction (likely a json type) -- restore from upstream
+    std::string _dynhashFileName;
+
+    DynHash() : Section("dynhash") {}  // private ctor: only the singleton below exists
+
+    void input(VECTOR(value_t) args, value_t data) {  // section value must be a string: the JSON file name
+        lineno = data.lineno;
+        if (!CHECKTYPE(data, tSTR)) return;
+        _dynhashFileName = data.s;
+    }
+
+    void process() {  // load the JSON; best-effort -- failures warn rather than error
+        if (_dynhashFileName.empty()) return;
+        std::ifstream inputFile(_dynhashFileName);
+        if (!inputFile && _dynhashFileName[0] != '/')
+            inputFile.open(asmfile_dir + "/" + _dynhashFileName);  // relative paths retried against asmfile_dir
+        if (!inputFile) {
+            warning(lineno, "%s: can't read file", _dynhashFileName.c_str());
+        } else {
+            inputFile >> _dynhash;  // parses the stream as JSON into _dynhash (operator>> from json.h)
+            if (!inputFile) {
+                warning(lineno, "%s: not valid dynhash json representation",
+                        _dynhashFileName.c_str());
+                _dynhash.reset(new json::map());  // fall back to an empty map so output() stays well-formed
+            }
+        }
+    }
+
+    void output(json::map &ctxtJson) {
+        ctxtJson["dynamic_hash_calculations"] = json::vector();  // this key required by schema
+        if (_dynhash) {
+            ctxtJson.merge(_dynhash->to());  // NOTE(review): template argument of to<>() lost in extraction -- restore from upstream
+        }
+    }
+
+    static DynHash singleton_dynhash;
+} DynHash::singleton_dynhash;  // defines the static singleton in the same declaration
diff --git a/backends/tofino/bf-asm/error_mode.cpp b/backends/tofino/bf-asm/error_mode.cpp
new file mode 100644
index 00000000000..ca710fd98a2
--- /dev/null
+++ b/backends/tofino/bf-asm/error_mode.cpp
@@ -0,0 +1,202 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "error_mode.h" + +#include "backends/tofino/bf-asm/stage.h" + +DefaultErrorMode DefaultErrorMode::singleton; + +ErrorMode::mode_t ErrorMode::str2mode(const value_t &v) { + if (CHECKTYPE(v, tSTR)) { + if (v == "propagate") return PROPAGATE; + if (v == "map_to_immediate") return MAP_TO_IMMEDIATE; + if (v == "disable") return DISABLE_ALL_TABLES; + if (v == "propagate_and_map") return PROPAGATE_AND_MAP; + if (v == "propagate_and_disable") return PROPAGATE_AND_DISABLE; + if (v != "no_config") error(v.lineno, "Not a valid error mode: %s", v.s); + } + return NO_CONFIG; +} + +const char *ErrorMode::mode2str(ErrorMode::mode_t m) { + switch (m) { + case NO_CONFIG: + return "no_config"; + case PROPAGATE: + return "propagate"; + case MAP_TO_IMMEDIATE: + return "map_to_immediate"; + case DISABLE_ALL_TABLES: + return "disable"; + case PROPAGATE_AND_MAP: + return "propagate_and_map"; + case PROPAGATE_AND_DISABLE: + return "propagate_and_disable"; + default: + return ""; + } +} + +ErrorMode::type_t ErrorMode::str2type(const value_t &v) { + if (CHECKTYPE(v, tSTR)) { + if (v == "tcam_match") return TCAM_MATCH; + if (v == "tind_ecc") return TIND_ECC; + if (v == "gfm_parity") return GFM_PARITY; + if (v == "emm_ecc") return EMM_ECC; + if (v == "prev_err") return PREV_ERROR; + if (v == "actiondata") return ACTIONDATA_ERROR; + if (v == "imem_parity") return IMEM_PARITY_ERROR; + error(v.lineno, "Not a valid error type: %s", v.s); + } + return TCAM_MATCH; // 
avoid invalid type here, error message has been output already +} + +const char *ErrorMode::type2str(ErrorMode::type_t t) { + switch (t) { + case TCAM_MATCH: + return "tcam_match"; + case TIND_ECC: + return "tind_ecc"; + case GFM_PARITY: + return "gfm_parity"; + case EMM_ECC: + return "emm_ecc"; + case PREV_ERROR: + return "prev_err"; + case ACTIONDATA_ERROR: + return "actiondata"; + case IMEM_PARITY_ERROR: + return "imem_parity"; + default: + return ""; + } +} + +void ErrorMode::input(value_t data) { + if (!CHECKTYPE2(data, tSTR, tMAP)) return; + if (data.type == tSTR) { + mode_t m = str2mode(data); + for (int i = 0; i < NUM_TYPE_T; ++i) { + if (i == LATE_ERROR && m != NO_CONFIG) m = PROPAGATE; + mode[i] = m; + } + } else { + for (auto &kv : MapIterChecked(data.map)) { + type_t t = str2type(kv.key); + mode_t m = str2mode(kv.value); + if (t >= LATE_ERROR && m > PROPAGATE) + error(kv.value.lineno, "%s error mode can only propagate, not %s", type2str(t), + mode2str(m)); + mode[t] = m; + } + } +} + +template +void ErrorMode::write_regs(REGS ®s, const Stage *stage, gress_t gress) { + auto &merge = regs.rams.match.merge; + int tcam_err_delay = stage->tcam_delay(gress) ? 
1 : 0; + int fifo_err_delay = + stage->pipelength(gress) - stage->pred_cycle(gress) - Target::MAU_ERROR_DELAY_ADJUST(); + bool map_to_immed = false; + bool propagate = false; +#define YES(X) X +#define NO(X) +#define HANDLE_ERROR_CASES(REG, HAVE_O_ERR_EN) \ + case NO_CONFIG: \ + break; \ + case PROPAGATE: \ + HAVE_O_ERR_EN(merge.REG[gress].REG##_o_err_en = 1;) \ + propagate = true; \ + break; \ + case PROPAGATE_AND_MAP: \ + HAVE_O_ERR_EN(merge.REG[gress].REG##_o_err_en = 1;) \ + propagate = true; \ + /* fall through */ \ + case MAP_TO_IMMEDIATE: \ + merge.REG[gress].REG##_idata_ovr = 1; \ + map_to_immed = true; \ + break; \ + case PROPAGATE_AND_DISABLE: \ + HAVE_O_ERR_EN(merge.REG[gress].REG##_o_err_en = 1;) \ + propagate = true; \ + /* fall through */ \ + case DISABLE_ALL_TABLES: \ + merge.REG[gress].REG##_dis_pred = 1; \ + break; \ + default: \ + BUG(); + + switch (mode[PREV_ERROR]) { HANDLE_ERROR_CASES(prev_error_ctl, NO) } + merge.prev_error_ctl[gress].prev_error_ctl_delay = tcam_err_delay; + if (propagate) { + switch (stage->stage_dep[gress]) { + case Stage::CONCURRENT: + merge.prev_error_ctl[gress].prev_error_ctl_conc = 1; + break; + case Stage::ACTION_DEP: + merge.prev_error_ctl[gress].prev_error_ctl_action = 1; + break; + case Stage::NONE: + if (stage->stageno == 0) { + // stage 0 does not have stage_dep set, but counts as if it was match + // dependent (on the parser). FIXME -- should just always set stage_dep to + // MATCH_DEP for stage 0? 
fall through + case Stage::MATCH_DEP: + merge.prev_error_ctl[gress].prev_error_ctl_match = 1; + break; + } + [[fallthrough]]; + default: + BUG("unexpected stage_dep: %d", stage->stage_dep[gress]); + } + } + + switch (mode[TCAM_MATCH]) { HANDLE_ERROR_CASES(tcam_match_error_ctl, YES) } + switch (mode[TIND_ECC]) { HANDLE_ERROR_CASES(tind_ecc_error_ctl, YES) } + switch (mode[GFM_PARITY]) { HANDLE_ERROR_CASES(gfm_parity_error_ctl, YES) } + merge.gfm_parity_error_ctl[gress].gfm_parity_error_ctl_delay = tcam_err_delay; + switch (mode[EMM_ECC]) { HANDLE_ERROR_CASES(emm_ecc_error_ctl, YES) } + merge.emm_ecc_error_ctl[gress].emm_ecc_error_ctl_delay = tcam_err_delay; + + if (map_to_immed) { + merge.err_idata_ovr_fifo_ctl[gress].err_idata_ovr_fifo_ctl_en = 1; + merge.err_idata_ovr_fifo_ctl[gress].err_idata_ovr_fifo_ctl_delay = fifo_err_delay - 2; + } + if (propagate) { + merge.o_error_fifo_ctl[gress].o_error_fifo_ctl_en = 1; + merge.o_error_fifo_ctl[gress].o_error_fifo_ctl_delay = fifo_err_delay; + } + + // action error sources can only propagate (too late for disable or map_to_immed + if (mode[ACTIONDATA_ERROR]) merge.actiondata_error_ctl |= 1 << gress; + if (mode[IMEM_PARITY_ERROR]) merge.imem_parity_error_ctl |= 1 << gress; + + /* TODO -- additional error cfg regs: + * rams.match.merge.err_idata_ovr_ctl[gress] + * rams.match.merge.s2p_meter_error_ctl[gress] + * rams.match.merge.s2p_stats_error_ctl[gress] + * rams.map_alu.stats_wrap[alu].stats.statistics.ctl.stats_alu_error_enable; + * rams.map_alu.meter_alu_group_error_ctl[alu] + * rams.array.row[r].actiondata_error_uram_ctl[gress] + * rams.array.row[r].emm_ecc_error_uram_ctl[gress] + * rams.array.row[r].tind_ecc_error_uram_ctl[gress] + */ +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void ErrorMode::write_regs, mau_regs &, + const Stage *, gress_t); diff --git a/backends/tofino/bf-asm/error_mode.h b/backends/tofino/bf-asm/error_mode.h new file mode 100644 index 00000000000..b0d5cb75e0b --- /dev/null +++ 
b/backends/tofino/bf-asm/error_mode.h @@ -0,0 +1,73 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_ERROR_MODE_H_ +#define BACKENDS_TOFINO_BF_ASM_ERROR_MODE_H_ + +#include "sections.h" + +class Stage; + +class ErrorMode { + public: + typedef enum { + NO_CONFIG = 0, + PROPAGATE, + MAP_TO_IMMEDIATE, + DISABLE_ALL_TABLES, + PROPAGATE_AND_MAP, + PROPAGATE_AND_DISABLE + } mode_t; + typedef enum { + TCAM_MATCH, + TIND_ECC, + GFM_PARITY, + EMM_ECC, + PREV_ERROR, + ACTIONDATA_ERROR, + IMEM_PARITY_ERROR, + NUM_TYPE_T, + LATE_ERROR = ACTIONDATA_ERROR, // this (and after) is limited + } type_t; + + mode_t mode[NUM_TYPE_T] = {NO_CONFIG}; + mode_t &operator[](type_t t) { return mode[t]; } + static mode_t str2mode(const value_t &); + static const char *mode2str(mode_t m); + static type_t str2type(const value_t &); + static const char *type2str(type_t t); + + void input(value_t data); + template + void write_regs(REGS &, const Stage *, gress_t); +}; + +class DefaultErrorMode : public Section, public ErrorMode { + DefaultErrorMode() : Section("error_mode") { + // This code sets the default error mode when the assembler is used with an older + // compiler. 
Current compiler should always set or override this in the .bfa file + for (auto &m : mode) m = PROPAGATE_AND_DISABLE; + } + static DefaultErrorMode singleton; + + public: + void input(VECTOR(value_t) args, value_t data) override { ErrorMode::input(data); } + void output(json::map &) override {} + static ErrorMode get() { return singleton; } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_ERROR_MODE_H_ */ diff --git a/backends/tofino/bf-asm/escape.h b/backends/tofino/bf-asm/escape.h new file mode 100644 index 00000000000..22cd45f6ac1 --- /dev/null +++ b/backends/tofino/bf-asm/escape.h @@ -0,0 +1,56 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_ESCAPE_H_ +#define BACKENDS_TOFINO_BF_ASM_ESCAPE_H_ + +#include +#include + +#include "lib/hex.h" + +class escape { + std::string str; + + public: + explicit escape(const std::string &s) : str(s) {} + friend std::ostream &operator<<(std::ostream &os, escape e); +}; + +inline std::ostream &operator<<(std::ostream &os, escape e) { + for (char ch : e.str) { + switch (ch) { + case '\n': + os << "\\n"; + break; + case '\t': + os << "\\t"; + break; + case '\\': + os << "\\\\"; + break; + default: + if (ch < 32 || ch >= 127) + os << "\\x" << hex(ch & 0xff, 2, '0'); + else + os << ch; + } + } + return os; +} + +#endif /* BACKENDS_TOFINO_BF_ASM_ESCAPE_H_ */ diff --git a/backends/tofino/bf-asm/exact_match.cpp b/backends/tofino/bf-asm/exact_match.cpp new file mode 100644 index 00000000000..d715a740b2f --- /dev/null +++ b/backends/tofino/bf-asm/exact_match.cpp @@ -0,0 +1,528 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "tofino/exact_match.h" + +#include "action_bus.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "hashexpr.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" +#include "lib/hex.h" +#include "misc.h" + +void ExactMatchTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::MatchEntry); + for (auto &kv : MapIterChecked(data, {"meter", "stats", "stateful"})) { + if (common_setup(kv, data, P4Table::MatchEntry)) { + // Dynamic key masks are only on exact match tables + } else if (kv.key == "dynamic_key_masks") { + if (CHECKTYPE(kv.value, tSTR)) + dynamic_key_masks = (strncmp(kv.value.s, "true", 4) == 0); + } else if (kv.key == "stash") { + CHECKTYPE(kv.value, tMAP); + for (auto &m : kv.value.map) { + if (m.key == "row") { + if (CHECKTYPE(m.value, tVEC)) { + auto rows = m.value.vec; + for (value_t &r : rows) { + if (CHECKTYPE(r, tINT)) stash_rows.push_back(r.i); + } + } + } + if (m.key == "col") { + if (CHECKTYPE(m.value, tVEC)) { + auto cols = m.value.vec; + for (value_t &c : cols) { + if (CHECKTYPE(c, tINT)) stash_cols.push_back(c.i); + } + } + } + if (m.key == "unit") { + if (CHECKTYPE(m.value, tVEC)) { + auto units = m.value.vec; + for (value_t &u : units) { + if (CHECKTYPE(u, tINT)) stash_units.push_back(u.i); + } + } + } + } + if (stash_rows.size() == 0) { + error(kv.value.lineno, "No 'row' attribute for stash info in exact match table %s", + name()); + return; + } + if (stash_cols.size() == 0) { + error(kv.value.lineno, "No 'col' attribute for stash info in exact match table %s", + name()); + return; + } + if (stash_units.size() == 0) { + error(kv.value.lineno, "No 'unit' attribute for stash info in exact match table %s", + name()); + return; + } + if (stash_units.size() != stash_rows.size()) { + error(kv.value.lineno, + "Stash units not specified correctly for each row entry " + "in exact match 
table %s", + name()); + return; + } + } else if (kv.key == "search_bus" || kv.key == "result_bus") { + // already dealt with in Table::setup_layout via common_init_setup + } else { + common_sram_setup(kv, data); + } + } + common_sram_checks(); +} + +void ExactMatchTable::pass1() { + LOG1("### Exact match table " << name() << " pass1 " << loc()); + SRamMatchTable::pass1(); + // Check if stashes are allocated (only for exact match tables). Note + // stashes are disabled on JBAY + if (stash_rows.size() == 0 && options.target == TOFINO && layout_size() > 0) + error(lineno, "No stashes allocated for exact match table %s in stage %d", name(), + stage->stageno); +} + +/** + * Any bits that are not matched directly against, but appear in the key of the p4 table, + * are ghost bits. The rules for ghost bits on exact match tables are: + * + * 1. Any field that does not appear in the match key must appear in the hash function. This + * is considered a ghost bit + * 2. A hash column can have at most one ghost bit, in order to maintain the linear + * independence of the impact of each ghost bit. + * + * The following function verifies these two properties, and saves them in a map to be output + * in the gen_ghost_bits function call + */ +void ExactMatchTable::determine_ghost_bits() { + std::set> ghost_bits; + // Determine ghost bits by determine what is not in the match + for (auto &p4_param : p4_params_list) { + for (int bit = p4_param.start_bit; bit < p4_param.start_bit + p4_param.bit_width; bit++) { + if (!p4_param.mask.empty() && !p4_param.mask[bit]) continue; // Skip non-masked bits. 
+ bool found = false; + for (auto ms : match) { + std::string field_name = ms->name(); + int field_bit_lo = remove_name_tail_range(field_name) + ms->fieldlobit(); + int field_bit_hi = field_bit_lo + ms->size() - 1; + if (field_name == p4_param.name && field_bit_lo <= bit && field_bit_hi >= bit) { + found = true; + break; + } + } + if (found) continue; + ghost_bits.emplace(p4_param.name, bit); + } + } + + BUG_CHECK(!input_xbar.empty(), "%s does not have an input xbar", name()); + for (const auto &ixb : input_xbar) { + int way_index = 0; + for (auto way : ways) { + bitvec hash_tables; + if (auto *hash_group = ixb->get_hash_group(way.group_xme)) { + hash_tables = bitvec(hash_group->tables); + } else { + for (auto &ht : ixb->get_hash_tables()) { + BUG_CHECK(ht.first.type == InputXbar::HashTable::EXACT); + hash_tables[ht.first.index] = 1; + } + } + + // key is the field name/field bit that is the ghost bit + // value is the bits that the ghost bit appears in within this way + std::map, bitvec> ghost_bit_impact; + + // Check a phv ref against the ghost bits for sanity + auto check_ref = [this, way_index, &ghost_bits, &ghost_bit_impact, &ixb](Phv::Ref &ref, + int hash_bit) { + std::string field_name = ref.name(); + int field_bit = remove_name_tail_range(field_name) + ref.fieldlobit(); + for (int i = 0; i < ref.size(); ++i) { + auto key = std::make_pair(field_name, field_bit + i); + auto ghost_bit_it = ghost_bits.find(key); + if (ghost_bit_it == ghost_bits.end()) continue; + + // This is a check to make sure that the ghost bit appears only once + // in the hash column, as an even number of appearances would + // xor each other out, and cancel the hash out. 
This check + // should be done on all hash bits + if (ghost_bit_impact[key].getbit(hash_bit)) { + error(ixb->lineno, + "Ghost bit %s:%d appears multiple times " + "in the same hash col %d", + key.first.c_str(), key.second, way_index); + return; + } + ghost_bit_impact[key].setbit(hash_bit); + } + }; + + // Calculate the ghost bit per hash way + for (unsigned hash_table_id : hash_tables) { + auto &hash_table = ixb->get_hash_table(hash_table_id); + for (auto hash_bit : way.select_bits()) { + if (hash_table.count(hash_bit) == 0) continue; + const HashCol &hash_col = hash_table.at(hash_bit); + if (hash_col.fn) { + for (auto &ref : hash_col.fn->get_sources(hash_col.bit)) + check_ref(ref, hash_bit); + } else { + for (const auto &input_bit : hash_col.data) + if (auto ref = ixb->get_hashtable_bit(hash_table_id, input_bit)) + check_ref(ref, hash_bit); + } + } + } + + // Verify that each ghost bit appears in the hash function + for (auto gb : ghost_bits) { + if (ghost_bit_impact.find(gb) == ghost_bit_impact.end()) { + error(ixb->lineno, + "Ghost bit %s:%d does not appear on the hash function " + "for way %d", + gb.first.c_str(), gb.second, way_index); + return; + } + } + + // Verify that the ghost bits are linearly independent, that only one ghost bit + // exists per column + bitvec total_use; + for (auto gbi : ghost_bit_impact) { + if (!(total_use & gbi.second).empty()) + error(ixb->lineno, "The ghost bits are not linear independent on way %d", + way_index); + total_use |= gbi.second; + } + + auto &ghost_bit_position = ghost_bit_positions[way.group_xme]; + for (auto gbi : ghost_bit_impact) { + ghost_bit_position[gbi.first] |= gbi.second; + } + way_index++; + } + } +} + +void ExactMatchTable::pass2() { + LOG1("### Exact match table " << name() << " pass2 " << loc()); + // FIXME -- does some of this common stuff belong in SRamMatch::pass2 + if (logical_id < 0) choose_logical_id(); + for (auto &ixb : input_xbar) ixb->pass2(); + setup_word_ixbar_group(); + if (actions) 
actions->pass2(this); + if (action_bus) action_bus->pass2(this); + if (gateway) gateway->pass2(); + if (idletime) idletime->pass2(); + if (format) format->pass2(this); + unsigned usable = -1; + for (auto &ixb : input_xbar) usable &= ixb->exact_physical_ids(); + allocate_physical_ids(usable); + determine_ghost_bits(); + // Derive a stash format from current table format with a single entry (we + // use group 0 entry) and all fields except 'version' and 'action' (match + // overhead). The version bits are set by the driver. + if (format) { + stash_format.reset(new Format(this)); + stash_format->size = MEM_WORD_WIDTH; + stash_format->log2size = ceil_log2(MEM_WORD_WIDTH); + auto group = 0; + for (auto f = format->begin(group); f != format->end(group); f++) { + if (f->first == "action" || f->first == "version") continue; + stash_format->add_field(f->second, f->first, group); + } + } + for (auto &hd : hash_dist) hd.pass2(this); + if (format) verify_format_pass2(); +} + +void ExactMatchTable::pass3() { + LOG1("### Exact match table " << name() << " pass3 " << loc()); + SRamMatchTable::pass3(); + if (action_bus) action_bus->pass3(this); +} + +// Check way_map for each stash row/col pair to determine which word the ram is +// assigned to and verify if it is the match overhead word. Allocate stash +// overhead row for each stash row/col pair. +void ExactMatchTable::generate_stash_overhead_rows() { + auto mem_units_per_word = format ? 
format->get_mem_units_per_table_word() : 1; + for (int i = 0; i < stash_rows.size(); i++) { + auto idx = (i + mem_units_per_word) / mem_units_per_word; + if (stash_overhead_rows.size() >= idx) continue; + auto stash_row = stash_rows[i]; + auto stash_col = stash_cols[i]; + for (auto &row : layout) { + if (row.row == stash_row) { + Ram stash_ram(stash_row, stash_col); + if (way_map.count(stash_ram) > 0) { + auto way_word = way_map[stash_ram].word; + BUG_CHECK(format); + if (way_word == format->overhead_word) { + stash_overhead_rows.push_back(stash_row); + break; + } + } + } + } + } +} + +/* FIXME -- should have ExactMatchTable::write_merge_regs write some of the merge stuff + * from write_regs? */ +template +void ExactMatchTable::write_regs_vt(REGS ®s) { + LOG1("### Exact match table " << name() << " write_regs " << loc()); + SRamMatchTable::write_regs(regs); + + for (auto &row : layout) { + auto &rams_row = regs.rams.array.row[row.row]; + for (auto &ram : row.memunits) { + auto &way = way_map[ram]; + BUG_CHECK(ram.stage == INT_MIN && ram.row == row.row, "bogus %s in row %d", ram.desc(), + row.row); + auto &ram_cfg = rams_row.ram[ram.col]; + ram_cfg.match_nibble_s0q1_enable = version_nibble_mask.getrange(way.word * 32U, 32); + ram_cfg.match_nibble_s1q0_enable = UINT64_C(0xffffffff); + } + } + + // Write stash regs if stashes are allocated + if (stash_rows.size() == 0) return; + auto &merge = regs.rams.match.merge; + auto &stash_hitmap_output_map = merge.stash_hitmap_output_map; + generate_stash_overhead_rows(); + auto mem_units_per_word = format ? format->get_mem_units_per_table_word() : 1; + for (int i = 0; i < stash_rows.size(); i++) { + auto stash_row = stash_rows[i]; + auto stash_col = stash_cols[i]; + auto stash_unit_id = stash_units[i]; + MemUnit stash_memunit(stash_row, stash_col); + auto idx = i / mem_units_per_word; + auto physical_row_with_overhead = + stash_overhead_rows.size() > idx ? 
stash_overhead_rows[idx] : ways[0].rams[0].row; + LOG5("Setting cfg for stash Row: " << stash_row << ", stash Unit: " << stash_unit_id + << " with overhead word row: " + << physical_row_with_overhead); + auto &stash_map_entry = stash_hitmap_output_map[stash_unit_id][stash_row]; + stash_map_entry.enabled_3bit_muxctl_select = physical_row_with_overhead; + stash_map_entry.enabled_3bit_muxctl_enable = 1; + auto &stash_reg = regs.rams.array.row[stash_row].stash; + auto &input_data_ctl = stash_reg.stash_match_input_data_ctl[stash_unit_id]; + input_data_ctl.stash_hash_adr_select = ways[0].index / EXACT_HASH_ADR_BITS; + input_data_ctl.stash_enable = 1; + input_data_ctl.stash_logical_table = logical_id; + input_data_ctl.stash_thread = (gress == EGRESS); + auto &stash_row_nxtable_bus_drive = + merge.stash_row_nxtable_bus_drive[stash_unit_id][stash_row]; + for (auto &row : layout) { + if (row.row != stash_row) continue; + if (contains(row.memunits, stash_memunit)) { + // Assumption is that the search or match and result buses are + // always generated on the same index + auto &stash_match_mask = stash_reg.stash_match_mask[stash_unit_id]; + if (stash_row == physical_row_with_overhead) { + // FIXME -- the overhead row should always have a result bus allocated, but + // sometimes it does not. This hack has been here for awhile and is needed + // for p4_16/compile_only/meters_0.p4 at least, but seems wrong and unsafe + int result_bus = row.bus.count(Layout::RESULT_BUS) + ? 
row.bus.at(Layout::RESULT_BUS) + : row.bus.at(Layout::SEARCH_BUS); + stash_row_nxtable_bus_drive = 1 << result_bus; + stash_reg.stash_match_result_bus_select[stash_unit_id] = 1 << result_bus; + + // Set default next table only when there is a single next table + auto &nxt_table_lut = merge.stash_next_table_lut[stash_unit_id][stash_row]; + std::set nxt_tables; + for (auto &n : hit_next) { + for (auto &n1 : n) { + nxt_tables.emplace(n1); + } + } + if (nxt_tables.size() == 0) { + nxt_table_lut = Stage::end_of_pipe(); + } else if (nxt_tables.size() == 1) { + nxt_table_lut = miss_next.next_table_id(); + } else { + nxt_table_lut = 0; + } + + // 2 entries per stash unit + nxt_table_lut |= (nxt_table_lut << 8); + + bitvec match_mask; + match_mask.setrange(0, 128); + // Since stash format can only have one entry (and no version bits) we + // generate the stash mask on exact match format with group 0 + if (Format::Field *match = format->field("match", 0)) { + for (auto &piece : match->bits) + match_mask.clrrange(piece.lo, piece.hi + 1 - piece.lo); + } + for (int word = 0; word < 4; word++) { + stash_match_mask[word] = match_mask.getrange(word * 32, 32); + } + } else { + stash_row_nxtable_bus_drive = 0; + stash_reg.stash_match_result_bus_select[stash_unit_id] = 0; + for (int word = 0; word < 4; word++) { + stash_match_mask[word] = 0; + } + } + input_data_ctl.stash_match_data_select = row.bus.at(Layout::SEARCH_BUS); + input_data_ctl.stash_hashbank_select = row.bus.at(Layout::SEARCH_BUS); + break; + } + } + } +} + +void ExactMatchTable::gen_tbl_cfg(json::vector &out) const { + LOG3("### Exact match table " << name() << " gen_tbl_cfg " << loc()); + unsigned size = get_number_entries(); + json::map &tbl = *base_tbl_cfg(out, "match", size); + add_all_reference_tables(tbl); + json::map &stage_tbl = *add_common_sram_tbl_cfgs(tbl, "exact", "hash_match"); + add_pack_format(stage_tbl, format.get(), true, false); + stage_tbl["memory_resource_allocation"] = nullptr; + if 
(stash_rows.size() > 0) { + json::map &stash_allocation = stage_tbl["stash_allocation"] = json::map(); + // Add 'action' field if present + if (format && stash_format) { + int group = 0; + for (auto f = format->begin(group); f != format->end(group); f++) { + if (f->first == "action") stash_format->add_field(f->second, f->first, group); + } + } + add_pack_format(stash_allocation, stash_format.get(), false, true); + auto mem_units_per_word = format ? format->get_mem_units_per_table_word() : 1; + auto &stash_pack_formats = stash_allocation["pack_format"]->to(); + for (auto &stash_pack_format : stash_pack_formats) { + json::map &pack = stash_pack_format->to(); + pack["number_memory_units_per_table_word"] = mem_units_per_word; + pack["table_word_width"] = MEM_WORD_WIDTH * mem_units_per_word; + } + auto num_stash_entries = stash_rows.size() / mem_units_per_word * 2; + stash_allocation["num_stash_entries"] = num_stash_entries; + json::vector &stash_entries = stash_allocation["stash_entries"] = json::vector(); + for (int k = 0; k < stash_rows.size() / mem_units_per_word; k++) { + for (int i = 0; i < 2; i++) { + json::vector stash_entry; + for (int j = 0; j < mem_units_per_word; j++) { + auto stash_row = stash_rows[k * mem_units_per_word + j]; + auto stash_col = stash_cols[k * mem_units_per_word + j]; + auto stash_unit = stash_units[k * mem_units_per_word + j]; + MemUnit stash_memunit(stash_row, stash_col); + json::map stash_entry_per_unit; + stash_entry_per_unit["stash_entry_id"] = (4 * stash_row) + (2 * stash_unit) + i; + for (auto &row : layout) { + if (row.row != stash_row) continue; + if (contains(row.memunits, stash_memunit)) { + int bus = row.bus.at(Layout::SEARCH_BUS); + stash_entry_per_unit["stash_match_data_select"] = bus; + stash_entry_per_unit["stash_hashbank_select"] = bus; + stash_entry_per_unit["hash_function_id"] = k; + break; + } + } + stash_entry.push_back(std::move(stash_entry_per_unit)); + } + stash_entries.push_back(std::move(stash_entry)); + } + } + } 
else { + stage_tbl["stash_allocation"] = nullptr; + } + json::map &match_attributes = tbl["match_attributes"]; + match_attributes["uses_dynamic_key_masks"] = dynamic_key_masks; + if (ways.size() > 0) { + json::vector &way_stage_tables = stage_tbl["ways"] = json::vector(); + unsigned way_number = 0; + for (auto &way : ways) { + json::map way_tbl; + way_tbl["stage_number"] = stage->stageno; + way_tbl["way_number"] = way_number++; + way_tbl["stage_table_type"] = "hash_way"; + auto fmt_width = get_format_width(); + BUG_CHECK(fmt_width); + unsigned ram_depth = way.rams.at(0).isLamb() ? LAMB_DEPTH : SRAM_DEPTH; + way_tbl["size"] = way.rams.size() / fmt_width * format->groups() * ram_depth; + add_pack_format(way_tbl, format.get(), false); + way_tbl["memory_resource_allocation"] = gen_memory_resource_allocation_tbl_cfg(way); + way_stage_tables.push_back(std::move(way_tbl)); + } + } + if (size == 0) { + if (!match_attributes.count("match_type")) + match_attributes["match_type"] = "match_with_no_key"; + if (!stage_tbl["stage_table_type"]) stage_tbl["stage_table_type"] = "match_with_no_key"; + stage_tbl["size"] = 1; + } + if (stage_tbl["stage_table_type"] == "hash_match") { + // hash_match table schema requires 'hash_functions' and 'ways' so add (empty) if + // they are not present + if (!stage_tbl["hash_functions"]) stage_tbl["hash_functions"] = json::vector(); + if (!stage_tbl["ways"]) stage_tbl["ways"] = json::vector(); + } +} + +/** + * The ghost_bits information is required by the driver to correctly run an entry read from + * hardware. Ghost bits are bits that do not appear in the key, and must be calculated + * from the hash matrix. + * + * The ghost_bits information is broken into two vectors: + * + * - ghost_bit_info: a vector of information on ghost bits, maps of 2 fields + * 1. field_name - name of the field being ghosted + * 2. 
bit_in_match_spec - awfully named for the field bit (not the bit in the entire key) + * + * - ghost_bit_to_hash_bit: a vector per each entry in the ghost_bit_info describing which + * hash bits coordinate to which ghost bits + */ +void ExactMatchTable::gen_ghost_bits(int hash_function_number, + json::vector &ghost_bits_to_hash_bits, + json::vector &ghost_bits_info) const { + if (ghost_bit_positions.count(hash_function_number) == 0) return; + auto ghost_bit_pos = ghost_bit_positions.at(hash_function_number); + + for (auto kv : ghost_bit_pos) { + json::map ghost_bit_info; + auto field_name = kv.first.first; + auto global_name = field_name; + auto p4_param = find_p4_param(field_name); + if (p4_param && !p4_param->key_name.empty()) field_name = p4_param->key_name; + ghost_bit_info["field_name"] = field_name; + ghost_bit_info["global_name"] = global_name; + ghost_bit_info["bit_in_match_spec"] = kv.first.second; + ghost_bits_info.push_back(std::move(ghost_bit_info)); + + json::vector ghost_bit_to_hash_bits; + for (auto hash_bit : kv.second) ghost_bit_to_hash_bits.push_back(hash_bit); + ghost_bits_to_hash_bits.push_back(std::move(ghost_bit_to_hash_bits)); + } +} + +DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(ExactMatchTable, TARGET_CLASS) diff --git a/backends/tofino/bf-asm/exename.cpp b/backends/tofino/bf-asm/exename.cpp new file mode 100644 index 00000000000..5fe2e6a162c --- /dev/null +++ b/backends/tofino/bf-asm/exename.cpp @@ -0,0 +1,69 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "exename.h" + +#include +#include +#include +#include + +#include +#include + +#include "bfas.h" + +template +static void convertToAbsPath(const char *const relPath, char (&output)[N]) { + output[0] = '\0'; // Default to the empty string, indicating failure. + + char cwd[PATH_MAX]; + if (!getcwd(cwd, sizeof(cwd))) return; + const size_t cwdLen = strlen(cwd); + if (cwdLen == 0) return; + const char *separator = cwd[cwdLen - 1] == '/' ? "" : "/"; + + // Construct an absolute path. We're assuming that @relPath is relative to + // the current working directory. + int n = snprintf(output, N, "%s%s%s", cwd, separator, relPath); + BUG_CHECK(n >= 0, "Pathname too long"); +} + +const char *exename(const char *argv0) { + static char buffer[PATH_MAX]; + if (buffer[0]) return buffer; // done already + int len; + /* find the path of the executable. We use a number of techniques that may fail + * or work on different systems, and take the first working one we find. 
Fall + * back to not overriding the compiled-in installation path */ + if ((len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1)) > 0 || + (len = readlink("/proc/curproc/exe", buffer, sizeof(buffer) - 1)) > 0 || + (len = readlink("/proc/curproc/file", buffer, sizeof(buffer) - 1)) > 0 || + (len = readlink("/proc/self/path/a.out", buffer, sizeof(buffer) - 1)) > 0) { + buffer[len] = 0; + } else if (argv0 && argv0[0] == '/') { + snprintf(buffer, sizeof(buffer), "%s", argv0); + } else if (argv0 && strchr(argv0, '/')) { + convertToAbsPath(argv0, buffer); + } else if (getenv("_")) { + strncpy(buffer, getenv("_"), sizeof(buffer)); + buffer[sizeof(buffer) - 1] = 0; + } else { + buffer[0] = 0; + } + return buffer; +} diff --git a/backends/tofino/bf-asm/exename.h b/backends/tofino/bf-asm/exename.h new file mode 100644 index 00000000000..4e2523b5daf --- /dev/null +++ b/backends/tofino/bf-asm/exename.h @@ -0,0 +1,25 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_EXENAME_H_ +#define BACKENDS_TOFINO_BF_ASM_EXENAME_H_ + +/** Attempt to determine the executable name and return a static path to it. 
Will use + * argv0 if provided and nothing better can be found */ +const char *exename(const char *argv0 = nullptr); + +#endif /* BACKENDS_TOFINO_BF_ASM_EXENAME_H_ */ diff --git a/backends/tofino/bf-asm/fdstream.cpp b/backends/tofino/bf-asm/fdstream.cpp new file mode 100644 index 00000000000..0e49aee298b --- /dev/null +++ b/backends/tofino/bf-asm/fdstream.cpp @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "fdstream.h" + +#include + +#include + +#define BUFSIZE 1024 + +fdstream::buffer_t::int_type fdstream::buffer_t::underflow() { + if (!gptr()) { + char_type *n = new char_type[BUFSIZE]; + setg(n, n, n); + } else if (gptr() != egptr()) { + size_t len = egptr() - gptr(); + if (len > 0) std::memmove(eback(), gptr(), len * sizeof(char_type)); + setg(eback(), eback(), eback() + len); + } else { + setg(eback(), eback(), eback()); + } + int rv = ::read(fd, egptr(), eback() + BUFSIZE - egptr()); + if (rv > 0) + setg(eback(), eback(), egptr() + rv); + else if (gptr() == egptr()) + return traits_type::eof(); + return traits_type::to_int_type(*gptr()); +} + +fdstream::buffer_t::int_type fdstream::buffer_t::overflow(fdstream::buffer_t::int_type c) { + if (!pptr()) { + char_type *n = new char_type[BUFSIZE]; + setp(n, n + BUFSIZE); + } + if (pptr() != pbase()) { + int rv = ::write(fd, pbase(), pptr() - pbase()); + if (rv <= 0) return 
traits_type::eof(); + if (pbase() + rv == pptr()) + setp(pbase(), epptr()); + else { + size_t len = pptr() - pbase() + rv; + std::memmove(pbase(), pbase() + rv, len); + setp(pbase(), epptr()); + pbump(len); + } + } + if (!traits_type::eq_int_type(c, traits_type::eof())) { + *pptr() = c; + pbump(1); + return c; + } else { + return traits_type::not_eof(c); + } +} + +int fdstream::buffer_t::sync() { + char *p = pbase(), *e = pptr(); + while (p != e) { + int rv = ::write(fd, p, e - p); + if (rv <= 0) { + if (p != pbase()) std::memmove(pbase(), p, e - p); + setp(pbase(), epptr()); + pbump(e - p); + return -1; + } + p += rv; + } + setp(pbase(), epptr()); + return 0; +} diff --git a/backends/tofino/bf-asm/fdstream.h b/backends/tofino/bf-asm/fdstream.h new file mode 100644 index 00000000000..8cd4fb96775 --- /dev/null +++ b/backends/tofino/bf-asm/fdstream.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_FDSTREAM_H_ +#define BACKENDS_TOFINO_BF_ASM_FDSTREAM_H_ + +#include +#include + +#include +#include +#include + +class fdstream : public std::iostream { + struct buffer_t : public std::basic_streambuf { + int fd; + + public: + explicit buffer_t(int _fd) : fd(_fd) {} + ~buffer_t() { + delete[] eback(); + delete[] pbase(); + } + int sync(); + int_type underflow(); + int_type overflow(int_type c = traits_type::eof()); + void reset() { + setg(eback(), eback(), eback()); + setp(pbase(), epptr()); + } + } buffer; + std::function closefn; + + public: + explicit fdstream(int fd = -1) : std::iostream(&buffer), buffer(fd) { init(&buffer); } + ~fdstream() { + if (closefn) closefn(); + } + void connect(int fd) { + flush(); + buffer.reset(); + buffer.fd = fd; + } + void setclose(std::function fn) { closefn = fn; } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_FDSTREAM_H_ */ diff --git a/backends/tofino/bf-asm/flexible_headers.cpp b/backends/tofino/bf-asm/flexible_headers.cpp new file mode 100644 index 00000000000..df3489a6a51 --- /dev/null +++ b/backends/tofino/bf-asm/flexible_headers.cpp @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "backends/tofino/bf-asm/sections.h" + +namespace BFASM { + +// Singleton class representing the assembler flexible_headers +class FlexibleHeaders : public Section { + private: + std::unique_ptr flexHeaders; + + FlexibleHeaders() : Section("flexible_headers") {} + + void input(VECTOR(value_t) args, value_t data) { + if (!CHECKTYPE(data, tVEC)) return; + flexHeaders = std::move(toJson(data.vec)); + } + + void output(json::map &ctxtJson) { + if (flexHeaders != nullptr) ctxtJson["flexible_headers"] = std::move(flexHeaders); + } + + public: + // disable any other constructors + FlexibleHeaders(FlexibleHeaders const &) = delete; + void operator=(FlexibleHeaders const &) = delete; + + static FlexibleHeaders singleton_flexHeaders; +} FlexibleHeaders::singleton_flexHeaders; + +}; // namespace BFASM diff --git a/backends/tofino/bf-asm/gateway.cpp b/backends/tofino/bf-asm/gateway.cpp new file mode 100644 index 00000000000..48d8b74b20f --- /dev/null +++ b/backends/tofino/bf-asm/gateway.cpp @@ -0,0 +1,918 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/hashexpr.h" +#include "backends/tofino/bf-asm/input_xbar.h" +#include "backends/tofino/bf-asm/instruction.h" +#include "backends/tofino/bf-asm/misc.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "lib/algorithm.h" +#include "lib/hex.h" + +// template specialization declarations +#include "backends/tofino/bf-asm/jbay/gateway.h" +#include "backends/tofino/bf-asm/tofino/gateway.h" + +static struct { + unsigned units, bits, half_shift, mask, half_mask; +} range_match_info[] = {{0, 0, 0, 0, 0}, {6, 4, 2, 0xf, 0x3}, {3, 8, 8, 0xffff, 0xff}}; + +// Dummy value used to start gateway handles. For future use by driver, +// Incremented from inside the gateway table +static uint gateway_handle = 0x70000000; + +GatewayTable::Match::Match(value_t *v, value_t &data, range_match_t range_match) { + if (range_match) { + for (unsigned i = 0; i < range_match_info[range_match].units; i++) + range[i] = range_match_info[range_match].mask; + } + if (v) { + lineno = v->lineno; + if (v->type == tVEC) { + int last = v->vec.size - 1; + if (last > static_cast(range_match_info[range_match].units)) + error(lineno, "Too many set values for range match"); + for (int i = 0; i < last; i++) + if (CHECKTYPE((*v)[last - i - 1], tINT)) { + if ((unsigned)(*v)[last - i - 1].i > range_match_info[range_match].mask) + error(lineno, "range match set too large"); + range[i] = (*v)[last - i - 1].i; + } + v = &(*v)[last]; + } + if (v->type == tINT) { + val.word1 = bitvec(v->i); + val.word0.setrange(0, 64); + val.word0 -= val.word1; + } else if (v->type == tBIGINT) { + val.word1.setraw(v->bigi.data, v->bigi.size); + val.word0.setrange(0, v->bigi.size * 64); + val.word0 -= val.word1; + } else if (v->type == tMATCH) { + val = v->m; + } else if (v->type == tBIGMATCH) { + val = v->bigm; + } + } + if (data == "run_table") { + run_table = true; + } else if (data.type == tSTR || 
data.type == tVEC) { + next = data; + } else if (data.type == tMAP) { + for (auto &kv : MapIterChecked(data.map)) { + if (kv.key == "next") { + next = kv.value; + } else if (kv.key == "run_table") { + if (kv.value == "true") + run_table = true; + else if (kv.value == "false") + run_table = false; + else + error(kv.value.lineno, "Syntax error, expecting boolean"); + } else if (kv.key == "action") { + if (CHECKTYPE(kv.value, tSTR)) action = kv.value.s; + } else { + error(kv.key.lineno, "Syntax error, expecting gateway action description"); + } + } + if (run_table && next.set()) + error(data.lineno, "Can't run table and override next in the same gateway row"); + } else { + error(data.lineno, "Syntax error, expecting gateway action description"); + } +} + +void GatewayTable::setup(VECTOR(pair_t) & data) { + setup_logical_id(); + if (auto *v = get(data, "range")) { + if (CHECKTYPE(*v, tINT)) { + if (v->i == 2) range_match = DC_2BIT; + if (v->i == 4) + range_match = DC_4BIT; + else + error(v->lineno, "Unknown range match size %" PRId64 " bits", v->i); + } + } + for (auto &kv : MapIterChecked(data, true)) { + if (kv.key == "name") { + if (CHECKTYPE(kv.value, tSTR)) gateway_name = kv.value.s; + } else if (kv.key == "row") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (kv.value.i < 0 || kv.value.i > Target::GATEWAY_ROWS()) + error(kv.value.lineno, "row %" PRId64 " out of range", kv.value.i); + if (layout.empty()) layout.resize(1); + layout[0].row = kv.value.i; + layout[0].lineno = kv.value.lineno; + } else if (kv.key == "bus") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (kv.value.i < 0 || kv.value.i > 1) + error(kv.value.lineno, "bus %" PRId64 " out of range", kv.value.i); + if (layout.empty()) layout.resize(1); + layout[0].bus[Layout::SEARCH_BUS] = kv.value.i; + if (layout[0].lineno < 0) layout[0].lineno = kv.value.lineno; + } else if (kv.key == "payload_row") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (kv.value.i < 0 || kv.value.i > 7) + 
error(kv.value.lineno, "row %" PRId64 " out of range", kv.value.i); + if (layout.size() < 2) layout.resize(2); + layout[1].row = kv.value.i; + layout[1].lineno = kv.value.lineno; + } else if (kv.key == "payload_bus") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (kv.value.i < 0 || kv.value.i > 3) + error(kv.value.lineno, "bus %" PRId64 " out of range", kv.value.i); + if (layout.size() < 2) layout.resize(2); + layout[1].bus[Layout::RESULT_BUS] = kv.value.i; + if (layout[1].lineno < 0) layout[1].lineno = kv.value.lineno; + } else if (kv.key == "payload_unit") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (kv.value.i < 0 || kv.value.i > 1) + error(kv.value.lineno, "payload unit %" PRId64 " out of range", kv.value.i); + payload_unit = kv.value.i; + } else if (kv.key == "gateway_unit" || kv.key == "unit") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (kv.value.i < 0 || kv.value.i > 1) + error(kv.value.lineno, "gateway unit %" PRId64 " out of range", kv.value.i); + gw_unit = kv.value.i; + } else if (kv.key == "input_xbar") { + if (CHECKTYPE(kv.value, tMAP)) + input_xbar.emplace_back(InputXbar::create(this, false, kv.key, kv.value.map)); + } else if (kv.key == "format") { + if (CHECKTYPEPM(kv.value, tMAP, kv.value.map.size > 0, "non-empty map")) + format.reset(new Format(this, kv.value.map)); + } else if (kv.key == "always_run") { + if ((always_run = get_bool(kv.value)) && !Target::SUPPORT_ALWAYS_RUN()) + error(kv.key.lineno, "always_run not supported on %s", Target::name()); + } else if (kv.key == "miss") { + miss = Match(0, kv.value, range_match); + } else if (kv.key == "condition") { + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &v : kv.value.map) { + if (v.key == "expression" && CHECKTYPE(v.value, tSTR)) + gateway_cond = v.value.s; + else if (v.key == "true") + cond_true = Match(0, v.value, range_match); + else if (v.key == "false") + cond_false = Match(0, v.value, range_match); + } + } + } else if (kv.key == "payload") { + if (CHECKTYPE2(kv.value, 
tINT, tBIGINT)) payload = get_int64(kv.value); + /* FIXME -- should also be able to specify payload as () */ + have_payload = kv.key.lineno; + } else if (kv.key == "payload_map") { + if (kv.value.type == tVEC) { + if (kv.value.vec.size > Target::GATEWAY_PAYLOAD_GROUPS()) + error(kv.value.lineno, "payload_map too large (limit %d)", + Target::GATEWAY_PAYLOAD_GROUPS()); + for (auto &v : kv.value.vec) { + if (v == "_") + payload_map.push_back(-1); + else if (CHECKTYPE(v, tINT)) + payload_map.push_back(v.i); + } + } + } else if (kv.key == "match_address") { + if (CHECKTYPE(kv.value, tINT)) match_address = kv.value.i; + } else if (kv.key == "match") { + if (kv.value.type == tVEC) { + for (auto &v : kv.value.vec) match.emplace_back(gress, stage->stageno, v); + } else if (kv.value.type == tMAP) { + for (auto &v : kv.value.map) { + if (CHECKTYPE(v.key, tINT)) { + if (v.value.type == tCMD && v.value.vec.size == 2 && + v.value.vec[0] == "$valid") { + match.emplace_back(v.key.i, gress, stage->stageno, v.value.vec[1], + true); + } else { + match.emplace_back(v.key.i, gress, stage->stageno, v.value); + } + } + } + } else { + match.emplace_back(gress, stage->stageno, kv.value); + } + } else if (kv.key == "range") { + /* done above, to be before match parsing */ + } else if (kv.key == "xor") { + if (kv.value.type == tVEC) { + for (auto &v : kv.value.vec) xor_match.emplace_back(gress, stage->stageno, v); + } else if (kv.value.type == tMAP) { + for (auto &v : kv.value.map) + if (CHECKTYPE(v.key, tINT)) + xor_match.emplace_back(v.key.i, gress, stage->stageno, v.value); + } else { + xor_match.emplace_back(gress, stage->stageno, kv.value); + } + } else if (kv.key == "long_branch" && Target::LONG_BRANCH_TAGS() > 0) { + if (options.disable_long_branch) error(kv.key.lineno, "long branches disabled"); + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &lb : kv.value.map) { + if (lb.key.type != tINT || lb.key.i < 0 || + lb.key.i >= Target::LONG_BRANCH_TAGS()) + error(lb.key.lineno, "Invalid 
long branch tag %s", value_desc(lb.key)); + else if (long_branch.count(lb.key.i)) + error(lb.key.lineno, "Duplicate long branch tag %" PRIi64, lb.key.i); + else + long_branch.emplace(lb.key.i, lb.value); + } + } + } else if (kv.key == "context_json") { + setup_context_json(kv.value); + } else if (kv.key.type == tINT || kv.key.type == tBIGINT || kv.key.type == tMATCH || + (kv.key.type == tVEC && range_match != NONE)) { + table.emplace_back(&kv.key, kv.value, range_match); + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } +} + +bool GatewayTable::check_match_key(MatchKey &key, const std::vector &vec, bool is_xor) { + if (!key.val.check()) return false; + if (key.val->reg.mau_id() < 0) + error(key.val.lineno, "%s not accessable in mau", key.val->reg.name); + if (key.offset >= 0) { + for (auto &okey : vec) { + if (&okey == &key) break; + if (key.offset < okey.offset + static_cast(okey.val->size()) && + okey.offset < key.offset + static_cast(key.val->size())) + error(key.val.lineno, + "Gateway %s key at offset %d overlaps previous " + "value at offset %d", + is_xor ? 
"xor" : "match", key.offset, okey.offset); + } + } else if (&key == &vec[0]) { + key.offset = 0; + } else { + auto *prev = &key - 1; + key.offset = prev->offset + prev->val->size(); + } + return true; +} + +void GatewayTable::verify_format() { + if (format->log2size > 6) + error(format->lineno, "Gateway payload format too large (max 64 bits)"); + format->log2size = 6; + format->pass1(this); + if (format->groups() > Target::GATEWAY_PAYLOAD_GROUPS()) + error(format->lineno, "Too many groups for gateway payload"); + if (payload_map.empty()) { + if (format->groups() == 1) { + payload_map.push_back(0); + } else { + payload_map = std::vector(Target::GATEWAY_PAYLOAD_GROUPS(), -1); + int i = Target::GATEWAY_PAYLOAD_GROUPS() - 2; + int grp = 0; + for (auto &row : table) { + if (!row.run_table && i >= 0) { + if (grp >= format->groups() && format->groups() > 1) { + error(format->lineno, "Not enough groups in format for payload"); + grp = 0; + } + payload_map[i--] = grp++; + } + } + if (!miss.run_table) payload_map.back() = format->groups() - 1; + } + } + for (auto pme : payload_map) { + if (pme < -1 || pme >= int(format->groups())) + error(format->lineno, "Invalid format group %d in payload_map", pme); + } + if (match_table) { + if (match_table->table_type() == TERNARY) { + if (format->groups() > 1) + error(format->lineno, + "Can't have mulitple payload format groups when attached " + "to a ternary table"); + } else if (!match_table->format) { + // ok + } else if (auto *srm = match_table->to()) { + int groups = std::min(format->groups(), match_table->format->groups()); + bool err = false; + for (auto &field : *format) { + if (auto match_field = match_table->format->field(field.first)) { + int match_group = -1; + for (auto gw_group : payload_map) { + ++match_group; + if (gw_group < 0) continue; + int em_group = match_group; + if (!srm->word_info.empty()) { + if (match_group < srm->word_info[0].size()) + em_group = srm->word_info[0][match_group]; + else + em_group = -1; + } + 
if (em_group < 0) continue; + if (field.second.by_group[gw_group]->bits != + match_field->by_group[em_group]->bits) { + if (!err) { + error(format->lineno, + "Gateway format inconsistent with table " + "%s it is attached to", + match_table->name()); + error(match_table->format->lineno, "field %s inconsistent", + field.first.c_str()); + err = true; + break; + } + } + } + } else { + if (!err) + error(format->lineno, + "Gateway format inconsistent with table %s it is " + "attached to", + match_table->name()); + error(match_table->format->lineno, "No field %s in match table format", + field.first.c_str()); + err = true; + } + } + } + } else if (layout.size() > 1) { + if (!layout[1].bus.count(Layout::RESULT_BUS)) { + error(layout[1].lineno, "No result bus for gateway payload"); + } else { + int result_bus = layout[1].bus.at(Layout::RESULT_BUS); + if (result_bus > 3) + error(layout[1].lineno, "Invalid bus %d for gateway payload", result_bus); + if ((result_bus & 2) && format->groups() > 1) + error(format->lineno, + "Can't have mulitple payload format groups when using " + "ternary indirect bus"); + } + } +} + +void GatewayTable::pass1() { + LOG1("### Gateway table " << name() << " pass1 " << loc()); + if (!match_table) { + // needs to happen before Actions::pass1, but will have been called from the + // match table if this gateway is attached to one. + setup_map_indexing(this); + } + Table::pass1(); +#if 0 + // redundant with (and supercedes) choose_logical_id in pass2. 
That function is much + // better, taking dependencies into account, so logical_id should not be allocated here + alloc_id("logical", logical_id, stage->pass1_logical_id, + LOGICAL_TABLES_PER_STAGE, true, stage->logical_id_use); +#endif + if (always_run && match_table) + error(lineno, "always_run set on non-standalone gateway for %s", match_table->name()); + if (gw_unit >= 0) { + if (auto *old = stage->gw_unit_use[layout[0].row][gw_unit]) + error(layout[0].lineno, "gateway %d.%d already in use by table %s", layout[0].row, + gw_unit, old->name()); + else + stage->gw_unit_use[layout[0].row][gw_unit] = this; + } + for (auto &ixb : input_xbar) { + ixb->pass1(); + if (Target::GATEWAY_SINGLE_XBAR_GROUP() && ixb->match_group() < 0) + error(ixb->lineno, "Gateway match keys must be in a single ixbar group"); + } + for (auto &k : match) + if (!check_match_key(k, match, false)) break; + for (auto &k : xor_match) + if (!check_match_key(k, xor_match, true)) break; + std::sort(match.begin(), match.end()); + std::sort(xor_match.begin(), xor_match.end()); + if (table.size() > 4) error(lineno, "Gateway can only have 4 match entries max"); + for (auto &line : table) check_next(line.next); + check_next(miss.next); + check_next(cond_false.next); + check_next(cond_true.next); + if (format) verify_format(); + + if (error_count > 0) return; + /* FIXME -- the rest of this function is a hack -- sometimes the compiler wants to + * generate matches just covering the bits it names in the match and other times it wants + * to create the whole tcam value. Need to fix the asm syntax to be sensible and fix the + * compiler's output. + * Part of the issue is that in tofino1/2 we copy the word0/word1 bits directly to + * the tcam, so we need to treat unspecified bits as don't care. Another part is that + * integer constants used as matches get padded with 0 out to a mulitple of 64 bits, + * and those should also be don't care where they don't get matched. 
+ */ + bitvec ignore(0, Target::GATEWAY_MATCH_BITS()); + int shift = -1; + int maxbit = 0; + for (auto &r : match) { + if (range_match && r.offset >= 32) { + continue; + } + ignore.clrrange(r.offset, r.val->size()); + if (shift < 0 || shift > r.offset) shift = r.offset; + if (maxbit < r.offset + r.val->size()) maxbit = r.offset + r.val->size(); + } + if (shift < 0) shift = 0; + LOG3("shift=" << shift << " ignore=0x" << ignore); + for (auto &line : table) { + bitvec matching = (line.val.word0 ^ line.val.word1) << shift; + matching -= (line.val.word0 << shift) - bitvec(0, maxbit); // ignore leading 0s + if (matching & ignore) + warning(line.lineno, "Trying to match on bits not in match of gateway"); + line.val.word0 = (line.val.word0 << shift) | ignore; + line.val.word1 = (line.val.word1 << shift) | ignore; + } +} + +int GatewayTable::find_next_lut_entry(Table *tbl, const Match &match) { + int rv = 0; + for (auto &e : tbl->hit_next) { + if (e == match.next) return rv; + ++rv; + } + for (auto &e : tbl->extra_next_lut) { + if (e == match.next) return rv; + ++rv; + } + tbl->extra_next_lut.push_back(match.next); + if (rv == Target::NEXT_TABLE_SUCCESSOR_TABLE_DEPTH()) + error(tbl->lineno, "Too many next table map entries in table %s", tbl->name()); + return rv; +} + +void GatewayTable::pass2() { + LOG1("### Gateway table " << name() << " pass2 " << loc()); + if (logical_id < 0) { + if (match_table) + logical_id = match_table->logical_id; + else + choose_logical_id(); + } + for (auto &ixb : input_xbar) ixb->pass2(); + need_next_map_lut = miss.next.need_next_map_lut(); + for (auto &e : table) need_next_map_lut |= e.next.need_next_map_lut(); + if (need_next_map_lut) { + Table *tbl = match_table; + if (!tbl) tbl = this; + for (auto &e : table) + if (!e.run_table && e.next_map_lut < 0) e.next_map_lut = find_next_lut_entry(tbl, e); + if (!miss.run_table && miss.next_map_lut < 0) + miss.next_map_lut = find_next_lut_entry(tbl, miss); + } +} + +void GatewayTable::pass3() { + 
LOG1("### Gateway table " << name() << " pass3 " << loc()); + if (match_table) + physical_ids = match_table->physical_ids; + else + allocate_physical_ids(); +} + +static unsigned match_input_use(const std::vector &match) { + unsigned rv = 0; + for (auto &r : match) { + unsigned lo = r.offset; + unsigned hi = lo + r.val->size() - 1; + if (lo < 32) { + rv |= (((UINT32_C(1) << (hi / 8 - lo / 8 + 1)) - 1) << lo / 8) & 0xf; + lo = 32; + } + if (lo <= hi) rv |= ((UINT32_C(1) << (hi - lo + 1)) - 1) << (lo - 24); + } + return rv; +} + +/* caluclate match_bus byte use (8 bytes/bits) + hash output use (12 bits) */ +unsigned GatewayTable::input_use() const { + unsigned rv = match_input_use(match) | match_input_use(xor_match); + if (!xor_match.empty()) rv |= (rv & 0xf) << 4; + return rv; +} + +bool GatewayTable::is_branch() const { + for (auto &line : table) + if (line.next.next_table() != nullptr) return true; + if (!miss.run_table && miss.next.next_table() != nullptr) return true; + return false; +} + +/* FIXME -- how to deal with (or even specify) matches in the upper 24 bits coming from + * the hash bus? Currently we assume that the input_xbar is declared to set up the + * hash signals correctly so that we can just match them. Should at least check it + * somewhere, somehow. We do some checking in check_match_key above, but is that enough? 
+ */ +template +static bool setup_vh_xbar(REGS ®s, Table *table, Table::Layout &row, int base, + std::vector &match, int group) { + auto &rams_row = regs.rams.array.row[row.row]; + auto &byteswizzle_ctl = + rams_row.exactmatch_row_vh_xbar_byteswizzle_ctl[row.bus.at(Table::Layout::SEARCH_BUS)]; + for (auto &r : match) { + if (r.offset >= 32) break; /* skip hash matches */ + for (int bit = 0; bit < r.val->size(); ++bit) { + int ibyte = table->find_on_ixbar(*Phv::Ref(r.val, bit, bit), group); + if (ibyte < 0) { + error(r.val.lineno, "Can't find %s(%d) on ixbar", r.val.desc().c_str(), bit); + return false; + } + unsigned byte = base + (r.offset + bit) / 8; + byteswizzle_ctl[byte][(r.val->lo + bit) & 7] = 0x10 + ibyte; + } + } + return true; +} + +template +void enable_gateway_payload_exact_shift_ovr(REGS ®s, int bus) { + regs.rams.match.merge.gateway_payload_exact_shift_ovr[bus / 8] |= 1U << bus % 8; +} + +template +void GatewayTable::payload_write_regs(REGS ®s, int row, int type, int bus) { + auto &merge = regs.rams.match.merge; + auto &xbar_ctl = merge.gateway_to_pbus_xbar_ctl[row * 2 + bus]; + if (type) { + xbar_ctl.tind_logical_select = logical_id; + xbar_ctl.tind_inhibit_enable = 1; + } else { + xbar_ctl.exact_logical_select = logical_id; + xbar_ctl.exact_inhibit_enable = 1; + } + if (have_payload >= 0 || match_address >= 0) { + BUG_CHECK(payload_unit == bus); + if (type) + merge.gateway_payload_tind_pbus[row] |= 1 << bus; + else + merge.gateway_payload_exact_pbus[row] |= 1 << bus; + } + if (have_payload >= 0) { + merge.gateway_payload_data[row][bus][0][type] = payload & 0xffffffff; + merge.gateway_payload_data[row][bus][1][type] = payload >> 32; + merge.gateway_payload_data[row][bus][0][type ^ 1] = payload & 0xffffffff; + merge.gateway_payload_data[row][bus][1][type ^ 1] = payload >> 32; + } + if (match_address >= 0) { + merge.gateway_payload_match_adr[row][bus][type] = match_address; + merge.gateway_payload_match_adr[row][bus][type ^ 1] = match_address; + } else 
if (options.target == TOFINO) { + // For Tofino A0, there is a bug in snapshot that cannot distinguish if a + // gateway is inhibiting a table To work around this, configure the + // gateway_payload_match_adr to an invalid value. Add a command line flag + // if this is only a tofino A0 issue?. + merge.gateway_payload_match_adr[row][bus][type] = 0x7ffff; + merge.gateway_payload_match_adr[row][bus][type ^ 1] = 0x7ffff; + } + + int groups = format ? format->groups() : 1; + if (groups > 1 || payload_map.size() > 1) { + BUG_CHECK(type == 0); // only supported on exact result busses + enable_gateway_payload_exact_shift_ovr(regs, row * 2 + bus); + } + + int tcam_shift = 0; + if (type != 0 && format) { + auto match_table = get_match_table(); + if (match_table) { + auto ternary_table = match_table->to(); + if (ternary_table && ternary_table->has_indirect()) { + tcam_shift = format->log2size - 2; + } + } + } + + if (format) { + if (auto *attached = get_attached()) { + for (auto &st : attached->stats) { + if (type == 0) { + for (unsigned i = 0; i < payload_map.size(); ++i) { + auto grp = payload_map.at(i); + if (grp < 0) continue; + merge.mau_stats_adr_exact_shiftcount[row * 2 + bus][i] = + st->determine_shiftcount(st, grp, 0, 0); + } + } else { + merge.mau_stats_adr_tcam_shiftcount[row * 2 + bus] = + st->determine_shiftcount(st, 0, 0, tcam_shift); + } + break; + } + + for (auto &m : attached->meters) { + if (type == 0) { + for (unsigned i = 0; i < payload_map.size(); ++i) { + auto grp = payload_map.at(i); + if (grp < 0) continue; + m->to()->setup_exact_shift(regs, row * 2 + bus, grp, 0, i, m, + attached->meter_color); + } + } else { + m->to()->setup_tcam_shift(regs, row * 2 + bus, tcam_shift, m, + attached->meter_color); + } + break; + } + for (auto &s : attached->statefuls) { + if (type == 0) { + for (unsigned i = 0; i < payload_map.size(); ++i) { + auto grp = payload_map.at(i); + if (grp < 0) continue; + merge.mau_meter_adr_exact_shiftcount[row * 2 + bus][i] = + 
s->determine_shiftcount(s, grp, 0, 0); + } + } else { + merge.mau_meter_adr_tcam_shiftcount[row * 2 + bus] = + s->determine_shiftcount(s, 0, 0, tcam_shift); + } + break; + } + } + } + + if (match_table && match_table->instruction) { + if (auto field = match_table->instruction.args[0].field()) { + if (type == 0) { + for (unsigned i = 0; i < payload_map.size(); ++i) { + auto grp = payload_map.at(i); + if (grp < 0) continue; + merge.mau_action_instruction_adr_exact_shiftcount[row * 2 + bus][i] = + field->by_group[grp]->bit(0); + } + } else { + merge.mau_action_instruction_adr_tcam_shiftcount[row * 2 + bus] = + field->bit(0) + tcam_shift; + } + } + } else if (auto *action = format ? format->field("action") : nullptr) { + if (type == 0) { + for (unsigned i = 0; i < payload_map.size(); ++i) { + auto grp = payload_map.at(i); + if (grp < 0) continue; + merge.mau_action_instruction_adr_exact_shiftcount[row * 2 + bus][i] = + action->by_group[grp]->bit(0); + } + } else { + merge.mau_action_instruction_adr_tcam_shiftcount[row * 2 + bus] = + action->bit(0) + tcam_shift; + } + } + + if (format && format->immed) { + if (type == 0) { + for (unsigned i = 0; i < payload_map.size(); ++i) { + auto grp = payload_map.at(i); + if (grp < 0) continue; + merge.mau_immediate_data_exact_shiftcount[row * 2 + bus][i] = + format->immed->by_group[grp]->bit(0); + } + } else { + merge.mau_immediate_data_tcam_shiftcount[row * 2 + bus] = + format->immed->bit(0) + tcam_shift; + } + // FIXME -- may be redundant witehr writing this for the match table, + // but should always be consistent + merge.mau_immediate_data_mask[type][row * 2 + bus] = bitMask(format->immed_size); + merge.mau_payload_shifter_enable[type][row * 2 + bus].immediate_data_payload_shifter_en = 1; + } + + if (type) { + merge.tind_bus_prop[row * 2 + bus].tcam_piped = 1; + merge.tind_bus_prop[row * 2 + bus].thread = gress; + merge.tind_bus_prop[row * 2 + bus].enabled = 1; + } else { + merge.exact_match_phys_result_en[row / 4U] |= 1U << 
(row % 4U * 2 + bus); + merge.exact_match_phys_result_thread[row / 4U] |= gress << (row % 4U * 2 + bus); + if (stage->tcam_delay(gress)) + merge.exact_match_phys_result_delay[row / 4U] |= 1U << (row % 4U * 2 + bus); + } +} + +template +void GatewayTable::standalone_write_regs(REGS ®s) {} + +template +void GatewayTable::write_regs_vt(REGS ®s) { + LOG1("### Gateway table " << name() << " write_regs " << loc()); + auto &row = layout[0]; + for (auto &ixb : input_xbar) { + // FIXME -- if there's no ixbar in the gateway, we should look for a group with + // all the match/xor values across all the exact match groups in the stage and use + // that. + ixb->write_regs(regs); + if (!setup_vh_xbar(regs, this, row, 0, match, ixb->match_group()) || + !setup_vh_xbar(regs, this, row, 4, xor_match, ixb->match_group())) + return; + } + auto &row_reg = regs.rams.array.row[row.row]; + auto &gw_reg = row_reg.gateway_table[gw_unit]; + auto &merge = regs.rams.match.merge; + int search_bus = row.bus.at(Layout::SEARCH_BUS); + if (search_bus == 0) { + gw_reg.gateway_table_ctl.gateway_table_input_data0_select = 1; + gw_reg.gateway_table_ctl.gateway_table_input_hash0_select = 1; + } else { + BUG_CHECK(search_bus == 1); + gw_reg.gateway_table_ctl.gateway_table_input_data1_select = 1; + gw_reg.gateway_table_ctl.gateway_table_input_hash1_select = 1; + } + for (auto &ixb : input_xbar) { + if (ixb->hash_group() >= 0) + setup_muxctl(row_reg.vh_adr_xbar.exactmatch_row_hashadr_xbar_ctl[search_bus], + ixb->hash_group()); + if (ixb->match_group() >= 0 && gateway_needs_ixbar_group()) { + auto &vh_xbar_ctl = row_reg.vh_xbar[search_bus].exactmatch_row_vh_xbar_ctl; + setup_muxctl(vh_xbar_ctl, ixb->match_group()); + /* vh_xbar_ctl.exactmatch_row_vh_xbar_thread = gress; */ } + } + gw_reg.gateway_table_ctl.gateway_table_logical_table = logical_id; + gw_reg.gateway_table_ctl.gateway_table_thread = timing_thread(gress); + for (auto &r : xor_match) + gw_reg.gateway_table_matchdata_xor_en |= 
bitMask(r.val->size()) << r.offset; + int idx = 3; + gw_reg.gateway_table_ctl.gateway_table_mode = range_match; + for (auto &line : table) { + BUG_CHECK(idx >= 0); + /* FIXME -- hardcoding version/valid to always */ + gw_reg.gateway_table_vv_entry[idx].gateway_table_entry_versionvalid0 = 0x3; + gw_reg.gateway_table_vv_entry[idx].gateway_table_entry_versionvalid1 = 0x3; + gw_reg.gateway_table_entry_matchdata[idx][0] = line.val.word0.getrange(0, 32); + gw_reg.gateway_table_entry_matchdata[idx][1] = line.val.word1.getrange(0, 32); + if (range_match) { + auto &info = range_match_info[range_match]; + for (unsigned i = 0; i < range_match_info[range_match].units; i++) { + gw_reg.gateway_table_data_entry[idx][0] |= (line.range[i] & info.half_mask) + << (i * info.bits); + gw_reg.gateway_table_data_entry[idx][1] |= + ((line.range[i] >> info.half_shift) & info.half_mask) << (i * info.bits); + } + } else { + gw_reg.gateway_table_data_entry[idx][0] = line.val.word0.getrange(32, 24); + gw_reg.gateway_table_data_entry[idx][1] = line.val.word1.getrange(32, 24); + } + if (!line.run_table) { + merge.gateway_inhibit_lut[logical_id] |= 1 << idx; + } + idx--; + } + if (!miss.run_table) { + merge.gateway_inhibit_lut[logical_id] |= 1 << 4; + } + write_next_table_regs(regs); + merge.gateway_en |= 1 << logical_id; + setup_muxctl(merge.gateway_to_logicaltable_xbar_ctl[logical_id], row.row * 2 + gw_unit); + if (layout.size() > 1) { + int result_bus = layout[1].bus.at(Layout::RESULT_BUS); + payload_write_regs(regs, layout[1].row, result_bus >> 1, result_bus & 1); + } + if (Table *tbl = match_table) { + bool tind_bus = false; + auto bus_type = Layout::RESULT_BUS; + auto *tmatch = dynamic_cast(tbl); + if (tmatch) { + tind_bus = true; + bus_type = Layout::TIND_BUS; + tbl = tmatch->indirect; + } else if (auto *hashaction = dynamic_cast(tbl)) { + tind_bus = hashaction->layout[0].bus.at(bus_type) >= 2; + } + if (tbl) { + for (auto &row : tbl->layout) { + if (row.bus.count(bus_type)) { + int bus = 
row.bus.at(bus_type); + auto &xbar_ctl = merge.gateway_to_pbus_xbar_ctl[row.row * 2 + (bus & 1)]; + if (tind_bus) { + xbar_ctl.tind_logical_select = logical_id; + xbar_ctl.tind_inhibit_enable = 1; + } else { + xbar_ctl.exact_logical_select = logical_id; + xbar_ctl.exact_inhibit_enable = 1; + } + } + } + } else { + BUG_CHECK(tmatch); + auto &xbar_ctl = merge.gateway_to_pbus_xbar_ctl[tmatch->indirect_bus]; + xbar_ctl.tind_logical_select = logical_id; + xbar_ctl.tind_inhibit_enable = 1; + } + } else { + if (gress != GHOST) merge.predication_ctl[gress].table_thread |= 1 << logical_id; + if (gress == INGRESS || gress == GHOST) { + merge.logical_table_thread[0].logical_table_thread_ingress |= 1 << logical_id; + merge.logical_table_thread[1].logical_table_thread_ingress |= 1 << logical_id; + merge.logical_table_thread[2].logical_table_thread_ingress |= 1 << logical_id; + } else if (gress == EGRESS) { + regs.dp.imem_table_addr_egress |= 1 << logical_id; + merge.logical_table_thread[0].logical_table_thread_egress |= 1 << logical_id; + merge.logical_table_thread[1].logical_table_thread_egress |= 1 << logical_id; + merge.logical_table_thread[2].logical_table_thread_egress |= 1 << logical_id; + } + auto &adrdist = regs.rams.match.adrdist; + adrdist.adr_dist_table_thread[timing_thread(gress)][0] |= 1 << logical_id; + adrdist.adr_dist_table_thread[timing_thread(gress)][1] |= 1 << logical_id; + // FIXME -- allow table_counter on standalone gateay? What can it count? + if (options.match_compiler) + merge.mau_table_counter_ctl[logical_id / 8U].set_subfield(4, 3 * (logical_id % 8U), 3); + standalone_write_regs(regs); + } + if (stage->tcam_delay(gress) > 0) merge.exact_match_logical_result_delay |= 1 << logical_id; +} + +std::set gateways_in_json; +void GatewayTable::gen_tbl_cfg(json::vector &out) const { + // Avoid adding gateway table multiple times to the json. 
The gateway table + // gets called multiple times in some cases based on how it is attached or + // associated with a match table, we should only output it to json once. + auto gwName = gateway_name.empty() ? name() : gateway_name; + if (gateways_in_json.count(gwName)) return; + LOG3("### Gateway table " << gwName << " gen_tbl_cfg " << loc()); + json::map gTable; + gTable["direction"] = P4Table::direction_name(gress); + gTable["attached_to"] = match_table ? match_table->p4_name() : "-"; + gTable["handle"] = gateway_handle++; + gTable["name"] = gwName; + gTable["table_type"] = "condition"; + + json::vector gStageTables; + json::map gStageTable; + + json::map &next_table_ids = gStageTable["next_tables"]; + json::map &next_table_names = gStageTable["next_table_names"]; + + auto &condTNext = cond_true.next; + auto &condFNext = cond_false.next; + if (Target::LONG_BRANCH_TAGS() > 0) { + json::vector &next_table_names_true = next_table_names["true"]; + json::vector &next_table_names_false = next_table_names["false"]; + json::vector &next_table_ids_true = next_table_ids["true"]; + json::vector &next_table_ids_false = next_table_ids["false"]; + if (condTNext.size() == 0) { + next_table_names_true.push_back(condTNext.next_table_name()); + next_table_ids_true.push_back(condTNext.next_table_id()); + } else { + for (auto t : condTNext) { + next_table_names_true.push_back(t.name); + next_table_ids_true.push_back(t->table_id()); + } + } + if (condFNext.size() == 0) { + next_table_names_false.push_back(condFNext.next_table_name()); + next_table_ids_false.push_back(condFNext.next_table_id()); + } else { + for (auto t : condFNext) { + next_table_names_false.push_back(t.name); + next_table_ids_false.push_back(t->table_id()); + } + } + } else { + next_table_ids["false"] = json::string(condFNext.next_table_id()); + next_table_ids["true"] = json::string(condTNext.next_table_id()); + next_table_names["false"] = json::string(condFNext.next_table_name()); + next_table_names["true"] = 
json::string(condTNext.next_table_name()); + } + + json::map mra; + mra["memory_unit"] = gw_memory_unit(); + mra["memory_type"] = "gateway"; + mra["payload_buses"] = json::vector(); + gStageTable["memory_resource_allocation"] = std::move(mra); + json::vector pack_format; // For future use + gStageTable["pack_format"] = std::move(pack_format); + + gStageTable["logical_table_id"] = logical_id; + gStageTable["stage_number"] = stage->stageno; + gStageTable["stage_table_type"] = "gateway"; + gStageTable["size"] = 0; + gStageTables.push_back(std::move(gStageTable)); + + json::vector condition_fields; + for (auto m : match) { + json::map condition_field; + condition_field["name"] = m.val.name(); + condition_field["start_bit"] = m.offset; + condition_field["bit_width"] = m.val.size(); + condition_fields.push_back(std::move(condition_field)); + } + + gTable["stage_tables"] = std::move(gStageTables); + gTable["condition_fields"] = std::move(condition_fields); + gTable["condition"] = gateway_cond; + gTable["size"] = 0; + out.push_back(std::move(gTable)); + gateways_in_json.insert(gwName); +} + +DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(GatewayTable, TARGET_CLASS) diff --git a/backends/tofino/bf-asm/gtest/asm-types.cpp b/backends/tofino/bf-asm/gtest/asm-types.cpp new file mode 100644 index 00000000000..f03908e26bd --- /dev/null +++ b/backends/tofino/bf-asm/gtest/asm-types.cpp @@ -0,0 +1,270 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/asm-types.h" + +#include + +namespace { + +auto CaptureStderr = ::testing::internal::CaptureStderr; +auto Stderr = ::testing::internal::GetCapturedStderr; +auto terminate = ::testing::KilledBySignal(SIGABRT); + +TEST(asm_types, get_int64_0) { + uint32_t i = 0; + value_t v{tINT, 0, 0}; + v.i = i; + CaptureStderr(); + EXPECT_EQ(get_int64(v), i); + EXPECT_EQ(get_int64(v, 0), i); + EXPECT_EQ(get_int64(v, 0, "no error check"), i); + EXPECT_EQ(get_int64(v, 1), i); + EXPECT_EQ(get_int64(v, 1, "no error"), i); + EXPECT_EQ(get_int64(v, 64), i); + EXPECT_EQ(get_int64(v, 64, "no error"), i); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + // Slow tests... + EXPECT_EXIT(get_int64(v, 128), terminate, "Assembler BUG"); + EXPECT_EXIT(get_int64(v, 128, "terminates"), terminate, "Assembler BUG"); +} + +TEST(asm_types, get_int64_32bit) { + uint32_t i = 0xAAAAAAAA; + value_t v{tINT, 0, 0}; + v.i = i; + CaptureStderr(); + EXPECT_EQ(get_int64(v), i); + EXPECT_EQ(get_int64(v, 0), i); + EXPECT_EQ(get_int64(v, 0, "no error check"), i); + EXPECT_EQ(get_int64(v, 32), i); + EXPECT_EQ(get_int64(v, 32, "no error"), i); + EXPECT_EQ(get_int64(v, 16), 0xAAAA); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_int64(v, 16, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_int64_64bit) { + uint64_t i = 0xAAAAAAAAAAAAAAAA; + value_t v{tINT, 0, 0}; + v.i = i; + CaptureStderr(); + EXPECT_EQ(get_int64(v), i); + EXPECT_EQ(get_int64(v, 0), i); + EXPECT_EQ(get_int64(v, 0, "no error check"), i); + EXPECT_EQ(get_int64(v, 64), i); + EXPECT_EQ(get_int64(v, 64, "no error"), i); + EXPECT_EQ(get_int64(v, 48), 0xAAAAAAAAAAAA); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_int64(v, 
48, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_bigi_empty) { + value_t v{tBIGINT, 0, 0}; + v.bigi = EMPTY_VECTOR_INIT; + EXPECT_EQ(get_int64(v), 0); + EXPECT_EQ(get_bitvec(v), bitvec()); +} + +TEST(asm_types, get_int64_bigi_0) { + uint32_t i = 0; + value_t v{tBIGINT, 0, 0}; + VECTOR_init1(v.bigi, i); + CaptureStderr(); + EXPECT_EQ(get_int64(v), i); + EXPECT_EQ(get_int64(v, 0), i); + EXPECT_EQ(get_int64(v, 0, "no error check"), i); + EXPECT_EQ(get_int64(v, 1), i); + EXPECT_EQ(get_int64(v, 1, "no error"), i); + EXPECT_EQ(get_int64(v, 64), i); + EXPECT_EQ(get_int64(v, 64, "no error"), i); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + // Slow tests... + EXPECT_EXIT(get_int64(v, 128), terminate, "Assembler BUG"); + EXPECT_EXIT(get_int64(v, 128, "terminates"), terminate, "Assembler BUG"); +} + +TEST(asm_types, get_int64_bigi_32bit) { + uint32_t i = 0xAAAAAAAA; + value_t v{tBIGINT, 0, 0}; + VECTOR_init1(v.bigi, i); + CaptureStderr(); + EXPECT_EQ(get_int64(v), i); + EXPECT_EQ(get_int64(v, 0), i); + EXPECT_EQ(get_int64(v, 0, "no error check"), i); + EXPECT_EQ(get_int64(v, 32), i); + EXPECT_EQ(get_int64(v, 32, "no error"), i); + EXPECT_EQ(get_int64(v, 16), 0xAAAA); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_int64(v, 16, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_int64_bigi_64bit) { + uint64_t i = 0xAAAAAAAAAAAAAAAA; + value_t v{tBIGINT, 0, 0}; + if (sizeof(uintptr_t) == sizeof(uint32_t)) + VECTOR_init2(v.bigi, 0xAAAAAAAA, 0xAAAAAAAA); + else + VECTOR_init1(v.bigi, i); + CaptureStderr(); + EXPECT_EQ(get_int64(v), i); + EXPECT_EQ(get_int64(v, 0), i); + EXPECT_EQ(get_int64(v, 0, "no error check"), i); + EXPECT_EQ(get_int64(v, 64), i); + EXPECT_EQ(get_int64(v, 64, "no error"), i); + EXPECT_EQ(get_int64(v, 48), 0xAAAAAAAAAAAA); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + 
CaptureStderr(); + get_int64(v, 48, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_bitvec_0) { + value_t v{tINT, 0, 0}; + v.i = 0; + auto i = bitvec(0); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 1), i); + EXPECT_EQ(get_bitvec(v, 1, "no error"), i); + EXPECT_EQ(get_bitvec(v, 64), i); + EXPECT_EQ(get_bitvec(v, 64, "no error"), i); + EXPECT_EQ(get_bitvec(v, 128), i); + EXPECT_EQ(get_bitvec(v, 128, "no error"), i); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); +} + +TEST(asm_types, get_bitvec_32bit) { + value_t v{tINT, 0, 0}; + v.i = 0xAAAAAAAA; + auto i = bitvec(0xAAAAAAAA); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 32), i); + EXPECT_EQ(get_bitvec(v, 32, "no error"), i); + EXPECT_EQ(get_bitvec(v, 16), bitvec(0xAAAA)); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_bitvec(v, 16, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_bitvec_64bit) { + value_t v{tINT, 0, 0}; + v.i = 0xAAAAAAAAAAAAAAAA; + auto i = bitvec(0xAAAAAAAAAAAAAAAA); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 64), i); + EXPECT_EQ(get_bitvec(v, 64, "no error"), i); + EXPECT_EQ(get_bitvec(v, 48), bitvec(0xAAAAAAAAAAAA)); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_bitvec(v, 48, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_bitvec_bigi_0) { + value_t v{tBIGINT, 0, 0}; + VECTOR_init1(v.bigi, 0); + auto i = bitvec(0); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + 
EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 1), i); + EXPECT_EQ(get_bitvec(v, 1, "no error"), i); + EXPECT_EQ(get_bitvec(v, 64), i); + EXPECT_EQ(get_bitvec(v, 64, "no error"), i); + EXPECT_EQ(get_bitvec(v, 128), i); + EXPECT_EQ(get_bitvec(v, 128, "no error"), i); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); +} + +TEST(asm_types, get_bitvec_bigi_32bit) { + value_t v{tBIGINT, 0, 0}; + VECTOR_init1(v.bigi, 0xAAAAAAAA); + auto i = bitvec(0xAAAAAAAA); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 32), i); + EXPECT_EQ(get_bitvec(v, 32, "no error"), i); + EXPECT_EQ(get_bitvec(v, 16), bitvec(0xAAAA)); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_bitvec(v, 16, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_bitvec_bigi_64bit) { + value_t v{tBIGINT, 0, 0}; + if (sizeof(uintptr_t) == sizeof(uint32_t)) + VECTOR_init2(v.bigi, 0xAAAAAAAA, 0xAAAAAAAA); + else + VECTOR_init1(v.bigi, 0xAAAAAAAAAAAAAAAA); + auto i = bitvec(0xAAAAAAAAAAAAAAAA); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 64), i); + EXPECT_EQ(get_bitvec(v, 64, "no error"), i); + EXPECT_EQ(get_bitvec(v, 48), bitvec(0xAAAAAAAAAAAA)); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_bitvec(v, 48, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +TEST(asm_types, get_bitvec_bigi_128bit) { + value_t v{tBIGINT, 0, 0}; + if (sizeof(uintptr_t) == sizeof(uint32_t)) + VECTOR_init4(v.bigi, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA); + else + VECTOR_init2(v.bigi, 0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA); + bitvec i; + for (int j = 0; j < 4; ++j) 
i.putrange(j * 32, 32, 0xAAAAAAAA); + CaptureStderr(); + EXPECT_EQ(get_bitvec(v), i); + EXPECT_EQ(get_bitvec(v, 0), i); + EXPECT_EQ(get_bitvec(v, 0, "no error check"), i); + EXPECT_EQ(get_bitvec(v, 128), i); + EXPECT_EQ(get_bitvec(v, 128, "no error"), i); + EXPECT_EQ(get_bitvec(v, 192), i); + EXPECT_EQ(get_bitvec(v, 192, "no error"), i); + EXPECT_EQ(get_bitvec(v, 48), bitvec(0xAAAAAAAAAAAA)); + EXPECT_TRUE(Stderr().find("error") == std::string::npos); + CaptureStderr(); + get_bitvec(v, 48, "my error"); + EXPECT_TRUE(Stderr().find("error: my error") != std::string::npos); +} + +} // namespace diff --git a/backends/tofino/bf-asm/gtest/depositfield.cpp b/backends/tofino/bf-asm/gtest/depositfield.cpp new file mode 100644 index 00000000000..b307b445f46 --- /dev/null +++ b/backends/tofino/bf-asm/gtest/depositfield.cpp @@ -0,0 +1,153 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/depositfield.h" + +#include + +#if __cplusplus < 201402L && __cpp_binary_literals < 201304 +#error "Binary literals are required" +// We could fall back on boost/utility/binary.hpp +#endif + +namespace { + +constexpr int conSize8 = 8; +constexpr int conSize32 = 32; +constexpr int tooLarge = 8; +constexpr int tooSmall = -9; +constexpr int tooSmall2 = -5; + +TEST(depositfield, 0) { + int32_t zero = 0; + auto res = DepositField::discoverRotation(zero, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, zero); + res = DepositField::discoverRotation(zero, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, zero); + res = DepositField::discoverRotation(zero, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, zero); +} + +TEST(depositfield, large) { + int32_t value = tooLarge - 1; + auto res = DepositField::discoverRotation(value, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, value); + res = DepositField::discoverRotation(value, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, value); + res = DepositField::discoverRotation(value, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, value); +} + +TEST(depositfield, small) { + int32_t value = tooSmall + 1; + int32_t value2 = tooSmall2 + 1; + auto res = DepositField::discoverRotation(value, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, value); + res = DepositField::discoverRotation(value, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, value); + ASSERT_TRUE(value < tooSmall2); + res = DepositField::discoverRotation(value, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 0U); // Not possible '0b11111000' + EXPECT_EQ(res.value, value); + res = 
DepositField::discoverRotation(value2, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, value2); +} + +TEST(depositfield, numTooLarge) { // 0b00001000 + // N.B. other solutions are valid, these are the ones we expect. + auto res = DepositField::discoverRotation(8, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 5U); + EXPECT_EQ(res.value, 1); + res = DepositField::discoverRotation(8, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 29U); + EXPECT_EQ(res.value, 1); + res = DepositField::discoverRotation(8, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 5U); + EXPECT_EQ(res.value, 1); +} + +TEST(depositfield, numTooSmall) { // 0b11110111 + auto res = DepositField::discoverRotation(-9, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 5U); + EXPECT_EQ(res.value, -2); + res = DepositField::discoverRotation(-9, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 29U); + EXPECT_EQ(res.value, -2); + res = DepositField::discoverRotation(-9, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 5U); + EXPECT_EQ(res.value, -2); +} + +TEST(depositfield, 0b00110000) { + auto res = DepositField::discoverRotation(0b00110000, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 4U); + EXPECT_EQ(res.value, 0b00000011); + res = DepositField::discoverRotation(0b00110000, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 28U); + EXPECT_EQ(res.value, 0b00000011); + res = DepositField::discoverRotation(0b00110000, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 4U); + EXPECT_EQ(res.value, 0b00000011); +} + +TEST(depositfield, 0b00100001) { + // Failures are sent back with zero rotation and the value unchanged. 
+ auto res = DepositField::discoverRotation(0b00100001, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, 0b00100001); + res = DepositField::discoverRotation(0b00100001, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, 0b00100001); + res = DepositField::discoverRotation(0b00100001, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, 0b00100001); +} + +TEST(depositfield, 0b01111111) { // 127 + auto res = DepositField::discoverRotation(0b01111111, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 1U); + EXPECT_EQ(res.value, -2); + res = DepositField::discoverRotation(0b01111111, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 0U); + EXPECT_EQ(res.value, 0b01111111); // Can't do. + res = DepositField::discoverRotation(0b01111111, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 1U); + EXPECT_EQ(res.value, -2); +} + +TEST(depositfield, 0b10011111) { // -97 + auto res = DepositField::discoverRotation(-97, conSize8, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 3U); + EXPECT_EQ(res.value, -4); + res = DepositField::discoverRotation(-97, conSize32, tooLarge, tooSmall); + EXPECT_EQ(res.rotate, 27U); + EXPECT_EQ(res.value, -4); + res = DepositField::discoverRotation(-97, conSize8, tooLarge, tooSmall2); + EXPECT_EQ(res.rotate, 3U); + EXPECT_EQ(res.value, -4); +} + +} // namespace diff --git a/backends/tofino/bf-asm/gtest/gateway.cpp b/backends/tofino/bf-asm/gtest/gateway.cpp new file mode 100644 index 00000000000..437afdfc6a2 --- /dev/null +++ b/backends/tofino/bf-asm/gtest/gateway.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "backends/tofino/bf-asm/bfas.h" +#include "backends/tofino/bf-asm/stage.h" + +namespace { + +// Verify that the next table registers are correctly configured for a standalone gateway with a +// miss next table and no hit next table +TEST(gateway, standalone_miss_next_table) { + const char *gateway_str = R"GATEWAY_CFG( +version: + target: Tofino2 +phv ingress: + ig_intr_md_for_dprsr.mirror_type.$valid: B1(0) + ig_intr_md.ingress_port: { stage 0: W0(16..24) } + hdr.data.h1: MH4 + hdr.data.b1: MB1 + ig_intr_md_for_tm.ucast_egress_port: { stage 1..20: W0(0..8) } + ig_intr_md_for_tm.ucast_egress_port.$valid: { stage 1..20: B1(1) } + ig_intr_md_for_dprsr.mirror_type: { stage 20: MB0(0..3) } + hdr.data.$valid: B1(2) +stage 0 ingress: + gateway cond-1 0: + name: cond-1 + input_xbar: + exact group 0: { 16: hdr.data.b1 } + row: 7 + bus: 0 + unit: 0 + match: { 0: hdr.data.b1 } + 0x12: + next: END + miss: + next: test_0 + condition: + expression: "(hdr.data.b1 != 18)" + true: test_0 + false: END +stage 2 ingress: + dependency: match + mpr_stage_id: 1 + mpr_bus_dep_glob_exec: 0x0 + mpr_bus_dep_long_brch: 0x0 + mpr_always_run: 0x0 + mpr_next_table_lut: + 0: 0xff + ternary_match test_0 0: + always_run: true + p4: { name: ingress.test, size: 512 } + p4_param_order: + hdr.data.h1: { type: ternary, size: 16, full_size: 16 } + row: 0 + bus: 0 + column: 0 + input_xbar: + ternary group 0: { 0: hdr.data.h1 } + match: + - { group: 0, byte_config: 3, dirtcam: 0x5 } + hit: [ END ] + miss: END + indirect: 
test_0$tind + ternary_indirect test_0$tind: + row: 0 + bus: 0 + column: 2 + input_xbar: + ternary group 0: { 0: hdr.data.h1 } + format: { action: 0..1, immediate: 2..9 } + action_bus: { 0 : immediate(0..7) } + instruction: test_0$tind(action, $DEFAULT) + actions: + ingress.setb1(1, 1): + - p4_param_order: { val: 8 } + - hit_allowed: { allowed: true } + - default_action: { allowed: true } + - handle: 0x20000002 + - next_table: 0 + - { val_1: immediate(0..7), val: val_1 } + - set MB1, val + ingress.noop(2, 0): + - hit_allowed: { allowed: true } + - default_action: { allowed: true } + - handle: 0x20000003 + - next_table: 0 + - { } + default_action: ingress.setb1 + default_action_parameters: + val: "0xAA" +)GATEWAY_CFG"; + + asm_parse_string(gateway_str); + + Section::process_all(); + + Target::JBay::mau_regs regs; + auto &stages = AsmStage::stages(INGRESS); + stages[0].write_regs(regs, false); + for (auto table : stages[0].tables) { + table->write_regs(regs); + } + + EXPECT_EQ(regs.rams.match.merge.pred_is_a_brch, 0x01); +} + +} // namespace diff --git a/backends/tofino/bf-asm/gtest/gtestasm.cpp b/backends/tofino/bf-asm/gtest/gtestasm.cpp new file mode 100644 index 00000000000..85950ee1b9b --- /dev/null +++ b/backends/tofino/bf-asm/gtest/gtestasm.cpp @@ -0,0 +1,84 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include + +#include "lib/compile_context.h" +#include "lib/log.h" +#include "lib/options.h" + +using namespace P4; + +template +class CompileContext : public virtual BaseCompileContext { + public: + /// @return the current compilation context, which must be of type + /// CompileContext. + static CompileContext &get() { return CompileContextStack::top(); } + + CompileContext() {} + + template + CompileContext(CompileContext &context) + : optionsInstance(context.options()) {} + + /// @return the compiler options for this compilation context. + OptionsType &options() { return optionsInstance; } + + private: + /// The compiler options for this compilation context. + OptionsType optionsInstance; +}; + +class GTestOptions : public Util::Options { + static const char *defaultMessage; + + public: + GTestOptions() : Util::Options(defaultMessage) { + registerOption( + "-T", "loglevel", + [](const char *arg) { + Log::addDebugSpec(arg); + return true; + }, + "[Compiler debugging] Adjust logging level per file (see below)"); + } + std::vector *process(int argc, char *const argv[]) { + auto remainingOptions = Util::Options::process(argc, argv); + return remainingOptions; + } + const char *getIncludePath() const override { return ""; } +}; + +const char *GTestOptions::defaultMessage = "bf-asm gtest"; + +using GTestContext = CompileContext; + +GTEST_API_ int main(int argc, char **argv) { + printf("running gtestasm\n"); + + // process gtest flags + ::testing::InitGoogleTest(&argc, argv); + + // process debug flags + AutoCompileContext autoGTestContext(new GTestContext); + GTestContext::get().options().process(argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/backends/tofino/bf-asm/gtest/hashexpr.cpp b/backends/tofino/bf-asm/gtest/hashexpr.cpp new file mode 100644 index 00000000000..f89c199472a --- /dev/null +++ b/backends/tofino/bf-asm/gtest/hashexpr.cpp @@ -0,0 +1,118 @@ +/** + * Copyright (C) 2024 Intel 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/hashexpr.h" + +#include + +#include "backends/tofino/bf-asm/bfas.h" +#include "backends/tofino/bf-asm/stage.h" + +namespace { + +// TEST(hashexpr, slice_with_rand_alg) +// +// Verify that a slice with a random algorithm doesn't loop forever +// +// Warning: If it does loop forever, then the test will hang :( Running through ctest should +// result in an eventual timeout, but running from the command line will hang until Ctrl-C. 
+TEST(hashexpr, slice_with_rand_alg) { + const char *hash_str = R"HASH_CFG( +version: + target: Tofino2 +phv ingress: + Field1: MW0 + Field2: MW1 + Field3: MH8(0..8) + Field4: MB9 + Hdr.$valid: B3(4) +stage 0 ingress: + hash_action _HashTable 0: + always_run: true + p4: { name: HashTable, size: 1, disable_atomic_modify : true } + row: 0 + result_bus: 1 + hash_dist: + 1: { hash: 1, mask: 0xffff, shift: 0 } + input_xbar: + exact group 2: { 0: Field1, 32: Field2, 64: Field3, 80: Field4 } + hash 4: + 16..31: slice(stripe(crc_rev(0xc002, 0x0, 0x0, 81, { 9: Field2, 41: Field1 }, { })), 0..15) + hash 5: + 16..31: slice(stripe(crc_rev(0xc002, 0x0, 0x0, 81, { 0: Field3, 73: Field4 }, { })), 0..15) + hash group 1: + table: [4, 5] + seed: 0x0 + gateway: + name: cond-81 + input_xbar: + exact group 1: { 36: Hdr.$valid } + row: 1 + bus: 0 + unit: 0 + payload_row: 0 + payload_unit: 1 + payload: 0x1 + format: { action(0): 0..0 } + match: { 4: Hdr.$valid } + 0b***1: END + miss: run_table + condition: + expression: "(Hdr.$valid == 1)" + true: END + false: END + next: END + action_bus: { 108..111 : hash_dist(1) } + instruction: _HashTable(action, $DEFAULT) + actions: + MyAction(1, 7): + - hit_allowed: { allowed: true } + - default_action: { allowed: true } + - handle: 0x20000063 + - next_table: 0 + - set W15(0..15), hash_dist(1, 0..15) + default_action: MyAction +)HASH_CFG"; + + asm_parse_string(hash_str); + + Stage *stage = Stage::stage(INGRESS, 0); + Table *table = stage->tables[0]; + InputXbar &ixbar = *table->input_xbar[0]; + for (auto &kv1 : ixbar.get_hash_tables()) { + // Grab the hash table map + auto &htmap = kv1.second; + for (auto &kv2 : htmap) { + // Get the hash column/hash expression and change the hash algorithm + auto &hc = kv2.second; + auto *he = hc.fn; + he->hash_algorithm.hash_alg = RANDOM_DYN; + } + } + + std::cerr << std::endl + << "If this test hangs then there is a problem with handling of RANDOM_DYN at the " + "hash slice level. 
Terminate the hang with Ctrl-C." + << std::endl + << std::endl; + Section::process_all(); + + // Reset the target type for future tests + options.target = NO_TARGET; +} + +} // namespace diff --git a/backends/tofino/bf-asm/gtest/mirror.cpp b/backends/tofino/bf-asm/gtest/mirror.cpp new file mode 100644 index 00000000000..bfa377d83e6 --- /dev/null +++ b/backends/tofino/bf-asm/gtest/mirror.cpp @@ -0,0 +1,241 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "backends/tofino/bf-asm/deparser.h" +#include "backends/tofino/bf-asm/sections.h" + +namespace { + +/* Tests for mirror + * + * Currently we cannot run tests for multiple targets (e.g., Tofino and JBay) + * in a single run. As a result, all tests except Tofino are disabled. 
+ */ + +#define TOF_MIRR_CFG regs.header.hir.main_i.mirror_cfg +#define TOF_MIRR_TBL regs.header.hir.main_i.mirror_tbl + +#define JBAY_MIRR_BASE regs.dprsrreg.ho_i +#define JBAY_MIRR_ENTRY him.mirr_hdr_tbl.entry +#define JBAY_MIRR_SEL regs.dprsrreg.inp.ipp.ingr.m_mirr_sel + +#define FTR_MDP_MIRR_BASE regs.mdp_mem.tmm_ext_ram.tmm_ext[0] +#define FTR_DPRSR_MIRR_BASE regs.dprsr.dprsr_phvxb_rspec.ehm_xb + +/// Mirror configuration for Tofino +struct TofinoMirrorCfg { + std::string sel_phv_; + int sel_phv_lo_; + + std::map entry_id_phv; + std::map> entry_phvs; + + TofinoMirrorCfg(std::string sel_phv, int sel_phv_lo) + : sel_phv_(sel_phv), sel_phv_lo_(sel_phv_lo) {} +}; + +/// Mirror configuration for JBay +struct JBayMirrorCfg { + std::string sel_phv_; + int sel_phv_lo_; + + std::string sel_pov_; + int sel_pov_lo_; + + std::map entry_id_phv; + std::map> entry_phvs; + + JBayMirrorCfg(std::string sel_phv, int sel_phv_lo, std::string sel_pov, int sel_pov_lo) + : sel_phv_(sel_phv), sel_phv_lo_(sel_phv_lo), sel_pov_(sel_pov), sel_pov_lo_(sel_pov_lo) {} +}; + +/// Map from register name to Phv::Register* +std::map phvRegs; + +/// Populate register name -> register map +void populateRegIds() { + if (!phvRegs.size()) { + // Initialize the PHVs. + // Triggered by requesting a slice for a field. The field does not need to exist. + Phv::get(INGRESS, 0, "jbay_dummy$"); + + // Walk through the registers and record them + for (int i = 0; i < Phv::num_regs(); ++i) { + if (const auto *reg = Phv::reg(i)) phvRegs[reg->name] = reg; + } + } +} + +/// Get the MAU ID of a given register name +int mau_id(std::string name) { return phvRegs.count(name) ? phvRegs.at(name)->mau_id() : -1; } + +/// Get the deparser ID of a given register name +int deparser_id(std::string name) { + return phvRegs.count(name) ? 
phvRegs.at(name)->deparser_id() : -1; +} + +/// Find a Digest for a given target +Deparser::Digest *findDigest(Deparser *dprsr, target_t target) { + for (auto &digest : dprsr->digests) { + if (digest.type->target == target) return &digest; + } + + BUG("Could not find the Digest for %s", toString(target).c_str()); + return nullptr; +} + +/** Reset all target information + * + * This function should be called when switching from one target to another + * (e.g., Tofino to JBay) in tests to reset state. + */ +void resetTarget() { + options.target = NO_TARGET; + Phv::test_clear(); + phvRegs.clear(); + Deparser *dprsr = dynamic_cast(Section::test_get("deparser")); + dprsr->gtest_clear(); +} + +/// Verify that registers match a mirror configuration (Tofino) +void tofinoCheckMirrorRegs(Target::Tofino::deparser_regs ®s, TofinoMirrorCfg &cfg) { + populateRegIds(); + + Deparser *dprsr = dynamic_cast(Section::test_get("deparser")); + auto *digest = findDigest(dprsr, TOFINO); + + // Tell the digest code to set the registers + digest->type->setregs(regs, *dprsr, *digest); + + // Verify the registers: + // 1. Verify common registers + EXPECT_EQ(TOF_MIRR_CFG.phv, deparser_id(cfg.sel_phv_)); + EXPECT_EQ(TOF_MIRR_CFG.shft, cfg.sel_phv_lo_); + EXPECT_EQ(TOF_MIRR_CFG.valid, 1); + + // 2. Verify the entries + for (auto &kv : cfg.entry_id_phv) { + int id = kv.first; + EXPECT_EQ(TOF_MIRR_TBL[id].id_phv, deparser_id(cfg.entry_id_phv[id])); + int idx = 0; + for (auto &phv : cfg.entry_phvs[id]) { + EXPECT_EQ(TOF_MIRR_TBL[id].phvs[idx], deparser_id(phv)); + idx++; + } + EXPECT_EQ(TOF_MIRR_TBL[id].len, cfg.entry_phvs[id].size()); + } +} + +/// Verify that registers match a mirror configuration (JBay) +void jbayCheckMirrorRegs(Target::JBay::deparser_regs ®s, JBayMirrorCfg &cfg) { + // Base index for POV PHV. Want this to be non-zero. 
+ const int povBase = 64; + + populateRegIds(); + + Deparser *dprsr = dynamic_cast(Section::test_get("deparser")); + auto *digest = findDigest(dprsr, JBAY); + + // Ensure the POV register in the config is actually recorded as a POV in + // the deparser object + int povReg = mau_id(cfg.sel_pov_); + dprsr->pov[INGRESS][Phv::reg(povReg)] = povBase; + + // Tell the digest code to set the registers + digest->type->setregs(regs, *dprsr, *digest); + + // Verify the registers: + // 1. Verify common registers + EXPECT_EQ(JBAY_MIRR_SEL.phv, deparser_id(cfg.sel_phv_)); + EXPECT_EQ(JBAY_MIRR_SEL.pov, povBase + cfg.sel_pov_lo_); + EXPECT_EQ(JBAY_MIRR_SEL.shft, cfg.sel_phv_lo_); + EXPECT_EQ(JBAY_MIRR_SEL.disable_, 0); + + // 2. Verify the entries + for (auto &base : JBAY_MIRR_BASE) { + for (auto &kv : cfg.entry_id_phv) { + int id = kv.first; + EXPECT_EQ(base.JBAY_MIRR_ENTRY[id].id_phv, deparser_id(cfg.entry_id_phv[id])); + int idx = 0; + for (auto &phv : cfg.entry_phvs[id]) { + EXPECT_EQ(base.JBAY_MIRR_ENTRY[id].phvs[idx], deparser_id(phv)); + idx++; + } + EXPECT_EQ(base.JBAY_MIRR_ENTRY[id].len, cfg.entry_phvs[id].size()); + } + } +} + +TEST(mirror, digest_tofino) { + const char *mirror_str = R"MIRR_CFG( +version: + target: Tofino +deparser ingress: + mirror: + select: B9(0..3) # bit[3..0]: ingress::ig_intr_md_for_dprsr.mirror_type + 1: + - H19(0..7) # bit[7..0]: ingress::Thurmond.Circle.LaUnion[7:0].0-7 + - B9 # ingress::Thurmond.Longwood.Matheson + - B9 # ingress::Thurmond.Longwood.Matheson + - H56(0..8) # bit[8..0]: ingress::Thurmond.Armagh.Moorcroft +)MIRR_CFG"; + + resetTarget(); + + auto *digest = ::get(Deparser::Digest::Type::all[TOFINO][INGRESS], "mirror"); + ASSERT_NE(digest, nullptr) << "Unable to find the mirror digest"; + + Target::Tofino::deparser_regs regs; + asm_parse_string(mirror_str); + + TofinoMirrorCfg mirrorCfg("B9", 0); + mirrorCfg.entry_id_phv[1] = "H19"; + mirrorCfg.entry_phvs[1] = {"B9", "B9", "H56", "H56"}; + tofinoCheckMirrorRegs(regs, mirrorCfg); +} + 
+TEST(mirror, digest_jbay) { + const char *mirror_str = R"MIRR_CFG( +version: + target: Tofino2 +deparser ingress: + mirror: + select: { B9(0..3): B8(1) } # bit[3..0]: ingress::ig_intr_md_for_dprsr.mirror_type + 1: + - H19(0..7) # bit[7..0]: ingress::Thurmond.Circle.LaUnion[7:0].0-7 + - B9 # ingress::Thurmond.Longwood.Matheson + - B9 # ingress::Thurmond.Longwood.Matheson + - H56(0..8) # bit[8..0]: ingress::Thurmond.Armagh.Moorcroft +)MIRR_CFG"; + + resetTarget(); + + auto *digest = ::get(Deparser::Digest::Type::all[JBAY][INGRESS], "mirror"); + ASSERT_NE(digest, nullptr) << "Unable to find the mirror digest"; + + Target::JBay::deparser_regs regs; + asm_parse_string(mirror_str); + + JBayMirrorCfg mirrorCfg("B9", 0, "B8", 1); + mirrorCfg.entry_id_phv[1] = "H19"; + mirrorCfg.entry_phvs[1] = {"B9", "B9", "H56", "H56"}; + jbayCheckMirrorRegs(regs, mirrorCfg); +} + +} // namespace diff --git a/backends/tofino/bf-asm/gtest/parser-test.cpp b/backends/tofino/bf-asm/gtest/parser-test.cpp new file mode 100644 index 00000000000..f2bed038d8d --- /dev/null +++ b/backends/tofino/bf-asm/gtest/parser-test.cpp @@ -0,0 +1,1055 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "backends/tofino/bf-asm/bfas.h" +#include "backends/tofino/bf-asm/parser-tofino-jbay.h" + +namespace { + +// TEST(parser_test, get_parser_deepest_depth) +// +// +// While calculating the maximum depth, the assembler goes through the parser tree +// and visits every state recursively. The parser depth for a state is taken into account +// and included in the calculation at the time it is visited. +// +// Every state used to be visited at most one time, which was the source of the problem: +// +// In cases where parsing trees contained states that were called from more than one +// parent state, the depth calculation would be wrong unless the depth was at its maximum +// value the first time that state was visited. +// +// Made a change in the parse depth calculation to keep track of the largest parser depth +// "seen" for each state. When a state has already been visited, the recursion continues +// when the current parser depth is larger than the largest parser depth seen up to that +// point for that state. + +// The parser code provided in parser_str below contains that behavior as parse_udp and +// parse_tcp are called from both parse_ipv4 and parse_ipv6, two states with different depths, +// the longest one being parse_ipv6 that is visited after parse_ipv4. Without the fix, +// parser->get_prsr_max_dph() returns 6 instead of 7. 
+// +TEST(parser_test, get_parser_deepest_depth) { + const char *parser_str = R"PARSER_CFG( +version: + target: Tofino +parser egress: + start: $entry_point + init_zero: [ B19, B18, B16 ] + bitwise_or: [ B16, B18 ] + hdr_len_adj: 27 + meta_opt: 8191 + states: + $entry_point: + *: + load: { byte1 : 27 } + buf_req: 28 + next: start + start: + match: [ byte1 ] + 0x0a: + counter: + imm: 38 + 0..1: H16 # bit[7..15] -> H16 bit[8..0]: egress::eg_intr_md.egress_port + intr_md: 9 + shift: 27 + buf_req: 27 + next: parse_mirror_tagging_state + 0x**: + counter: + imm: 38 + 0..1: H16 # bit[7..15] -> H16 bit[8..0]: egress::eg_intr_md.egress_port + intr_md: 9 + shift: 27 + buf_req: 27 + next: parse_normal_tagging_state + parse_mirror_tagging_state: + *: + counter: dec 1 + B19: 10 # value 10 -> B19 bit[7..0]: egress::eg_md.packet_state + load: { half : 13..14 } + shift: 1 + buf_req: 15 + next: parse_ethernet + parse_ethernet: + match: [ half ] + 0x0800: + counter: dec 14 + 0..1: TH32 # egress::hdr.ethernet.dst_addr[47:32].32-47 + 2..5: TW19 # egress::hdr.ethernet.dst_addr[31:0].0-31 + 6..7: TH31 # egress::hdr.ethernet.src_addr[47:32].32-47 + 8..11: TW18 # egress::hdr.ethernet.src_addr[31:0].0-31 + 12..13: TH30 # egress::hdr.ethernet.ether_type + B18: 1 # value 1 -> B18 bit[0]: egress::hdr.ethernet.$valid + load: { byte1 : 23 } + shift: 14 + buf_req: 24 + next: parse_ipv4 + 0x86dd: + counter: dec 14 + 0..1: TH32 # egress::hdr.ethernet.dst_addr[47:32].32-47 + 2..5: TW19 # egress::hdr.ethernet.dst_addr[31:0].0-31 + 6..7: TH31 # egress::hdr.ethernet.src_addr[47:32].32-47 + 8..11: TW18 # egress::hdr.ethernet.src_addr[31:0].0-31 + 12..13: TH30 # egress::hdr.ethernet.ether_type + B18: 1 # value 1 -> B18 bit[0]: egress::hdr.ethernet.$valid + shift: 14 + buf_req: 14 + next: parse_ipv6 + 0x****: + counter: dec 14 + 0..1: TH32 # egress::hdr.ethernet.dst_addr[47:32].32-47 + 2..5: TW19 # egress::hdr.ethernet.dst_addr[31:0].0-31 + 6..7: TH31 # egress::hdr.ethernet.src_addr[47:32].32-47 + 8..11: 
TW18 # egress::hdr.ethernet.src_addr[31:0].0-31 + 12..13: TH30 # egress::hdr.ethernet.ether_type + B18: 1 # value 1 -> B18 bit[0]: egress::hdr.ethernet.$valid + shift: 14 + buf_req: 14 + next: min_parse_depth_accept_initial + parse_ipv4: + match: [ byte1 ] + 0x06: + counter: dec 20 + 0..3: TW4 + # - bit[0..3] -> TW4 bit[31..28]: egress::hdr.ipv4.version + # - bit[4..7] -> TW4 bit[27..24]: egress::hdr.ipv4.ihl + # - bit[8..15] -> TW4 bit[23..16]: egress::hdr.ipv4.diffserv + # - bit[16..31] -> TW4 bit[15..0]: egress::hdr.ipv4.total_len + 4..7: TW6 + # - bit[32..47] -> TW6 bit[31..16]: egress::hdr.ipv4.identification + # - bit[48..50] -> TW6 bit[15..13]: egress::hdr.ipv4.flags + # - bit[51..63] -> TW6 bit[12..0]: egress::hdr.ipv4.frag_offset + 8..11: TW5 + # - bit[64..71] -> TW5 bit[31..24]: egress::hdr.ipv4.ttl + # - bit[72..79] -> TW5 bit[23..16]: egress::hdr.ipv4.protocol + # - bit[80..95] -> TW5 bit[15..0]: egress::hdr.ipv4.hdr_checksum + 12..13: TH27 # egress::hdr.ipv4.src_addr[31:16].16-31 + 14..15: TH26 # egress::hdr.ipv4.src_addr[15:0].0-15 + 16..17: TH25 # egress::hdr.ipv4.dst_addr[31:16].16-31 + 18..19: TH24 # egress::hdr.ipv4.dst_addr[15:0].0-15 + B18: 2 # value 1 -> B18 bit[1]: egress::hdr.ipv4.$valid + load: { half : 22..23 } + shift: 20 + buf_req: 24 + next: parse_tcp + 0x11: + counter: dec 20 + 0..3: TW4 + # - bit[0..3] -> TW4 bit[31..28]: egress::hdr.ipv4.version + # - bit[4..7] -> TW4 bit[27..24]: egress::hdr.ipv4.ihl + # - bit[8..15] -> TW4 bit[23..16]: egress::hdr.ipv4.diffserv + # - bit[16..31] -> TW4 bit[15..0]: egress::hdr.ipv4.total_len + 4..7: TW6 + # - bit[32..47] -> TW6 bit[31..16]: egress::hdr.ipv4.identification + # - bit[48..50] -> TW6 bit[15..13]: egress::hdr.ipv4.flags + # - bit[51..63] -> TW6 bit[12..0]: egress::hdr.ipv4.frag_offset + 8..11: TW5 + # - bit[64..71] -> TW5 bit[31..24]: egress::hdr.ipv4.ttl + # - bit[72..79] -> TW5 bit[23..16]: egress::hdr.ipv4.protocol + # - bit[80..95] -> TW5 bit[15..0]: egress::hdr.ipv4.hdr_checksum + 
12..13: TH27 # egress::hdr.ipv4.src_addr[31:16].16-31 + 14..15: TH26 # egress::hdr.ipv4.src_addr[15:0].0-15 + 16..17: TH25 # egress::hdr.ipv4.dst_addr[31:16].16-31 + 18..19: TH24 # egress::hdr.ipv4.dst_addr[15:0].0-15 + B18: 2 # value 1 -> B18 bit[1]: egress::hdr.ipv4.$valid + load: { half : 20..21 } + shift: 20 + buf_req: 22 + next: parse_udp + 0x**: + counter: dec 20 + 0..3: TW4 + # - bit[0..3] -> TW4 bit[31..28]: egress::hdr.ipv4.version + # - bit[4..7] -> TW4 bit[27..24]: egress::hdr.ipv4.ihl + # - bit[8..15] -> TW4 bit[23..16]: egress::hdr.ipv4.diffserv + # - bit[16..31] -> TW4 bit[15..0]: egress::hdr.ipv4.total_len + 4..7: TW6 + # - bit[32..47] -> TW6 bit[31..16]: egress::hdr.ipv4.identification + # - bit[48..50] -> TW6 bit[15..13]: egress::hdr.ipv4.flags + # - bit[51..63] -> TW6 bit[12..0]: egress::hdr.ipv4.frag_offset + 8..11: TW5 + # - bit[64..71] -> TW5 bit[31..24]: egress::hdr.ipv4.ttl + # - bit[72..79] -> TW5 bit[23..16]: egress::hdr.ipv4.protocol + # - bit[80..95] -> TW5 bit[15..0]: egress::hdr.ipv4.hdr_checksum + 12..13: TH27 # egress::hdr.ipv4.src_addr[31:16].16-31 + 14..15: TH26 # egress::hdr.ipv4.src_addr[15:0].0-15 + 16..17: TH25 # egress::hdr.ipv4.dst_addr[31:16].16-31 + 18..19: TH24 # egress::hdr.ipv4.dst_addr[15:0].0-15 + B18: 2 # value 1 -> B18 bit[1]: egress::hdr.ipv4.$valid + shift: 20 + buf_req: 20 + next: min_parse_depth_accept_initial + parse_tcp: + match: [ half ] + 0x0050: + counter: dec 20 + 0..1: TH8 # egress::hdr.tcp.src_port + 2..3: TH7 # egress::hdr.tcp.dst_port + 4..7: TW17 # egress::hdr.tcp.seq_no + 8..11: TW16 # egress::hdr.tcp.ack_no + 12: TB5 + # - bit[96..99] -> TB5 bit[7..4]: egress::hdr.tcp.data_offset + # - bit[100..103] -> TB5 bit[3..0]: egress::hdr.tcp.res + 13: TB6 # egress::hdr.tcp.flags + 14..15: TH6 # egress::hdr.tcp.window + 16..19: TW7 + # - bit[128..143] -> TW7 bit[31..16]: egress::hdr.tcp.checksum + # - bit[144..159] -> TW7 bit[15..0]: egress::hdr.tcp.urgent_ptr + B18: 4 # value 1 -> B18 bit[2]: 
egress::hdr.tcp.$valid + shift: 20 + buf_req: 20 + next: parse_app + 0x01bb: + counter: dec 20 + 0..1: TH8 # egress::hdr.tcp.src_port + 2..3: TH7 # egress::hdr.tcp.dst_port + 4..7: TW17 # egress::hdr.tcp.seq_no + 8..11: TW16 # egress::hdr.tcp.ack_no + 12: TB5 + # - bit[96..99] -> TB5 bit[7..4]: egress::hdr.tcp.data_offset + # - bit[100..103] -> TB5 bit[3..0]: egress::hdr.tcp.res + 13: TB6 # egress::hdr.tcp.flags + 14..15: TH6 # egress::hdr.tcp.window + 16..19: TW7 + # - bit[128..143] -> TW7 bit[31..16]: egress::hdr.tcp.checksum + # - bit[144..159] -> TW7 bit[15..0]: egress::hdr.tcp.urgent_ptr + B18: 4 # value 1 -> B18 bit[2]: egress::hdr.tcp.$valid + shift: 20 + buf_req: 20 + next: parse_app + 0x15b3: + counter: dec 20 + 0..1: TH8 # egress::hdr.tcp.src_port + 2..3: TH7 # egress::hdr.tcp.dst_port + 4..7: TW17 # egress::hdr.tcp.seq_no + 8..11: TW16 # egress::hdr.tcp.ack_no + 12: TB5 + # - bit[96..99] -> TB5 bit[7..4]: egress::hdr.tcp.data_offset + # - bit[100..103] -> TB5 bit[3..0]: egress::hdr.tcp.res + 13: TB6 # egress::hdr.tcp.flags + 14..15: TH6 # egress::hdr.tcp.window + 16..19: TW7 + # - bit[128..143] -> TW7 bit[31..16]: egress::hdr.tcp.checksum + # - bit[144..159] -> TW7 bit[15..0]: egress::hdr.tcp.urgent_ptr + B18: 4 # value 1 -> B18 bit[2]: egress::hdr.tcp.$valid + shift: 20 + buf_req: 20 + next: parse_recirculation + 0x****: + counter: dec 20 + 0..1: TH8 # egress::hdr.tcp.src_port + 2..3: TH7 # egress::hdr.tcp.dst_port + 4..7: TW17 # egress::hdr.tcp.seq_no + 8..11: TW16 # egress::hdr.tcp.ack_no + 12: TB5 + # - bit[96..99] -> TB5 bit[7..4]: egress::hdr.tcp.data_offset + # - bit[100..103] -> TB5 bit[3..0]: egress::hdr.tcp.res + 13: TB6 # egress::hdr.tcp.flags + 14..15: TH6 # egress::hdr.tcp.window + 16..19: TW7 + # - bit[128..143] -> TW7 bit[31..16]: egress::hdr.tcp.checksum + # - bit[144..159] -> TW7 bit[15..0]: egress::hdr.tcp.urgent_ptr + B18: 4 # value 1 -> B18 bit[2]: egress::hdr.tcp.$valid + shift: 20 + buf_req: 20 + next: end + parse_app: + *: + 
counter: dec 1 + 0: TB4 # egress::hdr.app.byte + B18: 8 # value 1 -> B18 bit[3]: egress::hdr.app.$valid + shift: 1 + buf_req: 1 + next: end + parse_recirculation: + *: + counter: dec 3 + 0: B17 # egress::hdr.recir.packet_state + 1..2: TH33 # egress::hdr.recir.pattern_state_machine_state + B18: 16 # value 1 -> B18 bit[4]: egress::hdr.recir.$valid + shift: 3 + buf_req: 3 + next: parse_app + parse_udp: + match: [ half ] + 0x0035: + counter: dec 8 + 0..1: TH7 # egress::hdr.udp.src_port + 2..3: TH6 # egress::hdr.udp.dst_port + 4..7: TW7 + # - bit[32..47] -> TW7 bit[31..16]: egress::hdr.udp.hdr_length + # - bit[48..63] -> TW7 bit[15..0]: egress::hdr.udp.checksum + B18: 32 # value 1 -> B18 bit[5]: egress::hdr.udp.$valid + shift: 8 + buf_req: 8 + next: parse_app + 0x15b3: + counter: dec 8 + 0..1: TH7 # egress::hdr.udp.src_port + 2..3: TH6 # egress::hdr.udp.dst_port + 4..7: TW7 + # - bit[32..47] -> TW7 bit[31..16]: egress::hdr.udp.hdr_length + # - bit[48..63] -> TW7 bit[15..0]: egress::hdr.udp.checksum + B18: 32 # value 1 -> B18 bit[5]: egress::hdr.udp.$valid + shift: 8 + buf_req: 8 + next: parse_recirculation + 0x****: + counter: dec 8 + 0..1: TH7 # egress::hdr.udp.src_port + 2..3: TH6 # egress::hdr.udp.dst_port + 4..7: TW7 + # - bit[32..47] -> TW7 bit[31..16]: egress::hdr.udp.hdr_length + # - bit[48..63] -> TW7 bit[15..0]: egress::hdr.udp.checksum + B18: 32 # value 1 -> B18 bit[5]: egress::hdr.udp.$valid + shift: 8 + buf_req: 8 + next: end + min_parse_depth_accept_initial: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB4 # egress::hdr.prsr_pad_0[0].blob[87:80].80-87 + 1..2: TH28 # egress::hdr.prsr_pad_0[0].blob[79:64].64-79 + 3..4: TH7 # egress::hdr.prsr_pad_0[0].blob[63:48].48-63 + 5..6: TH6 # egress::hdr.prsr_pad_0[0].blob[47:32].32-47 + 7..10: TW7 # egress::hdr.prsr_pad_0[0].blob[31:0].0-31 + B16: 4 # value 4 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$split_0 + 0b**: + buf_req: 0 + 
next: end + min_parse_depth_accept_loop.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB5 # egress::hdr.prsr_pad_0[1].blob[87:80].80-87 + 1..2: TH29 # egress::hdr.prsr_pad_0[1].blob[79:64].64-79 + 3..4: TH11 # egress::hdr.prsr_pad_0[1].blob[63:48].48-63 + 5..6: TH10 # egress::hdr.prsr_pad_0[1].blob[47:32].32-47 + 7..8: TH9 # egress::hdr.prsr_pad_0[1].blob[31:16].16-31 + B16: 2 # value 2 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 9 + buf_req: 9 + next: min_parse_depth_accept_loop.$it1.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$it1.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + 0..1: TH8 # egress::hdr.prsr_pad_0[1].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: min_parse_depth_accept_loop.$it2 + 0b**: + 0..1: TH8 # egress::hdr.prsr_pad_0[1].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: end + min_parse_depth_accept_loop.$it2: + *: + counter: dec 11 + 0: TB6 # egress::hdr.prsr_pad_0[2].blob[87:80].80-87 + 1: TB16 # egress::hdr.prsr_pad_0[2].blob[79:72].72-79 + 2: TB7 # egress::hdr.prsr_pad_0[2].blob[71:64].64-71 + 3..6: TW17 # egress::hdr.prsr_pad_0[2].blob[63:32].32-63 + 7..10: TW16 # egress::hdr.prsr_pad_0[2].blob[31:0].0-31 + B16: 1 # value 1 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$it2.$split_0 + min_parse_depth_accept_loop.$it2.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + buf_req: 0 + next: end + 0b**: + buf_req: 0 + next: end + parse_ipv6: + *: + counter: dec 40 + 0..3: TW5 + # - bit[0..3] -> TW5 bit[31..28]: egress::hdr.ipv6.version + # - bit[4..11] -> TW5 bit[27..20]: egress::hdr.ipv6.traffic_class + # - bit[12..31] -> TW5 bit[19..0]: egress::hdr.ipv6.flow_label + 4..7: TW4 + # - bit[32..47] -> TW4 bit[31..16]: egress::hdr.ipv6.payload_len + # - bit[48..55] -> TW4 bit[15..8]: egress::hdr.ipv6.next_hdr + # - bit[56..63] -> TW4 bit[7..0]: egress::hdr.ipv6.hop_limit + 8..11: TW21 # 
egress::hdr.ipv6.src_addr[127:96].96-127 + 12..15: TW20 # egress::hdr.ipv6.src_addr[95:64].64-95 + 16: TB16 # egress::hdr.ipv6.src_addr[63:56].56-63 + 17: TB7 # egress::hdr.ipv6.src_addr[55:48].48-55 + 18..19: TH29 # egress::hdr.ipv6.src_addr[47:32].32-47 + 20..21: TH28 # egress::hdr.ipv6.src_addr[31:16].16-31 + 22..23: TH27 # egress::hdr.ipv6.src_addr[15:0].0-15 + 24..25: TH26 # egress::hdr.ipv6.dst_addr[127:112].112-127 + B18: 64 # value 1 -> B18 bit[6]: egress::hdr.ipv6.$valid + load: { byte1 : 6 } + shift: 26 + buf_req: 26 + next: parse_ipv6.$split_0 + parse_ipv6.$split_0: + *: + 0..1: TH25 # egress::hdr.ipv6.dst_addr[111:96].96-111 + 2..3: TH24 # egress::hdr.ipv6.dst_addr[95:80].80-95 + 4..5: TH11 # egress::hdr.ipv6.dst_addr[79:64].64-79 + 6..7: TH10 # egress::hdr.ipv6.dst_addr[63:48].48-63 + 10..13: TW6 # egress::hdr.ipv6.dst_addr[31:0].0-31 + shift: 8 + buf_req: 14 + next: parse_ipv6.$split_1 + parse_ipv6.$split_1: + match: [ byte1 ] + 0x06: + 0..1: TH9 # egress::hdr.ipv6.dst_addr[47:32].32-47 + load: { half : 8..9 } + shift: 6 + buf_req: 10 + next: parse_tcp + 0x11: + 0..1: TH9 # egress::hdr.ipv6.dst_addr[47:32].32-47 + load: { half : 6..7 } + shift: 6 + buf_req: 8 + next: parse_udp + 0x**: + 0..1: TH9 # egress::hdr.ipv6.dst_addr[47:32].32-47 + shift: 6 + buf_req: 6 + next: end + parse_normal_tagging_state: + *: + B19: 1 # value 1 -> B19 bit[7..0]: egress::eg_md.packet_state + load: { half : 12..13 } + buf_req: 14 + next: parse_ethernet +)PARSER_CFG"; + + options.target = NO_TARGET; + Phv::test_clear(); + + createSingleAsmParser(); + AsmParser *asm_parser = dynamic_cast(::asm_parser); + asm_parse_string(parser_str); + std::vector parser_vector = asm_parser->test_get_parser(EGRESS); + EXPECT_GT(parser_vector.size(), 0); + Parser *parser = parser_vector.back(); + parser->process(); + EXPECT_EQ(parser->get_prsr_max_dph(), 4); +} + +// TEST(parser_test, get_parser_deepest_depth_loop_no_stack) +// +// verify that parser with loops that do not store into +// 
header stacks are supported and that the parser max +// depth is set to the maximum supported by the target. +// +TEST(parser_test, get_parser_depth_loop_no_stack) { + const char *parser_str = R"PARSER_CFG( +version: + target: Tofino +parser egress: + start: $entry_point.start + init_zero: [ B17, B16 ] + bitwise_or: [ B16, B17 ] + hdr_len_adj: 27 + meta_opt: 8191 + states: + $entry_point.start: + *: + counter: + imm: 65 + 0..1: H16 # bit[7..15] -> H16 bit[8..0]: egress::eg_intr_md.egress_port + 27..28: TH14 # egress::hdr.ether.dstAddr[47:32].32-47 + B17: 1 # value 1 -> B17 bit[0]: egress::hdr.ether.$valid + intr_md: 9 + shift: 29 + buf_req: 29 + next: $entry_point.start.$split_0 + $entry_point.start.$split_0: + *: + counter: dec 27 + 0..3: TW5 # egress::hdr.ether.dstAddr[31:0].0-31 + 4..5: TH13 # egress::hdr.ether.srcAddr[47:32].32-47 + 6..9: TW4 # egress::hdr.ether.srcAddr[31:0].0-31 + 10..11: TH12 # egress::hdr.ether.etherType + load: { half : 10..11 } + shift: 12 + buf_req: 12 + next: $entry_point.start.$split_1 + $entry_point.start.$split_1: + *: + counter: dec 14 + buf_req: 0 + next: L3_start_0 + L3_start_0: + match: [ half ] + 0x0800: + counter: dec 1 + 0: TB4 # egress::hdr.h.a + B17: 2 # value 1 -> B17 bit[1]: egress::hdr.h.$valid + shift: 1 + buf_req: 1 + next: min_parse_depth_accept_initial + 0x8100: + counter: dec 2 + 0: TB9 # egress::hdr.i.etherType[15:8].8-15 + 1: TB8 # egress::hdr.i.etherType[7:0].0-7 + B17: 4 # value 1 -> B17 bit[2]: egress::hdr.i.$valid + shift: 2 + buf_req: 2 + next: L3_start_0 + 0x****: + buf_req: 0 + next: min_parse_depth_accept_initial + min_parse_depth_accept_initial: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB5 # egress::hdr.prsr_pad_0[0].blob[87:80].80-87 + 1..2: TH15 # egress::hdr.prsr_pad_0[0].blob[79:64].64-79 + 3..6: TW7 # egress::hdr.prsr_pad_0[0].blob[63:32].32-63 + 7..10: TW6 # egress::hdr.prsr_pad_0[0].blob[31:0].0-31 + B16: 4 # value 4 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 
+ buf_req: 11 + next: min_parse_depth_accept_loop.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB6 # egress::hdr.prsr_pad_0[1].blob[87:80].80-87 + 1..2: TH16 # egress::hdr.prsr_pad_0[1].blob[79:64].64-79 + 3..4: TH9 # egress::hdr.prsr_pad_0[1].blob[63:48].48-63 + 5..6: TH8 # egress::hdr.prsr_pad_0[1].blob[47:32].32-47 + 7..8: TH7 # egress::hdr.prsr_pad_0[1].blob[31:16].16-31 + B16: 2 # value 2 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 9 + buf_req: 9 + next: min_parse_depth_accept_loop.$it1.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$it1.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + 0..1: TH6 # egress::hdr.prsr_pad_0[1].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: min_parse_depth_accept_loop.$it2 + 0b**: + 0..1: TH6 # egress::hdr.prsr_pad_0[1].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: end + min_parse_depth_accept_loop.$it2: + *: + counter: dec 11 + 0: TB7 # egress::hdr.prsr_pad_0[2].blob[87:80].80-87 + 1..2: TH17 # egress::hdr.prsr_pad_0[2].blob[79:64].64-79 + 3..6: TW8 # egress::hdr.prsr_pad_0[2].blob[63:32].32-63 + 7..8: TH11 # egress::hdr.prsr_pad_0[2].blob[31:16].16-31 + 9..10: TH10 # egress::hdr.prsr_pad_0[2].blob[15:0].0-15 + B16: 1 # value 1 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$it2.$split_0 + min_parse_depth_accept_loop.$it2.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + buf_req: 0 + next: end + 0b**: + buf_req: 0 + next: end +)PARSER_CFG"; + + options.target = NO_TARGET; + Phv::test_clear(); + + createSingleAsmParser(); + AsmParser *asm_parser = dynamic_cast(::asm_parser); + asm_parse_string(parser_str); + std::vector parser_vector = asm_parser->test_get_parser(EGRESS); + EXPECT_GT(parser_vector.size(), 0); + Parser *parser = parser_vector.back(); + parser->process(); + EXPECT_EQ(parser->get_prsr_max_dph(), 0x3ff - 1); +} + 
+// TEST(parser_test, get_parser_depth_loop_with_stack) +// +// verify that when a parser has loops that store into header +// stacks, that the max parser depth is set according to the +// number of entries in the stack. +// +TEST(parser_test, get_parser_depth_loop_with_stack) { + const char *parser_str = R"PARSER_CFG( +version: + target: Tofino +phv egress: + eg_intr_md.egress_port: H17(0..8) + hdr.vlan$0.pcp: TW0(29..31) + hdr.vlan$0.dei: TW0(28) + hdr.vlan$0.vid: TW0(16..27) + hdr.vlan$0.ether_type: TW0(0..15) + hdr.vlan$1.pcp: TW1(29..31) + hdr.vlan$1.dei: TW1(28) + hdr.vlan$1.vid: TW1(16..27) + hdr.vlan$1.ether_type: TW1(0..15) + hdr.vlan$2.pcp: TW2(29..31) + hdr.vlan$2.dei: TW2(28) + hdr.vlan$2.vid: TW2(16..27) + hdr.vlan$2.ether_type: TW2(0..15) + hdr.vlan$3.pcp: TW3(29..31) + hdr.vlan$3.dei: TW3(28) + hdr.vlan$3.vid: TW3(16..27) + hdr.vlan$3.ether_type: TW3(0..15) + hdr.vlan$4.pcp: TH1(13..15) + hdr.vlan$4.dei: TH1(12) + hdr.vlan$4.vid: TH1(0..11) + hdr.vlan$4.ether_type: TH0 + hdr.vlan$5.pcp: TH3(13..15) + hdr.vlan$5.dei: TH3(12) + hdr.vlan$5.vid: TH3(0..11) + hdr.vlan$5.ether_type: TH2 + hdr.vlan$6.pcp: TH5(13..15) + hdr.vlan$6.dei: TH5(12) + hdr.vlan$6.vid: TH5(0..11) + hdr.vlan$6.ether_type: TH4 + hdr.vlan$7.pcp: TW12(29..31) + hdr.vlan$7.dei: TW12(28) + hdr.vlan$7.vid: TW12(16..27) + hdr.vlan$7.ether_type: TW12(0..15) + hdr.vlan$8.pcp: TW13(29..31) + hdr.vlan$8.dei: TW13(28) + hdr.vlan$8.vid: TW13(16..27) + hdr.vlan$8.ether_type: TW13(0..15) + hdr.vlan$9.pcp: TW14(29..31) + hdr.vlan$9.dei: TW14(28) + hdr.vlan$9.vid: TW14(16..27) + hdr.vlan$9.ether_type: TW14(0..15) + hdr.vlan$10.pcp: TW15(29..31) + hdr.vlan$10.dei: TW15(28) + hdr.vlan$10.vid: TW15(16..27) + hdr.vlan$10.ether_type: TW15(0..15) + hdr.vlan$11.pcp: TH19(13..15) + hdr.vlan$11.dei: TH19(12) + hdr.vlan$11.vid: TH19(0..11) + hdr.vlan$11.ether_type: TH18 + hdr.vlan$12.pcp: TH21(13..15) + hdr.vlan$12.dei: TH21(12) + hdr.vlan$12.vid: TH21(0..11) + hdr.vlan$12.ether_type: TH20 + hdr.vlan$13.pcp: 
TH23(13..15) + hdr.vlan$13.dei: TH23(12) + hdr.vlan$13.vid: TH23(0..11) + hdr.vlan$13.ether_type: TH22 + hdr.vlan$14.pcp: TB13(5..7) + hdr.vlan$14.dei: TB13(4) + hdr.vlan$14.vid.0-7: TB14 + hdr.vlan$14.vid.8-11: TB13(0..3) + hdr.vlan$14.ether_type.0-7: TB3 + hdr.vlan$14.ether_type.8-15: TB12 + hdr.prsr_pad_0$0.blob.0-31: TW20 + hdr.prsr_pad_0$0.blob.32-63: TW21 + hdr.prsr_pad_0$0.blob.64-79: TH36 + hdr.prsr_pad_0$0.blob.80-87: TB0 + hdr.prsr_pad_0$1.blob.0-31: TW22 + hdr.prsr_pad_0$1.blob.32-63: TW23 + hdr.prsr_pad_0$1.blob.64-79: TH37 + hdr.prsr_pad_0$1.blob.80-87: TB1 + hdr.prsr_pad_0$2.blob.0-15: TH30 + hdr.prsr_pad_0$2.blob.16-31: TH31 + hdr.prsr_pad_0$2.blob.32-47: TH32 + hdr.prsr_pad_0$2.blob.48-63: TH33 + hdr.prsr_pad_0$2.blob.64-79: TH38 + hdr.prsr_pad_0$2.blob.80-87: TB2 + hdr.eth.dst_addr.0-7: TB15 + hdr.eth.dst_addr.8-15: TB20 + hdr.eth.dst_addr.16-23: TB21 + hdr.eth.dst_addr.24-31: TB22 + hdr.eth.dst_addr.32-47: TH41 + hdr.eth.src_addr.0-15: TH34 + hdr.eth.src_addr.16-31: TH35 + hdr.eth.src_addr.32-47: TH40 + hdr.eth.ethertype: TH39 + hdr.eth.$valid: B17(0) + hdr.vlan.$stkvalid: H16(0..14) + hdr.vlan$0.$valid: H16(14) + hdr.vlan$1.$valid: H16(13) + hdr.vlan$2.$valid: H16(12) + hdr.vlan$3.$valid: H16(11) + hdr.vlan$4.$valid: H16(10) + hdr.vlan$5.$valid: H16(9) + hdr.vlan$6.$valid: H16(8) + hdr.vlan$7.$valid: H16(7) + hdr.vlan$8.$valid: H16(6) + hdr.vlan$9.$valid: H16(5) + hdr.vlan$10.$valid: H16(4) + hdr.vlan$11.$valid: H16(3) + hdr.vlan$12.$valid: H16(2) + hdr.vlan$13.$valid: H16(1) + hdr.vlan$14.$valid: H16(0) + hdr.prsr_pad_0.$stkvalid: B16(0..2) + hdr.prsr_pad_0$0.$valid: B16(2) + hdr.prsr_pad_0$1.$valid: B16(1) + hdr.prsr_pad_0$2.$valid: B16(0) + context_json: + B16: + - { name : hdr.prsr_pad_0$0.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.prsr_pad_0.$stkvalid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.prsr_pad_0$1.$valid, live_start : parser, 
live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.prsr_pad_0$2.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + B17: + - { name : hdr.eth.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + H16: + - { name : hdr.vlan$0.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan.$stkvalid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$1.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$2.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$3.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$4.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$5.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$6.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$7.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$8.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$9.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$10.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$11.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$12.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$13.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + - { name : hdr.vlan$14.$valid, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } + H17: + - { name : 
eg_intr_md.egress_port, live_start : parser, live_end : deparser, mutually_exclusive_with: [ ] } +parser egress: + start: $entry_point + init_zero: [ B17, H16, B16 ] + bitwise_or: [ TH39, B16, H16 ] + hdr_len_adj: 27 + meta_opt: 8191 + states: + $entry_point: + *: + counter: + imm: 24 + 0..1: H17 # bit[7..15] -> H17 bit[8..0]: egress::eg_intr_md.egress_port + 27..28: TH41 # egress::hdr.eth.dst_addr[47:32].32-47 + 29: TB22 # egress::hdr.eth.dst_addr[31:24].24-31 + 30: TB21 # egress::hdr.eth.dst_addr[23:16].16-23 + 31: TB20 # egress::hdr.eth.dst_addr[15:8].8-15 + B17: 1 # value 1 -> B17 bit[0]: egress::hdr.eth.$valid + intr_md: 9 + shift: 32 + buf_req: 32 + next: start.$oob_stall_0 + start.$oob_stall_0: + *: + load: { half : 7..8 } + buf_req: 9 + next: start.$split_0 + start.$split_0: + match: [ half ] + 0x8100: + 0: TB15 # egress::hdr.eth.dst_addr[7:0].0-7 + 1..2: TH40 # egress::hdr.eth.src_addr[47:32].32-47 + 3..4: TH35 # egress::hdr.eth.src_addr[31:16].16-31 + 5..6: TH34 # egress::hdr.eth.src_addr[15:0].0-15 + 7..8: TH39 # egress::hdr.eth.ethertype + load: { half : 11..12 } + shift: 9 + buf_req: 13 + next: CommonParser_parse_vlan_0 + 0x****: + 0: TB15 # egress::hdr.eth.dst_addr[7:0].0-7 + 1..2: TH40 # egress::hdr.eth.src_addr[47:32].32-47 + 3..4: TH35 # egress::hdr.eth.src_addr[31:16].16-31 + 5..6: TH34 # egress::hdr.eth.src_addr[15:0].0-15 + 7..8: TH39 # egress::hdr.eth.ethertype + shift: 9 + buf_req: 9 + next: min_parse_depth_accept_initial + CommonParser_parse_vlan_0: + match: [ half ] + 0x8100: + counter: dec 4 + 0..3: TW0 + # - bit[0..2] -> TW0 bit[31..29]: egress::hdr.vlan[0].pcp + # - bit[3] -> TW0 bit[28]: egress::hdr.vlan[0].dei + # - bit[4..15] -> TW0 bit[27..16]: egress::hdr.vlan[0].vid + # - bit[16..31] -> TW0 bit[15..0]: egress::hdr.vlan[0].ether_type + H16: 16384 # value 16384 -> H16 bit[14..0]: egress::hdr.vlan.$stkvalid + TH39: 2 # value 2 -> TH39 bit[15..0]: egress::hdr.eth.ethertype + load: { half : 2..3 } + shift: 4 + buf_req: 4 + offset_inc: 1 
+ next: CommonParser_parse_vlan_0 + 0x****: + counter: dec 4 + 0..3: TW0 + # - bit[0..2] -> TW0 bit[31..29]: egress::hdr.vlan[0].pcp + # - bit[3] -> TW0 bit[28]: egress::hdr.vlan[0].dei + # - bit[4..15] -> TW0 bit[27..16]: egress::hdr.vlan[0].vid + # - bit[16..31] -> TW0 bit[15..0]: egress::hdr.vlan[0].ether_type + H16: 16384 # value 16384 -> H16 bit[14..0]: egress::hdr.vlan.$stkvalid + TH39: 2 # value 2 -> TH39 bit[15..0]: egress::hdr.eth.ethertype + shift: 4 + buf_req: 4 + offset_inc: 1 + next: min_parse_depth_accept_initial + min_parse_depth_accept_initial: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB0 # egress::hdr.prsr_pad_0[0].blob[87:80].80-87 + 1..2: TH36 # egress::hdr.prsr_pad_0[0].blob[79:64].64-79 + 3..6: TW21 # egress::hdr.prsr_pad_0[0].blob[63:32].32-63 + 7..10: TW20 # egress::hdr.prsr_pad_0[0].blob[31:0].0-31 + B16: 4 # value 4 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB1 # egress::hdr.prsr_pad_0[1].blob[87:80].80-87 + 1..2: TH37 # egress::hdr.prsr_pad_0[1].blob[79:64].64-79 + 3..6: TW23 # egress::hdr.prsr_pad_0[1].blob[63:32].32-63 + 7..10: TW22 # egress::hdr.prsr_pad_0[1].blob[31:0].0-31 + B16: 2 # value 2 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$it1.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$it1.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB2 # egress::hdr.prsr_pad_0[2].blob[87:80].80-87 + 1..2: TH38 # egress::hdr.prsr_pad_0[2].blob[79:64].64-79 + 3..4: TH33 # egress::hdr.prsr_pad_0[2].blob[63:48].48-63 + 5..6: TH32 # egress::hdr.prsr_pad_0[2].blob[47:32].32-47 + 7..8: TH31 # egress::hdr.prsr_pad_0[2].blob[31:16].16-31 + B16: 1 # value 1 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + 
shift: 9 + buf_req: 9 + next: min_parse_depth_accept_loop.$it2.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$it2.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + 0..1: TH30 # egress::hdr.prsr_pad_0[2].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: end + 0b**: + 0..1: TH30 # egress::hdr.prsr_pad_0[2].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: end +)PARSER_CFG"; + + options.target = NO_TARGET; + Phv::test_clear(); + + createSingleAsmParser(); + AsmParser *asm_parser = dynamic_cast(::asm_parser); + asm_parse_string(parser_str); + std::vector parser_vector = asm_parser->test_get_parser(EGRESS); + EXPECT_GT(parser_vector.size(), 0); + Parser *parser = parser_vector.back(); + parser->process(); + EXPECT_EQ(parser->get_prsr_max_dph(), 6); +} + +// TEST(parser_test, get_parser_depth_untaken_path) +// +// verify that untaken paths are not considered +// in the parser depth calculation. +// +TEST(parser_test, get_parser_depth_untaken_path) { + const char *parser_str = R"PARSER_CFG( +version: + target: Tofino +parser egress: + start: $entry_point.start + init_zero: [ B17, B16 ] + bitwise_or: [ TH15, B16, B17 ] + hdr_len_adj: 27 + meta_opt: 8191 + states: + $entry_point.start: + *: + counter: + imm: 38 + 0..1: H16 # bit[7..15] -> H16 bit[8..0]: egress::eg_intr_md.egress_port + intr_md: 9 + shift: 27 + buf_req: 27 + next: $entry_point.start.$oob_stall_0 + $entry_point.start.$oob_stall_0: + *: + load: { half : 12..13 } + buf_req: 14 + next: CommonParser_start_0 + CommonParser_start_0: + match: [ half ] + 0x****: + counter: dec 14 + 0..1: TH17 # egress::hdr.eth.dst_addr[47:32].32-47 + 2..5: TW9 # egress::hdr.eth.dst_addr[31:0].0-31 + 6..7: TH16 # egress::hdr.eth.src_addr[47:32].32-47 + 8..11: TW8 # egress::hdr.eth.src_addr[31:0].0-31 + 12..13: TH15 # egress::hdr.eth.ethertype + B17: 1 # value 1 -> B17 bit[0]: egress::hdr.eth.$valid + shift: 14 + buf_req: 14 + next: min_parse_depth_accept_initial + 0x8100: + counter: dec 14 + 0..1: TH17 # 
egress::hdr.eth.dst_addr[47:32].32-47 + 2..5: TW9 # egress::hdr.eth.dst_addr[31:0].0-31 + 6..7: TH16 # egress::hdr.eth.src_addr[47:32].32-47 + 8..11: TW8 # egress::hdr.eth.src_addr[31:0].0-31 + 12..13: TH15 # egress::hdr.eth.ethertype + B17: 1 # value 1 -> B17 bit[0]: egress::hdr.eth.$valid + load: { half : 16..17 } + shift: 14 + buf_req: 18 + next: CommonParser_parse_vlan_0 + min_parse_depth_accept_initial: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB0 # egress::hdr.prsr_pad_0[0].blob[87:80].80-87 + 1..2: TH12 # egress::hdr.prsr_pad_0[0].blob[79:64].64-79 + 3..6: TW2 # egress::hdr.prsr_pad_0[0].blob[63:32].32-63 + 7..10: TW1 # egress::hdr.prsr_pad_0[0].blob[31:0].0-31 + B16: 4 # value 4 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB1 # egress::hdr.prsr_pad_0[1].blob[87:80].80-87 + 1..2: TH13 # egress::hdr.prsr_pad_0[1].blob[79:64].64-79 + 3..4: TH1 # egress::hdr.prsr_pad_0[1].blob[63:48].48-63 + 5..6: TH0 # egress::hdr.prsr_pad_0[1].blob[47:32].32-47 + 7..10: TW3 # egress::hdr.prsr_pad_0[1].blob[31:0].0-31 + B16: 2 # value 2 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 11 + buf_req: 11 + next: min_parse_depth_accept_loop.$it1.$split_0 + 0b**: + buf_req: 0 + next: end + min_parse_depth_accept_loop.$it1.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + counter: dec 11 + 0: TB2 # egress::hdr.prsr_pad_0[2].blob[87:80].80-87 + 1..2: TH14 # egress::hdr.prsr_pad_0[2].blob[79:64].64-79 + 3..4: TH5 # egress::hdr.prsr_pad_0[2].blob[63:48].48-63 + 5..6: TH4 # egress::hdr.prsr_pad_0[2].blob[47:32].32-47 + 7..8: TH3 # egress::hdr.prsr_pad_0[2].blob[31:16].16-31 + B16: 1 # value 1 -> B16 bit[2..0]: egress::hdr.prsr_pad_0.$stkvalid + shift: 9 + buf_req: 9 + next: min_parse_depth_accept_loop.$it2.$split_0 + 0b**: + buf_req: 0 + next: end 
+ min_parse_depth_accept_loop.$it2.$split_0: + match: [ ctr_neg, ctr_zero ] + 0x0: + 0..1: TH2 # egress::hdr.prsr_pad_0[2].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: end + 0b**: + 0..1: TH2 # egress::hdr.prsr_pad_0[2].blob[15:0].0-15 + shift: 2 + buf_req: 2 + next: end + CommonParser_parse_vlan_0: + match: [ half ] + 0x8100: + counter: dec 4 + 0..3: TW0 + # - bit[0..2] -> TW0 bit[31..29]: egress::hdr.vlan.pcp + # - bit[3] -> TW0 bit[28]: egress::hdr.vlan.dei + # - bit[4..15] -> TW0 bit[27..16]: egress::hdr.vlan.vid + # - bit[16..31] -> TW0 bit[15..0]: egress::hdr.vlan.ether_type + B17: 2 # value 1 -> B17 bit[1]: egress::hdr.vlan.$valid + TH15: 2 # value 2 -> TH15 bit[15..0]: egress::hdr.eth.ethertype + load: { half : 16..17 } + shift: 4 + buf_req: 18 + next: CommonParser_start_0 + 0x****: + counter: dec 4 + 0..3: TW0 + # - bit[0..2] -> TW0 bit[31..29]: egress::hdr.vlan.pcp + # - bit[3] -> TW0 bit[28]: egress::hdr.vlan.dei + # - bit[4..15] -> TW0 bit[27..16]: egress::hdr.vlan.vid + # - bit[16..31] -> TW0 bit[15..0]: egress::hdr.vlan.ether_type + B17: 2 # value 1 -> B17 bit[1]: egress::hdr.vlan.$valid + TH15: 2 # value 2 -> TH15 bit[15..0]: egress::hdr.eth.ethertype + shift: 4 + buf_req: 4 + next: min_parse_depth_accept_initial +)PARSER_CFG"; + + options.target = NO_TARGET; + Phv::test_clear(); + + createSingleAsmParser(); + AsmParser *asm_parser = dynamic_cast(::asm_parser); + asm_parse_string(parser_str); + std::vector parser_vector = asm_parser->test_get_parser(EGRESS); + EXPECT_GT(parser_vector.size(), 0); + Parser *parser = parser_vector.back(); + parser->process(); + EXPECT_EQ(parser->get_prsr_max_dph(), 4); +} + +} // namespace diff --git a/backends/tofino/bf-asm/gtest/register-matcher.cpp b/backends/tofino/bf-asm/gtest/register-matcher.cpp new file mode 100644 index 00000000000..77b5abf64ce --- /dev/null +++ b/backends/tofino/bf-asm/gtest/register-matcher.cpp @@ -0,0 +1,175 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the 
Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/gtest/register-matcher.h" + +#include +#include + +namespace BfAsm { + +namespace Test { + +RegisterMatcher::RegisterMatcher(const char *spec) : bitsize(0) { + enum State { + INIT, + IDENT, + WIDTH, + BIN_VALUE, + OCT_VALUE, + HEX_VALUE, + } state(INIT); + uint32_t width(0); + bitvec value; + uint8_t digit(0); + bool negate(false); + + while (true) { + switch (state) { + case INIT: + if (std::isdigit(*spec)) { + width = *spec - '0'; + state = WIDTH; + } else if (*spec == '~') { + negate = !negate; + } else if (std::isalpha(*spec) || *spec == '_') { + /* -- ignore identifiers in the spec */ + state = IDENT; + } + break; + + case IDENT: + if (*spec == '~') { + state = INIT; + negate = true; + } else if (!std::isalpha(*spec) && !std::isdigit(*spec) && *spec != '_') { + state = INIT; + negate = false; + } + break; + + case WIDTH: + if (std::isdigit(*spec)) { + width = 10 * width + *spec - '0'; + } else if (*spec == 'b') { + state = BIN_VALUE; + value = bitvec(); + } else if (*spec == 'x') { + state = HEX_VALUE; + value = bitvec(); + } else if (*spec == 'o') { + state = OCT_VALUE; + value = bitvec(); + } + break; + + case BIN_VALUE: + if (*spec == '0' || *spec == '1') { + digit = *spec - '0'; + if (negate) digit = ~digit; + value <<= 1; + value |= bitvec(digit & 0x01); + } else if (*spec == '|' || *spec == 0) { + pushBits(value, width); + state = INIT; + negate 
= false; + } + break; + + case HEX_VALUE: + if (std::isxdigit(*spec)) { + if (*spec >= '0' && *spec <= '9') { + digit = *spec - '0'; + } else if (*spec >= 'a' && *spec <= 'f') { + digit = *spec - 'a' + 10; + } else if (*spec >= 'A' && *spec <= 'F') { + digit = *spec - 'A' + 10; + } + if (negate) digit = ~digit; + value <<= 4; + value |= bitvec(digit & 0x0f); + } else if (*spec == '|' || *spec == 0) { + pushBits(value, width); + state = INIT; + negate = false; + } + break; + + case OCT_VALUE: + if (*spec >= '0' && *spec <= '7') { + digit = *spec - '0'; + if (negate) digit = ~digit; + value <<= 3; + value |= bitvec(digit & 0x07); + } else if (*spec == '|' || *spec == 0) { + pushBits(value, width); + state = INIT; + negate = false; + } + break; + } + + if (*spec == 0) break; + ++spec; + } +} + +void RegisterMatcher::pushBits(const bitvec &bits, uint32_t width) { + expected <<= width; + bitvec mask; + mask.setrange(0, width); + expected |= bits & mask; + bitsize += width; +} + +bool RegisterMatcher::checkRegister(std::ostream &os, const uint8_t reg[], uint32_t rsize) const { + const uint32_t bytesize((bitsize + 7) / 8); + if (rsize < bytesize) { + os << "checked register is shorter than the expected value"; + return false; + } + + uint32_t bitindex(0); + bool fail(false); + for (int i(0); i < rsize; ++i) { + const uint8_t byte(expected.getrange(bitindex, 8)); + fail = (byte != reg[i]) || fail; + bitindex += 8; + } + + if (fail) { + os << std::hex << std::setfill('0'); + os << " expected: "; + for (auto i(rsize); i > 0; --i) { + uint8_t byte(expected.getrange((i - 1) * 8, 8)); + os << ' ' << std::setw(2) << static_cast(byte); + bitindex += 8; + } + os << '\n'; + os << " actual: "; + for (auto i(rsize); i > 0; --i) { + os << ' ' << std::setw(2) << static_cast(reg[i - 1]); + } + os << '\n'; + } + + return !fail; +} + +} // namespace Test + +} // namespace BfAsm diff --git a/backends/tofino/bf-asm/gtest/register-matcher.h b/backends/tofino/bf-asm/gtest/register-matcher.h 
new file mode 100644 index 00000000000..aa47a8203fd --- /dev/null +++ b/backends/tofino/bf-asm/gtest/register-matcher.h @@ -0,0 +1,68 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_GTEST_REGISTER_MATCHER_H_ +#define BACKENDS_TOFINO_BF_ASM_GTEST_REGISTER_MATCHER_H_ + +#include + +#include +#include +#include + +#include "backends/tofino/bf-asm/ubits.h" +#include "lib/bitvec.h" + +namespace BfAsm { + +namespace Test { + +class RegisterMatcher { + private: + bitvec expected; + uint32_t bitsize; + + public: + explicit RegisterMatcher(const char *spec); + + bool checkRegister(std::ostream &os, const uint8_t reg[], uint32_t size) const; + + template + bool checkRegister(std::ostream &os, const ubits &bits) const { + static_assert(N > 0 && N <= 64); + const uint64_t value(bits); + return checkRegister(os, reinterpret_cast(&value), (N + 7) / 8); + } + + private: + void pushBits(const bitvec &bits, uint32_t width); +}; + +} // namespace Test + +} // namespace BfAsm + +#define EXPECT_REGISTER(reg, expected) \ + do { \ + RegisterMatcher matcher(expected); \ + std::ostringstream oss; \ + if (!matcher.checkRegister(oss, reg)) { \ + ADD_FAILURE() << "check of the register " << #reg << " has failed:\n" << oss.str(); \ + } \ + } while (false) + +#endif /* BACKENDS_TOFINO_BF_ASM_GTEST_REGISTER_MATCHER_H_ */ diff --git 
a/backends/tofino/bf-asm/hash_action.cpp b/backends/tofino/bf-asm/hash_action.cpp new file mode 100644 index 00000000000..2c99bbfd4f6 --- /dev/null +++ b/backends/tofino/bf-asm/hash_action.cpp @@ -0,0 +1,231 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "action_bus.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" +#include "misc.h" + +// target specific instantiatitions + +Table::Format::Field *HashActionTable::lookup_field(const std::string &n, + const std::string &act) const { + auto *rv = format ? 
format->field(n) : nullptr; + if (!rv && gateway) rv = gateway->lookup_field(n, act); + if (!rv && !act.empty()) { + if (auto call = get_action()) { + rv = call->lookup_field(n, act); + } + } + return rv; +} + +void HashActionTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::MatchEntry); + for (auto &kv : MapIterChecked(data, {"meter", "stats", "stateful"})) { + if (kv.key == "search_bus" || kv.key == "result_bus") { + // already dealt with in Table::setup_layout via common_init_setup + } else if (!common_setup(kv, data, P4Table::MatchEntry)) { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (!action.set() && !actions) + error(lineno, "Table %s has neither action table nor immediate actions", name()); + if (action.args.size() > 2) + error(lineno, "Unexpected number of action table arguments %zu", action.args.size()); + if (actions && !action_bus) action_bus = ActionBus::create(); +} + +void HashActionTable::pass1() { + LOG1("### Hash Action " << name() << " pass1 " << loc()); + MatchTable::pass1(); + for (auto &hd : hash_dist) { + if (hd.xbar_use == 0) hd.xbar_use |= HashDistribution::ACTION_DATA_ADDRESS; + hd.pass1(this, HashDistribution::OTHER, false); + } + if (!gateway && !hash_dist.empty()) + warning(hash_dist[0].lineno, "No gateway in hash_action means hash_dist can't be used"); +} + +void HashActionTable::pass2() { + LOG1("### Hash Action " << name() << " pass2 " << loc()); + if (logical_id < 0) choose_logical_id(); + if (Target::GATEWAY_NEEDS_SEARCH_BUS()) { // FIXME -- misnamed param? 
+ if (layout.size() != 1 || layout[0].bus.empty()) { + error(lineno, "Need explicit row/bus in hash_action table"); + } else if (layout[0].bus.size() > 1) { + error(lineno, "Can't have both bus and result_bus in hash_action table"); + } else { + BUG_CHECK(layout[0].bus.count(Layout::RESULT_BUS), "should have result bus (only)"); + } + } + allocate_physical_ids(); + determine_word_and_result_bus(); + for (auto &ixb : input_xbar) ixb->pass2(); + if (actions) actions->pass2(this); + if (action_bus) action_bus->pass2(this); + if (gateway) gateway->pass2(); + if (idletime) idletime->pass2(); + for (auto &hd : hash_dist) hd.pass2(this); +} + +/** + * Again by definition, the bus of the hash action table by definition is the result bus + */ +void HashActionTable::determine_word_and_result_bus() { + for (auto &row : layout) { + row.word = 0; + } +} + +void HashActionTable::pass3() { + LOG1("### Hash Action " << name() << " pass3 " << loc()); + MatchTable::pass3(); + if (action_bus) action_bus->pass3(this); +} + +template +void HashActionTable::write_merge_regs_vt(REGS ®s, int type, int bus) { + attached.write_merge_regs(regs, this, type, bus); +} + +template +void HashActionTable::write_regs_vt(REGS ®s) { + LOG1("### Hash Action " << name() << " write_regs " << loc()); + /* FIXME -- setup layout with no rams so other functions can write registers properly */ + int bus_type = layout[0].bus[Layout::RESULT_BUS] >> 1; + MatchTable::write_regs(regs, bus_type, this); + auto &merge = regs.rams.match.merge; + merge.exact_match_logical_result_en |= 1 << logical_id; + if (stage->tcam_delay(gress)) merge.exact_match_logical_result_delay |= 1 << logical_id; + if (actions) actions->write_regs(regs, this); + if (idletime) idletime->write_regs(regs); + if (gateway) gateway->write_regs(regs); + for (auto &hd : hash_dist) hd.write_regs(regs, this); + if (options.match_compiler && !enable_action_data_enable && + (!gateway || gateway->empty_match())) { + /* this seems unneeded? 
(won't actually be used...) */ + merge.next_table_format_data[logical_id].match_next_table_adr_default = + merge.next_table_format_data[logical_id].match_next_table_adr_miss_value.value; + } +} + +/** + * Unlike the hash functions for exact match tables, the hash action table does not require + * the Galois position. On the contrary, the hash action just requires an identity matrix + * of what the address that is to be generated, as they simply use this address as a baseline + * for generating the corresponding address. + * + * Thus, the hash function that is provided starts at bit 0, and is in reverse p4 param order. + * This is under the guarantee that the compiler will allocate the hash in reverse p4 param + * order as well. + * + * FIXME: Possibly this should be validated before this is the output, but currently the + * compiler will set up the hash in that order + */ +void HashActionTable::add_hash_functions(json::map &stage_tbl) const { + json::vector &hash_functions = stage_tbl["hash_functions"] = json::vector(); + + if (input_xbar.empty()) return; + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + auto &ht = input_xbar[0]->get_hash_tables(); + if (ht.size() == 0) return; + + int hash_bit_index = 0; + json::map hash_function; + json::vector &hash_bits = hash_function["hash_bits"] = json::vector(); + for (auto it = p4_params_list.rbegin(); it != p4_params_list.rend(); it++) { + auto &p4_param = *it; + for (size_t i = p4_param.start_bit; i < p4_param.start_bit + p4_param.bit_width; i++) { + // Check if the param bit is used in hash function before adding to + // json. E.g. 
The param can have a mask which will exclude some bits + // to not be a part of the hash function + if (!input_xbar[0]->is_p4_param_bit_in_hash(p4_param.name, i)) continue; + + json::map hash_bit; + hash_bit["hash_bit"] = hash_bit_index; + hash_bit["seed"] = 0; + json::vector &bits_to_xor = hash_bit["bits_to_xor"] = json::vector(); + json::map field; + std::string field_name, global_name; + field_name = p4_param.key_name.empty() ? p4_param.name : p4_param.key_name; + global_name = p4_param.name; + field["field_bit"] = i; + field["field_name"] = field_name; + field["global_name"] = global_name; + field["hash_match_group"] = 0; + field["hash_match_group_bit"] = 0; + bits_to_xor.push_back(std::move(field)); + hash_bits.push_back(std::move(hash_bit)); + + hash_bit_index++; + } + } + hash_function["hash_function_number"] = 0; + hash_functions.push_back(std::move(hash_function)); +} + +void HashActionTable::gen_tbl_cfg(json::vector &out) const { + // FIXME: Support multiple hash_dist's + int size = hash_dist.empty() ? 
1 : 1 + hash_dist[0].mask; + json::map &tbl = *base_tbl_cfg(out, "match_entry", size); + std::string_view stage_tbl_type = "match_with_no_key"; + size = 1; + if (p4_table && p4_table->p4_stage_table_type() == "gateway_with_entries") { + stage_tbl_type = "gateway_with_entries"; + size = p4_size(); + } else if (!p4_params_list.empty()) { + stage_tbl_type = "hash_action"; + size = p4_size(); + } + json::map &match_attributes = tbl["match_attributes"]; + json::vector &stage_tables = match_attributes["stage_tables"]; + json::map &stage_tbl = *add_stage_tbl_cfg(match_attributes, stage_tbl_type.data(), size); + stage_tbl["memory_resource_allocation"] = nullptr; + if (!match_attributes.count("match_type")) + match_attributes["match_type"] = stage_tbl_type.data(); + // This is a only a glass required field, as it is only required when no default action + // is specified, which is impossible for Brig through p4-16 + stage_tbl["default_next_table"] = Stage::end_of_pipe(); + add_pack_format(stage_tbl, 0, 0, hash_dist.empty() ? 
1 : 0); + add_result_physical_buses(stage_tbl); + if (actions) { + actions->gen_tbl_cfg(tbl["actions"]); + actions->add_action_format(this, stage_tbl); + } else if (action && action->actions) { + action->actions->gen_tbl_cfg(tbl["actions"]); + action->actions->add_action_format(this, stage_tbl); + } + common_tbl_cfg(tbl); + if (stage_tbl_type == "hash_action" && !p4_params_list.empty()) add_hash_functions(stage_tbl); + if (idletime) + idletime->gen_stage_tbl_cfg(stage_tbl); + else if (options.match_compiler) + stage_tbl["stage_idletime_table"] = nullptr; + add_all_reference_tables(tbl); + gen_idletime_tbl_cfg(stage_tbl); + merge_context_json(tbl, stage_tbl); +} + +DEFINE_TABLE_TYPE(HashActionTable) +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void HashActionTable::write_merge_regs, + (mau_regs & regs, int type, int bus), + { write_merge_regs_vt(regs, type, bus); }) diff --git a/backends/tofino/bf-asm/hash_dist.cpp b/backends/tofino/bf-asm/hash_dist.cpp new file mode 100644 index 00000000000..f447841020f --- /dev/null +++ b/backends/tofino/bf-asm/hash_dist.cpp @@ -0,0 +1,227 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "hash_dist.h"
+
+#include "backends/tofino/bf-asm/config.h"
+#include "backends/tofino/bf-asm/stage.h"
+#include "lib/range.h"
+
+// Decode one symbolic output name from the asm source into the matching
+// HashDistribution::xbar_use bit.
+static void set_output_bit(unsigned &xbar_use, value_t &v) {
+    if (CHECKTYPE(v, tSTR)) {
+        if (v == "immediate_lo" || v == "lo")
+            xbar_use |= HashDistribution::IMMEDIATE_LOW;
+        else if (v == "immediate_hi" || v == "hi")
+            xbar_use |= HashDistribution::IMMEDIATE_HIGH;
+        else if (v == "meter" || v == "meter_address")
+            xbar_use |= HashDistribution::METER_ADDRESS;
+        else if (v == "stats" || v == "stats_address")
+            xbar_use |= HashDistribution::STATISTICS_ADDRESS;
+        else if (v == "action" || v == "action_address")
+            xbar_use |= HashDistribution::ACTION_DATA_ADDRESS;
+        else if (v == "hashmod")
+            xbar_use |= HashDistribution::HASHMOD_DIVIDEND;
+        else
+            error(v.lineno, "Unrecognized hash_dist output %s", v.s);
+    }
+}
+
+// Render a set of xbar_use bits as a human-readable list for diagnostics.
+// Returns a pointer to a static buffer, so the result is not reentrant and is
+// invalidated by the next call.
+static const char *xbar_use_string(unsigned xbar_use) {
+    static char buffer[256];
+    static const char *bits[] = {"immed hi",   "immed lo",    "meter addr",
+                                 "stats addr", "action addr", "hashmod-div"};
+    char *p = buffer, *e = buffer + sizeof(buffer);
+    // unsigned loop index avoids the signed/unsigned comparison with sizeof
+    for (unsigned bit = 0; bit < sizeof(bits) / sizeof(bits[0]); ++bit) {
+        if (!(xbar_use & (1U << bit))) continue;
+        xbar_use &= ~(1U << bit);
+        if (p != buffer) p += snprintf(p, p < e ? e - p : 0, xbar_use ? ", " : " and ");
+        p += snprintf(p, p < e ? e - p : 0, "%s", bits[bit]);
+    }
+    if (xbar_use) {
+        // any leftover bits have no symbolic name -- print them raw
+        if (p != buffer) p += snprintf(p, p < e ? e - p : 0, " and ");
+        p += snprintf(p, p < e ? e - p : 0, "<0x%x>", xbar_use);
+    }
+    return buffer;
+}
+
+HashDistribution::HashDistribution(int id_, value_t &data, unsigned u)
+    : lineno(data.lineno), id(id_), xbar_use(u) {
+    if (id < 0 || id >= 6) error(data.lineno, "Invalid hash_dist unit id %d", id);
+    if (CHECKTYPE(data, tMAP)) {
+        for (auto &kv : MapIterChecked(data.map)) {
+            if (kv.key == "hash") {
+                if (CHECKTYPE(kv.value, tINT) && (unsigned)(hash_group = kv.value.i) >= 8U)
+                    error(kv.value.lineno, "Invalid hash group");
+            } else if (kv.key == "mask") {
+                if (CHECKTYPE(kv.value, tINT)) mask = kv.value.i;
+            } else if (kv.key == "shift") {
+                if (CHECKTYPE(kv.value, tINT)) shift = kv.value.i;
+            } else if (kv.key == "expand") {
+                if (CHECKTYPE(kv.value, tINT)) expand = kv.value.i;
+            } else if (kv.key == "output") {
+                if (kv.value.type == tVEC)
+                    for (auto &s : kv.value.vec) set_output_bit(xbar_use, s);
+                else
+                    set_output_bit(xbar_use, kv.value);
+            } else {
+                warning(kv.key.lineno, "ignoring unknown item %s in hash_dist", value_desc(kv.key));
+            }
+        }
+    }
+}
+
+// NOTE(review): the container element type was stripped by markup mangling
+// ("std::vector &out"); restored from the declaration in hash_dist.h.
+void HashDistribution::parse(std::vector<HashDistribution> &out, const value_t &data,
+                             unsigned xbar_use) {
+    if (CHECKTYPE(data, tMAP))
+        for (auto &kv : data.map)
+            if (CHECKTYPE(kv.key, tINT)) out.emplace_back(kv.key.i, kv.value, xbar_use);
+}
+
+// Two uses of the same hash_dist unit are compatible when all the per-unit
+// configuration they would program is identical (masks may differ only in the
+// directions allowed by meter pre-coloring).
+bool HashDistribution::compatible(HashDistribution *a) {
+    if (hash_group != a->hash_group) return false;
+    if (id != a->id) return false;
+    if (shift != a->shift) return false;
+    if (expand != a->expand) return false;
+    if (delay_type != a->delay_type) return false;
+    if (non_linear != a->non_linear) return false;
+    if (meter_pre_color && !a->meter_pre_color && (mask & ~a->mask)) return false;
+    if (!meter_pre_color && a->meter_pre_color && (~mask & a->mask)) return false;
+    return true;
+}
+
+void HashDistribution::pass1(Table *tbl, delay_type_t delay_type, bool non_linear) {
+    LOG1("Hash dist pass1");
+    this->tbl = tbl;
+    this->delay_type = delay_type;
+    this->non_linear = non_linear;
+    bool err = false;
+    for (auto *use : tbl->stage->hash_dist_use[id]) {
+        if (!compatible(use)) {
+            err = true;
+            error(lineno, "hash_dist unit %d in table %s not compatible with", id, tbl->name());
+            warning(use->lineno, "previous use in table %s", use->tbl->name());
+        }
+    }
+    if (expand >= 0) {
+        int min_shift = 7, diff = 7, other = id - 1;
+        switch (id % 3) {
+            case 0:
+                min_shift = 0;
+                diff = -7;
+                other = id + 1;
+                // fall through
+            case 1:
+                if (expand < min_shift || expand >= min_shift + 16) {
+                    error(lineno, "hash_dist unit %d expand can't pull from bit %d", id, expand);
+                    err = true;
+                }
+                break;
+            case 2:
+                error(lineno, "hash_dist unit %d cannot be expanded", id);
+                err = true;
+                break;
+            default:
+                error(lineno,
+                      "a mod 3 check should only hit these particular cases, of 0, 1, and 2");
+                BUG();
+        }
+        if (!err) {
+            for (auto *use : tbl->stage->hash_dist_use[other])
+                if (use->expand != -1 && use->expand != expand - diff) {
+                    error(lineno, "hash_dist unit %d in table %s expand not compatible with", id,
+                          tbl->name());
+                    warning(use->lineno, "previous use in table %s", use->tbl->name());
+                }
+        }
+    }
+    if (err) return;
+    tbl->stage->hash_dist_use[id].push_back(this);
+    for (int i = 0; i < 3; i++) {
+        if (id % 3 == i) continue;
+        int m = 3 * (id / 3) + i;
+        // FIXED(review): this loop previously iterated hash_dist_use[id],
+        // which only re-compares this unit against uses of its own slot (whose
+        // hash_group already matches after the compatibility check above), so
+        // the cross-unit check was dead.  Units in the same group of three
+        // share a single hash_group_sel subfield (see write_regs), so the
+        // uses of the sibling unit m must be checked instead.
+        for (auto *use : tbl->stage->hash_dist_use[m]) {
+            if (use->hash_group != hash_group) {
+                error(lineno, "hash_dist %d and %d use different hash groups", id, m);
+                warning(use->lineno, "previous use here");
+            }
+        }
+    }
+}
+
+void HashDistribution::pass2(Table *tbl) {
+    for (auto &hd : tbl->hash_dist) {
+        if (&hd == this) return;
+        if (id == hd.id) {
+            error(lineno, "multiple definitions for hash_dist %d in table %s", id, tbl->name());
+            error(hd.lineno, "previous definition");
+            break;
+        }
+        if (xbar_use & hd.xbar_use)
+            error(lineno, "conflicting output use between hash_dist %d and %d in table %s %s", id,
+                  hd.id, tbl->name(), xbar_use_string(xbar_use & hd.xbar_use));
+    }
+}
+
+// NOTE(review): "template" parameter list and "&regs" were destroyed by the
+// markup mangling ("template " / "REGS (R)s"); restored to match the
+// FOR_ALL_REGISTER_SETS instantiation that follows this definition.
+template <class REGS>
+void HashDistribution::write_regs(REGS &regs, Table *tbl) {
+    /* from HashDistributionResourceAllocation.write_config: */
+    auto &merge = regs.rams.match.merge;
+    if (non_linear) merge.mau_selector_hash_sps_enable |= 1 << id;
+    if (tbl->gress == EGRESS) merge.mau_hash_group_config.hash_group_egress |= 1 << id;
+    merge.mau_hash_group_config.hash_group_enable |= 1 << id;
+    merge.mau_hash_group_config.hash_group_sel.set_subfield(hash_group | 8U, 4 * (id / 3), 4);
+    merge.mau_hash_group_config.hash_group_ctl.set_subfield(delay_type, 2 * id, 2);
+    merge.mau_hash_group_shiftcount.set_subfield(shift, 3 * id, 3);
+    merge.mau_hash_group_mask[id] |= mask;
+    if (expand >= 0) {
+        switch (id % 3) {
+            case 0:
+                merge.mau_hash_group_expand[id / 3].hash_slice_group0_expand = 1;
+                merge.mau_hash_group_expand[id / 3].hash_slice_group2_expand = expand;
+                merge.mau_hash_group_config.hash_group_enable |= 1 << (id + 2);
+                merge.mau_hash_group_config.hash_group_ctl.set_subfield(delay_type, 2 * (id + 2),
+                                                                        2);
+                break;
+            case 1:
+                merge.mau_hash_group_expand[id / 3].hash_slice_group1_expand = 1;
+                merge.mau_hash_group_expand[id / 3].hash_slice_group2_expand = expand - 7;
+                merge.mau_hash_group_config.hash_group_enable |= 1 << (id + 1);
+                merge.mau_hash_group_config.hash_group_ctl.set_subfield(delay_type, 2 * (id + 1),
+                                                                        2);
+                break;
+            default:
+                BUG();
+        }
+    }
+    for (int oxbar : Range(0, 4))
+        if ((xbar_use >> oxbar) & 1)
+            merge.mau_hash_group_xbar_ctl[oxbar][tbl->logical_id / 8U].set_subfield(
+                8 | id, 4 * (tbl->logical_id % 8U), 4);
+    if (xbar_use & HASHMOD_DIVIDEND) {
+        int mgroup = tbl->get_selector()->meter_group();
+        merge.mau_hash_group_xbar_ctl[5][mgroup / 8U].set_subfield(8 | id, 4 * (mgroup % 8U), 4);
+    }
+    if (meter_pre_color) {
+        merge.mau_meter_precolor_hash_sel.set_subfield(8 | id, 4 * (id / 3), 4);
+        int ctl = 16 | meter_mask_index;
+        if (id >= 3) ctl |= 8;
+        merge.mau_meter_precolor_hash_map_to_logical_ctl[tbl->logical_id / 4U].set_subfield(
+            ctl, 5 * (tbl->logical_id % 4U), 5);
+    }
+}
+FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void HashDistribution::write_regs, mau_regs &, + Table *) diff --git a/backends/tofino/bf-asm/hash_dist.h b/backends/tofino/bf-asm/hash_dist.h new file mode 100644 index 00000000000..de8005d754c --- /dev/null +++ b/backends/tofino/bf-asm/hash_dist.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_HASH_DIST_H_ +#define BACKENDS_TOFINO_BF_ASM_HASH_DIST_H_ + +#include + +#include "asm-types.h" + +class Stage; +class Table; + +/* config for a hash distribution unit in match central. 
+ * FIXME -- need to abstract this away rather than have it be explicit + * FIXME -- in the asm code */ + +struct HashDistribution { + // FIXME -- need less 'raw' data for this */ + Table *tbl = 0; + int lineno = -1; + int hash_group = -1, id = -1; + int shift = 0, mask = 0, expand = -1; + bool meter_pre_color = false; + int meter_mask_index = 0; + enum { + IMMEDIATE_HIGH = 1 << 0, + IMMEDIATE_LOW = 1 << 1, + METER_ADDRESS = 1 << 2, + STATISTICS_ADDRESS = 1 << 3, + ACTION_DATA_ADDRESS = 1 << 4, + HASHMOD_DIVIDEND = 1 << 5 + }; + unsigned xbar_use = 0; + enum delay_type_t { SELECTOR = 0, OTHER = 1 }; + delay_type_t delay_type = SELECTOR; + bool non_linear = false; + HashDistribution(int id, value_t &data, unsigned u = 0); + static void parse(std::vector &out, const value_t &v, unsigned u = 0); + bool compatible(HashDistribution *a); + void pass1(Table *tbl, delay_type_t dt, bool nl); + void pass2(Table *tbl); + template + void write_regs(REGS ®s, Table *); +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_HASH_DIST_H_ */ diff --git a/backends/tofino/bf-asm/hashdump.cpp b/backends/tofino/bf-asm/hashdump.cpp new file mode 100644 index 00000000000..572e4e4e31e --- /dev/null +++ b/backends/tofino/bf-asm/hashdump.cpp @@ -0,0 +1,132 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include "backends/tofino/bf-asm/json.h" +#include "gen/tofino/disas.regs.mau_addrmap.h" +#include "lib/hex.h" + +static Tofino::regs_mau_addrmap regs; + +static void dump_hashtables(std::ostream &out); + +int verbose = 0; +int get_file_log_level(const char *file, int *level) { return *level = verbose; } + +int main(int ac, char **av) { + for (int i = 1; i < ac; i++) { + if (av[i][0] == '-' || av[i][0] == '+') { + bool flag = av[i][0] == '+'; + for (char *arg = av[i] + 1; *arg;) switch (*arg++) { + case 'v': + verbose++; + break; + default: + std::cerr << "Unknown option " << (flag ? '+' : '-') << arg[-1] + << std::endl; + std::cerr << "usage: " << av[0] << " file" << std::endl; + } + } else { + std::ifstream in(av[i]); + if (!in) { + std::cerr << "Can't open " << av[i] << std::endl; + continue; + } + std::unique_ptr data; + in >> data; + if (!in || regs.unpack_json(data.get())) { + std::cerr << "Can't read/unpack json from " << av[i] << std::endl; + continue; + } + dump_hashtables(std::cout); + } + } +} + +static bool col_nonzero(int i, int col) { + for (int word = i * 8; word < i * 8 + 8; word++) { + auto &x = regs.dp.xbar_hash.hash.galois_field_matrix[word][col]; + if (x.byte0 || x.byte1) return true; + } + return false; +} + +static bool col_valid_nonzero(int i, int col) { + for (int word = i * 8; word < i * 8 + 8; word++) { + auto &x = regs.dp.xbar_hash.hash.galois_field_matrix[word][col]; + if (x.valid0 || x.valid1) return true; + } + return false; +} + +static bool ht_nonzero(int i) { + for (int col = 0; col < 52; col++) { + if ((regs.dp.xbar_hash.hash.hash_seed[col] >> i) & 1) return true; + if (col_nonzero(i, col)) return true; + if (col_valid_nonzero(i, col)) return true; + } + return false; +} + +static void dump_ht(std::ostream &out, int i) { + for (int col = 0; col < 52; col++) { + if (col_nonzero(i, col)) { + out << " " << col << ": 0x"; + bool pfx = true; + for (int word = 
8 * i + 7; word >= 8 * i; word--) { + auto &w = regs.dp.xbar_hash.hash.galois_field_matrix[word][col]; + if (!pfx || w.byte1) { + out << hex(w.byte1, pfx ? 0 : 2, '0'); + pfx = false; + } + if (!pfx || w.byte0) { + out << hex(w.byte0, pfx ? 0 : 2, '0'); + pfx = false; + } + } + out << '\n'; + } + if (col_valid_nonzero(i, col)) { + out << " valid " << col << ": 0b"; + bool pfx = true; + for (int word = 8 * i + 7; word >= 8 * i; word--) { + auto &w = regs.dp.xbar_hash.hash.galois_field_matrix[word][col]; + if (!pfx || w.valid1) { + out << (w.valid1 ? '1' : '0'); + pfx = false; + } + if (!pfx || w.valid0) { + out << (w.valid0 ? '1' : '0'); + pfx = false; + } + } + out << '\n'; + } + if ((regs.dp.xbar_hash.hash.hash_seed[col] >> i) & 1) out << " seed " << col << ": 1\n"; + } +} + +static void dump_hashtables(std::ostream &out) { + for (int i = 0; i < 8; i++) { + if (ht_nonzero(i)) { + out << "hash " << i << ":\n"; + dump_ht(out, i); + } + } +} diff --git a/backends/tofino/bf-asm/hashexpr.cpp b/backends/tofino/bf-asm/hashexpr.cpp new file mode 100644 index 00000000000..00bd2b0bc52 --- /dev/null +++ b/backends/tofino/bf-asm/hashexpr.cpp @@ -0,0 +1,837 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "hashexpr.h"
+
+#include "input_xbar.h"
+#include "lib/bitops.h"
+#include "lib/bitvec.h"
+
+// Validate that a phv reference is usable in the mau and (when a specific
+// hash table is given) actually present on that hash table's input.
+static bool check_ixbar(Phv::Ref &ref, InputXbar *ix, InputXbar::HashTable hash_table) {
+    if (!ref.check()) return false;
+    if (ref->reg.mau_id() < 0) {
+        error(ref.lineno, "%s not accessible in mau", ref->reg.name);
+        return false;
+    }
+    if (!hash_table) return true;
+    for (auto in : ix->find_hash_inputs(*ref, hash_table)) {
+        BUG_CHECK(in->lo >= 0, "invalid lo in IXBar::Input");
+        return true;
+    }
+    error(ref.lineno, "%s not in %s input", ref.name(), hash_table.toString().c_str());
+    return false;
+}
+
+/**
+ * Generating a list of ixbar_input_t and hash_matrix_output_t to be sent to the
+ * dynamic_hash library. The vectors are part of the function call as they
+ * must be on the stack to avoid using new and delete
+ */
+// NOTE(review): the vector element types were stripped by markup mangling;
+// ixbar_input_t / hash_matrix_output_t restored from the dynamic_hash usage
+// in the body.
+void HashExpr::gen_ixbar_init(ixbar_init_t *ixbar_init, std::vector<ixbar_input_t> &inputs,
+                              std::vector<hash_matrix_output_t> &outputs, int logical_hash_bit,
+                              InputXbar *ix, InputXbar::HashTable hash_table) {
+    inputs.clear();
+    outputs.clear();
+
+    gen_ixbar_inputs(inputs, ix, hash_table);
+    hash_matrix_output_t hmo;
+    hmo.p4_hash_output_bit = logical_hash_bit;
+    hmo.gfm_start_bit = 0;
+    hmo.bit_size = 1;
+    outputs.push_back(hmo);
+
+    ixbar_init->ixbar_inputs = inputs.data();
+    ixbar_init->inputs_sz = inputs.size();
+    ixbar_init->hash_matrix_outputs = outputs.data();
+    ixbar_init->outputs_sz = outputs.size();
+}
+
+/**
+ * The function call for PhvRef, Random, Identity, and Crc functions. The input xbar is
+ * initialized, and the data returned writes out a vector of inputs. For Stripe,
+ * Slice, and others, they recursively will call this function
+ */
+void HashExpr::gen_data(bitvec &data, int logical_hash_bit, InputXbar *ix,
+                        InputXbar::HashTable hash_table) {
+    ixbar_init_t ixbar_init;
+    hash_column_t hash_matrix[PARITY_GROUPS_DYN][HASH_MATRIX_WIDTH_DYN] = {};
+    std::vector<ixbar_input_t> inputs;
+    std::vector<hash_matrix_output_t> outputs;
+
+    gen_ixbar_init(&ixbar_init, inputs, outputs, logical_hash_bit, ix, hash_table);
+
+    bool non_zero = false;
+    int loops = 0;
+    // It is possible that a hash column can be generated as all 0s if using RANDOM_DYN algo, so
+    // regeneration is required if a hash column is all 0s and using RANDOM_DYN.
+    while (!non_zero) {
+        determine_hash_matrix(&ixbar_init, ixbar_init.ixbar_inputs, ixbar_init.inputs_sz,
+                              &hash_algorithm, hash_matrix);
+        if (hash_algorithm.hash_alg != RANDOM_DYN ||
+            ix->global_column0_extract(hash_table, hash_matrix)) {
+            non_zero = true;
+        }
+        BUG_CHECK(loops++ < 1000, "Looping trying to get a valid RANDOM_DYN matrix");
+    }
+    data |= ix->global_column0_extract(hash_table, hash_matrix);
+}
+
+// A hash expression that is just a single phv field slice (identity hash).
+class HashExpr::PhvRef : HashExpr {
+    Phv::Ref what;
+    PhvRef(gress_t gr, int stg, const value_t &v) : HashExpr(v.lineno), what(gr, stg, v) {}
+    friend class HashExpr;
+    bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override {
+        return ::check_ixbar(what, ix, hash_table);
+    }
+    int width() override { return what.size(); }
+    int input_size() override { return what.size(); }
+    bool match_phvref(const Phv::Ref &ref) override {
+        if (what->reg != ref->reg || what->lo != ref->lo) return false;
+        return true;
+    }
+    bool operator==(const HashExpr &a_) const override {
+        if (typeid(*this) != typeid(a_)) return false;
+        auto &a = static_cast<const PhvRef &>(a_);
+        return *what == *a.what;
+    }
+    void build_algorithm() override {
+        hash_algorithm.hash_alg = IDENTITY_DYN;
+        hash_algorithm.msb = false;
+        hash_algorithm.extend = false;
+        hash_algorithm.final_xor = 0ULL;
+        hash_algorithm.poly = 0ULL;
+        hash_algorithm.init = 0ULL;
+        hash_algorithm.reverse = false;
+    }
+
+    void gen_ixbar_inputs(std::vector<ixbar_input_t> &inputs, InputXbar *ix,
+                          InputXbar::HashTable hash_table) override;
+    void get_sources(int bit, std::vector<Phv::Ref> &rv) const override {
+        if (bit >= 0)
+            rv.emplace_back(what, bit, bit);
+        else
+            rv.emplace_back(what);
+    }
+    Phv::Ref *get_ghost_slice() override { return &what; }
+    void dbprint(std::ostream &out) const override {
+        out << "HashExpr: PhvRef" << std::endl;
+        out << "hash algorithm: [ algo : " << hash_algorithm.hash_alg
+            << ", msb : " << hash_algorithm.msb << ", extend : " << hash_algorithm.extend
+            << ", final_xor : " << hash_algorithm.final_xor << ", poly : " << hash_algorithm.poly
+            << ", init : " << hash_algorithm.init << ", reverse : " << hash_algorithm.reverse
+            << std::endl;
+        if (what) out << "Phv: " << what << std::endl;
+    }
+};
+
+// A random hash over a list of phv fields (RANDOM_DYN in the dynamic_hash lib).
+class HashExpr::Random : HashExpr {
+    std::vector<Phv::Ref> what;
+    explicit Random(int lineno) : HashExpr(lineno) {}
+    friend class HashExpr;
+    bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override {
+        bool rv = true;
+        for (auto &ref : what) rv &= ::check_ixbar(ref, ix, hash_table);
+        return rv;
+    }
+    int width() override { return 0; }
+    int input_size() override {
+        int rv = 0;
+        for (auto &ref : what) rv += ref->size();
+        return rv;
+    }
+    bool operator==(const HashExpr &a_) const override {
+        if (typeid(*this) != typeid(a_)) return false;
+        auto &a = static_cast<const Random &>(a_);
+        if (what.size() != a.what.size()) return false;
+        auto it = a.what.begin();
+        for (auto &el : what)
+            if (*el != **it++) return false;
+        return true;
+    }
+    void build_algorithm() override {
+        hash_algorithm.hash_alg = RANDOM_DYN;
+        hash_algorithm.msb = false;
+        hash_algorithm.extend = false;
+        hash_algorithm.final_xor = 0ULL;
+        hash_algorithm.poly = 0ULL;
+        hash_algorithm.init = 0ULL;
+        hash_algorithm.reverse = false;
+    }
+    void gen_ixbar_inputs(std::vector<ixbar_input_t> &inputs, InputXbar *ix,
+                          InputXbar::HashTable hash_table) override;
+    void get_sources(int, std::vector<Phv::Ref> &rv) const override {
+        rv.insert(rv.end(), what.begin(), what.end());
+    }
+    void dbprint(std::ostream &out) const override {
+        out << "HashExpr: Random" << std::endl;
+        out << "hash algorithm: [ algo : " << hash_algorithm.hash_alg
+            << ", msb : " << hash_algorithm.msb << ", extend : " << hash_algorithm.extend
+            << ", final_xor : " << hash_algorithm.final_xor << ", poly : " << hash_algorithm.poly
+            << ", init : " << hash_algorithm.init << ", reverse : " << hash_algorithm.reverse
+            << std::endl;
+        for (auto &e : what) {
+            out << "Phv: " << e << std::endl;
+        }
+    }
+};
+
+// A CRC hash (CRC_DYN) with explicit polynomial/init/final-xor.
+class HashExpr::Crc : HashExpr {
+    bitvec poly;
+    bitvec init;
+    bitvec final_xor;
+    ///> It is a multimap to allow two fields to have the exact same hash matrix requirements
+    std::multimap<unsigned, Phv::Ref> what;
+    std::map<unsigned, bitvec> constants;
+    std::vector<Phv::Ref> vec_what;
+    bool reverse = false;
+    int total_input_bits = -1;
+    explicit Crc(int lineno) : HashExpr(lineno) {}
+    friend class HashExpr;
+    bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override;
+    int width() override { return poly.max().index(); }
+    int input_size() override {
+        if (total_input_bits >= 0) return total_input_bits;
+        if (what.empty()) {
+            int rv = 0;
+            for (auto &ref : vec_what) rv += ref->size();
+            return rv;
+        } else {
+            // highest keyed offset plus that field's width
+            return what.rbegin()->first + what.rbegin()->second->size();
+        }
+    }
+    bool operator==(const HashExpr &a_) const override {
+        if (typeid(*this) != typeid(a_)) return false;
+        auto &a = static_cast<const Crc &>(a_);
+        if (what.size() != a.what.size()) return false;
+        if (vec_what.size() != a.vec_what.size()) return false;
+        auto it = a.what.begin();
+        for (auto &el : what)
+            if (el.first != it->first || *el.second != *(it++)->second) return false;
+        auto it2 = a.vec_what.begin();
+        for (auto &el : vec_what)
+            if (*el != **it2++) return false;
+        return true;
+    }
+    void build_algorithm() override {
+        hash_algorithm.hash_bit_width = poly.max().index();
+        hash_algorithm.hash_alg = CRC_DYN;
+        hash_algorithm.reverse = reverse;
+        hash_algorithm.poly = poly.getrange(32, 32) << 32;
+        hash_algorithm.poly |= poly.getrange(0, 32);
+        hash_algorithm.init = init.getrange(32, 32) << 32;
+        hash_algorithm.init |= init.getrange(0, 32);
+        hash_algorithm.final_xor = final_xor.getrange(0, 32);
+        hash_algorithm.final_xor |= final_xor.getrange(32, 32) << 32;
+        hash_algorithm.extend = false;
+        hash_algorithm.msb = false;
+    }
+
+    void gen_ixbar_inputs(std::vector<ixbar_input_t> &inputs, InputXbar *ix,
+                          InputXbar::HashTable hash_table) override;
+    void get_sources(int, std::vector<Phv::Ref> &rv) const override {
+        rv.insert(rv.end(), vec_what.begin(), vec_what.end());
+    }
+};
+
+/**
+ * @brief XOR hashing algorithm implemented on the hashing matrix
+ *
+ * This expression implements XOR over the hashing matrix. The input
+ * message is handled as a big integer number - the highest bit is
+ * the begining, the zero-th bit is the end. The message is split
+ * from the begining into blocks of length bit_width and these blocks
+ * are bitwise XORed together.
+ */
+class HashExpr::XorHash : public HashExpr {
+  private:
+    std::multimap<unsigned, Phv::Ref> what;
+    int bit_width;
+    friend class HashExpr;
+
+  public:
+    explicit XorHash(int lineno, int bit_width_);
+
+    /* -- avoid copying */
+    XorHash &operator=(XorHash &&) = delete;
+
+    bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override;
+    int width() override;
+    int input_size() override;
+    bool operator==(const HashExpr &a_) const override;
+    void build_algorithm() override;
+    void gen_ixbar_inputs(std::vector<ixbar_input_t> &inputs, InputXbar *ix,
+                          InputXbar::HashTable hash_table) override;
+    void get_sources(int, std::vector<Phv::Ref> &rv) const override;
+};
+
+// An XOR of several sub-expressions.
+class HashExpr::Xor : HashExpr {
+    std::vector<HashExpr *> what;
+    explicit Xor(int lineno) : HashExpr(lineno) {}
+    friend class HashExpr;
+    bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override {
+        bool rv = true;
+        // FIXED(review): was 'rv |= ...', which with rv initialized to true
+        // always returned true regardless of child failures; every sibling
+        // expression class accumulates its children's results with &=.
+        for (auto *e : what) rv &= e->check_ixbar(ix, hash_table);
+        return rv;
+    }
+    void gen_data(bitvec &data, int logical_hash_bit,
InputXbar *ix, + InputXbar::HashTable hash_table) override; + int width() override { + int rv = 0; + for (auto *e : what) { + int w = e->width(); + if (w > rv) rv = w; + } + return rv; + } + int input_size() override { + int rv = 0; + for (auto *e : what) rv += e->input_size(); + return rv; + } + bool operator==(const HashExpr &a_) const override { + if (typeid(*this) != typeid(a_)) return false; + auto &a = static_cast(a_); + if (what.size() != a.what.size()) return false; + auto it = a.what.begin(); + for (auto &el : what) + if (*el != **it++) return false; + return true; + } + void build_algorithm() override { + for (auto *e : what) { + if (e) e->build_algorithm(); + } + } + + void gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) override {} + void get_sources(int bit, std::vector &rv) const override { + for (auto *e : what) e->get_sources(bit, rv); + } + Phv::Ref *get_ghost_slice() override { + for (auto *e : what) { + auto g = e->get_ghost_slice(); + if (g) return g; + } + return nullptr; + } + void dbprint(std::ostream &out) const override { + out << "HashExpr: Xor" << std::endl; + for (auto *e : what) { + e->dbprint(out); + } + } +}; + +class HashExpr::Mask : HashExpr { + HashExpr *what; + bitvec mask; + Mask(int lineno, HashExpr *w, bitvec m) : HashExpr(lineno), what(w), mask(m) {} + friend class HashExpr; + bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override { + return what->check_ixbar(ix, hash_table); + } + void gen_data(bitvec &data, int bit, InputXbar *ix, InputXbar::HashTable hash_table) override { + if (mask[bit]) what->gen_data(data, bit, ix, hash_table); + } + int width() override { return what->width(); } + int input_size() override { return what->input_size(); } + bool operator==(const HashExpr &a_) const override { + if (typeid(*this) != typeid(a_)) return false; + auto &a = static_cast(a_); + return mask == a.mask && *what == *a.what; + } + void build_algorithm() override { 
what->build_algorithm(); } + + void gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) override {} + void get_sources(int bit, std::vector &rv) const override { + if (mask[bit]) what->get_sources(bit, rv); + } + Phv::Ref *get_ghost_slice() override { return what->get_ghost_slice(); } + void dbprint(std::ostream &out) const override { + out << "HashExpr: Mask " << mask << std::endl; + what->dbprint(out); + } +}; + +class HashExpr::Stripe : HashExpr { + std::vector what; + bool supress_error_cascade = false; + explicit Stripe(int lineno) : HashExpr(lineno) {} + friend class HashExpr; + bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override { + bool rv = true; + for (auto *e : what) rv |= e->check_ixbar(ix, hash_table); + return rv; + } + void gen_data(bitvec &data, int logical_hash_bit, InputXbar *ix, + InputXbar::HashTable hash_table) override; + int width() override { return 0; } + int input_size() override { + int rv = 0; + for (auto *e : what) rv += e->input_size(); + return rv; + } + bool operator==(const HashExpr &a_) const override { + if (typeid(*this) != typeid(a_)) return false; + auto &a = static_cast(a_); + if (what.size() != a.what.size()) return false; + auto it = a.what.begin(); + for (auto &el : what) + if (*el != **it++) return false; + return true; + } + void build_algorithm() override { + for (auto *e : what) { + e->build_algorithm(); + } + // Does not set the extend algorithm, as the gen_data for extend does this + // in the source + } + + void gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) override {} + void get_sources(int bit, std::vector &rv) const override { + for (auto *e : what) { + if (bit >= 0) { + int width = e->width(); + if (bit < width) { + e->get_sources(bit, rv); + break; + } + bit -= width; + } else { + e->get_sources(bit, rv); + } + } + } + void dbprint(std::ostream &out) const override { + out << "HashExpr: Stripe" << std::endl; + for 
(auto *e : what) { + e->dbprint(out); + } + } +}; + +class HashExpr::Slice : HashExpr { + HashExpr *what = nullptr; + int start = 0, _width = 0; + explicit Slice(int lineno) : HashExpr(lineno) {} + friend class HashExpr; + bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override { + return what->check_ixbar(ix, hash_table); + } + void gen_data(bitvec &data, int logical_hash_bit, InputXbar *ix, + InputXbar::HashTable hash_table) override { + what->gen_data(data, logical_hash_bit + start, ix, hash_table); + } + int width() override { + if (_width == 0) { + _width = what->width(); + if (_width > 0) { + _width -= start; + if (_width <= 0) _width = -1; + } + } + return _width; + } + int input_size() override { return what->input_size(); } + bool operator==(const HashExpr &a_) const override { + if (typeid(*this) != typeid(a_)) return false; + auto &a = static_cast(a_); + if (start != a.start || _width != a._width) return false; + return *what == *a.what; + } + void build_algorithm() override { what->build_algorithm(); } + void gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) override {} + void get_sources(int bit, std::vector &rv) const override { + if (bit >= start) + what->get_sources(bit - start, rv); + else if (bit < 0) + what->get_sources(bit, rv); + } + void dbprint(std::ostream &out) const override { + out << "HashExpr: Slice" << std::endl; + if (what) out << what << std::endl; + out << "start: " << start << " ,width: " << _width << std::endl; + } +}; + +class HashExpr::SExtend : HashExpr { + HashExpr *what; + SExtend(int lineno, HashExpr *w) : HashExpr(lineno), what(w) {} + friend class HashExpr; + bool check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) override { + return what->check_ixbar(ix, hash_table); + } + void gen_data(bitvec &data, int bit, InputXbar *ix, InputXbar::HashTable hash_table) override { + int width = what->width(); + if (width > 0 && bit >= width) bit = width - 1; + 
what->gen_data(data, bit, ix, hash_table); + } + int width() override { return 0; } + int input_size() override { return what->input_size(); } + bool operator==(const HashExpr &a_) const override { + if (typeid(*this) != typeid(a_)) return false; + auto &a = static_cast(a_); + return *what == *a.what; + } + void build_algorithm() override { what->build_algorithm(); } + void gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) override {} + void get_sources(int bit, std::vector &rv) const override { + int width = what->width(); + if (width > 0 && bit >= width) bit = width - 1; + what->get_sources(bit, rv); + } + void dbprint(std::ostream &out) const override { + out << "HashExpr: SExtend" << std::endl; + if (what) out << what << std::endl; + } +}; + +// The ordering for crc expression is: +// crc(poly, @optional init, @optional input_bits, map) +HashExpr *HashExpr::create(gress_t gress, int stage, const value_t &what) { + if (what.type == tCMD) { + if (what[0] == "random") { + Random *rv = new Random(what.lineno); + for (int i = 1; i < what.vec.size; i++) rv->what.emplace_back(gress, stage, what[i]); + return rv; + } else if (what[0] == "xor") { + if (what.vec.size != 3) { + error(what[1].lineno, + "Syntax error, invalid number of parameters for 'xor' hash expression"); + return nullptr; + } + if (!CHECKTYPE(what[1], tINT)) { + return nullptr; + } + if (!CHECKTYPE(what[2], tMAP)) { + return nullptr; + } + std::unique_ptr rv(new XorHash(what.lineno, what[1].i)); + for (auto &kv : what[2].map) { + if (CHECKTYPE(kv.key, tINT)) { + rv->what.emplace(kv.key.i, Phv::Ref(gress, stage, kv.value)); + } else { + return nullptr; + } + } + + return rv.release(); + } else if ((what[0] == "crc" || what[0] == "crc_rev" || what[0] == "crc_reverse") && + CHECKTYPE2(what[1], tBIGINT, tINT)) { + Crc *rv = new Crc(what.lineno); + if (what[0] != "crc") rv->reverse = true; + rv->poly = get_bitvec(what[1]); + // Shift and set LSB to 1 to generate 
polynomial from Koopman number + // provided in assembly + rv->poly <<= 1; + rv->poly[0] = 1; + int i = 2; + + if (what.vec.size > i && (what[i].type == tINT || what[i].type == tBIGINT)) + rv->init = get_bitvec(what[i++]); + if (what.vec.size > i && (what[i].type == tINT || what[i].type == tBIGINT)) + rv->final_xor = get_bitvec(what[i++]); + if (what.vec.size > i && what[i].type == tINT) rv->total_input_bits = what[i++].i; + + if (what.vec.size > i && what[i].type == tMAP) { + for (auto &kv : what[i].map) { + if (CHECKTYPE(kv.key, tINT)) { + rv->what.emplace(kv.key.i, Phv::Ref(gress, stage, kv.value)); + } + } + } else { + for (; i < what.vec.size; i++) { + rv->vec_what.emplace_back(gress, stage, what[i]); + } + } + return rv; + } else if (what[0] == "^") { + Xor *rv = new Xor(what.lineno); + for (int i = 1; i < what.vec.size; i++) + rv->what.push_back(create(gress, stage, what[i])); + return rv; + } else if (what[0] == "&") { + HashExpr *op = nullptr; + bitvec mask; + bool have_mask = false; + for (int i = 1; i < what.vec.size; i++) { + if (what[i].type == tINT || what[i].type == tBIGINT) { + if (have_mask) { + mask &= get_bitvec(what[i]); + } else { + mask = get_bitvec(what[i]); + have_mask = true; + } + } else if (op) { + error(what.lineno, "Invalid mask operation"); + return nullptr; + } else { + op = create(gress, stage, what[i]); + } + } + if (!op) { + error(what.lineno, "Invalid mask operation"); + return nullptr; + } else if (have_mask) { + return new Mask(what.lineno, op, mask); + } else { + return op; + } + } else if (what[0] == "stripe") { + Stripe *rv = new Stripe(what.lineno); + for (int i = 1; i < what.vec.size; i++) + rv->what.push_back(create(gress, stage, what[i])); + return rv; + } else if (what[0] == "slice") { + if (what.vec.size < 3 || what[2].type == tRANGE + ? 
what.vec.size > 3 || what[2].range.hi < what[2].range.lo + : what[2].type != tINT || what.vec.size > 4 || + (what.vec.size == 4 && what[3].type != tINT)) { + error(what.lineno, "Invalid slice operation"); + return nullptr; + } + Slice *rv = new Slice(what.lineno); + rv->what = create(gress, stage, what[1]); + if (what[2].type == tRANGE) { + rv->start = what[2].range.lo; + rv->_width = what[2].range.hi - what[2].range.lo + 1; + } else { + rv->start = what[2].i; + if (what.vec.size > 3) rv->_width = what[3].i; + } + return rv; + } else if (what[0] == "sextend" || what[0] == "sign_extend") { + if (what.vec.size != 2) { + error(what.lineno, "Invalid sign extension"); + return nullptr; + } + return new SExtend(what.lineno, create(gress, stage, what[1])); + } else if (what.vec.size == 2) { + return new PhvRef(gress, stage, what); + } else { + error(what.lineno, "Unsupported hash operation '%s'", what[0].s); + } + } else if (what.type == tSTR) { + return new PhvRef(gress, stage, what); + } else { + error(what.lineno, "Syntax error, expecting hash expression"); + } + return nullptr; +} + +void HashExpr::find_input(Phv::Ref what, std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) { + bool found = false; + auto vec = ix->find_hash_inputs(*what, hash_table); + for (auto *in : vec) { + int group_bit_position = in->lo + (what->lo - in->what->lo); + ixbar_input_t input; + input.type = ixbar_input_type::tPHV; + input.ixbar_bit_position = group_bit_position + ix->global_bit_position_adjust(hash_table); + input.bit_size = what->size(); + input.u.valid = true; + input.symmetric_info.is_symmetric = false; + inputs.push_back(input); + found = true; + break; + } + if (!found) { + error(ix->lineno, "Cannot find associated field %s[%d:%d] in %s", what->reg.name, what->hi, + what->lo, hash_table.toString().c_str()); + } +} + +void HashExpr::generate_ixbar_inputs_with_gaps(const std::multimap &what, + std::vector &inputs, InputXbar *ix, + InputXbar::HashTable 
hash_table) { + unsigned previous_range_hi = 0; + for (auto &entry : what) { + if (previous_range_hi != entry.first) { + ixbar_input_t invalid_input = { + ixbar_input_type::tPHV, // type + 0, // ixbar_bit_position + entry.first - previous_range_hi, // bit_size + {}, // symmetric_info + false // u.valid + }; + inputs.push_back(invalid_input); + } + + auto &ref = entry.second; + find_input(ref, inputs, ix, hash_table); + previous_range_hi = entry.first + ref->size(); + } + if (previous_range_hi != input_size()) { + ixbar_input_t invalid_input = { + ixbar_input_type::tPHV, // type + 0, // ixbar_bit_position + input_size() - previous_range_hi, // bit_size + {}, // symmetric_info + false // u.valid + }; + inputs.push_back(invalid_input); + } +} + +/** + * Creates a vector with a single entry corresponding to the identity input + */ +void HashExpr::PhvRef::gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) { + find_input(what, inputs, ix, hash_table); +} + +/** + * Iterates through the list of references to build a corresponding vector for the + * dynamic hash library + */ +void HashExpr::Random::gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) { + for (auto &ref : what) { + find_input(ref, inputs, ix, hash_table); + } +} + +/** + * Iterates through the crc map, and will generate ixbar_input_t inputs for the holes. 
+ * These are marked as invalid, so that the hash calculation will be correct + */ +void HashExpr::Crc::gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) { + generate_ixbar_inputs_with_gaps(what, inputs, ix, hash_table); +} + +bool HashExpr::Crc::check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) { + bool rv = true; + if (!vec_what.empty()) { + int off = 0; + for (auto &ref : vec_what) { + rv &= ::check_ixbar(ref, ix, InputXbar::HashTable()); + if (ref) { + for (auto *in : ix->find_hash_inputs(*ref, hash_table)) { + if (in->lo >= 0) { + what.emplace(off, ref); + break; + } + } + off += ref.size(); + } + } + vec_what.clear(); + } else { + int max = -1; + for (auto &ref : what) { + rv &= ::check_ixbar(ref.second, ix, hash_table); + } + } + return rv; +} + +HashExpr::XorHash::XorHash(int lineno, int bit_width_) : HashExpr(lineno), bit_width(bit_width_) {} + +bool HashExpr::XorHash::check_ixbar(InputXbar *ix, InputXbar::HashTable hash_table) { + bool rv(true); + for (auto &ref : what) { + rv = ::check_ixbar(ref.second, ix, hash_table) && rv; + } + return rv; +} + +int HashExpr::XorHash::width() { return bit_width; } + +int HashExpr::XorHash::input_size() { + if (what.empty()) return 0; + return what.rbegin()->first + what.rbegin()->second->size(); +} + +bool HashExpr::XorHash::operator==(const HashExpr &a_) const { + if (typeid(*this) != typeid(a_)) return false; + auto &a = static_cast(a_); + + if (what.size() != a.what.size()) return false; + if (bit_width != a.bit_width) return false; + + auto iter1(what.begin()); + auto iter2(a.what.begin()); + while (iter1 != what.end()) { + if (*iter1 != *iter2) return false; + ++iter1; + ++iter2; + } + return true; +} + +void HashExpr::XorHash::build_algorithm() { + memset(&hash_algorithm, 0, sizeof(hash_algorithm)); + hash_algorithm.hash_alg = XOR_DYN; + hash_algorithm.extend = false; + hash_algorithm.msb = false; + hash_algorithm.hash_bit_width = bit_width; +} + +void 
HashExpr::XorHash::gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) { + generate_ixbar_inputs_with_gaps(what, inputs, ix, hash_table); +} + +void HashExpr::XorHash::get_sources(int, std::vector &rv) const {} + +void HashExpr::Xor::gen_data(bitvec &data, int bit, InputXbar *ix, + InputXbar::HashTable hash_table) { + for (auto *e : what) e->gen_data(data, bit, ix, hash_table); +} + +void HashExpr::Stripe::gen_data(bitvec &data, int bit, InputXbar *ix, + InputXbar::HashTable hash_table) { + while (1) { + int total_size = 0; + for (auto *e : what) { + int sz = e->width(); + if (bit < total_size + sz) { + e->gen_data(data, bit - total_size, ix, hash_table); + return; + } + total_size += sz; + } + if (total_size == 0) { + if (!supress_error_cascade) { + error(lineno, "Can't stripe unsized data"); + supress_error_cascade = true; + } + break; + } + bit %= total_size; + } +} + +void dump(const HashExpr *h) { + if (h) + h->dbprint(std::cout); + else + std::cout << "(null)"; + std::cout << std::endl; +} +void dump(const HashExpr &h) { + h.dbprint(std::cout); + std::cout << std::endl; +} diff --git a/backends/tofino/bf-asm/hashexpr.h b/backends/tofino/bf-asm/hashexpr.h new file mode 100644 index 00000000000..cb63eca5e34 --- /dev/null +++ b/backends/tofino/bf-asm/hashexpr.h @@ -0,0 +1,77 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_HASHEXPR_H_ +#define BACKENDS_TOFINO_BF_ASM_HASHEXPR_H_ + +#include "backends/tofino/bf-utils/dynamic_hash/dynamic_hash.h" +#include "input_xbar.h" +#include "phv.h" + +class HashExpr : public IHasDbPrint { + class PhvRef; + class Random; + class Crc; + class XorHash; + class Xor; + class Mask; + class Stripe; + class Slice; + class SExtend; + + protected: + explicit HashExpr(int l) : lineno(l) {} + + public: + int lineno; + bfn_hash_algorithm_t hash_algorithm = {}; // Zero-init to make Klockwork happy + static HashExpr *create(gress_t, int stage, const value_t &); + virtual void build_algorithm() = 0; + virtual bool check_ixbar(InputXbar *ix, InputXbar::HashTable ht) = 0; + virtual void gen_data(bitvec &data, int bit, InputXbar *ix, InputXbar::HashTable hash_table); + void gen_ixbar_init(ixbar_init_t *ixbar_init, std::vector &inputs, + std::vector &outputs, int logical_hash_bit, + InputXbar *ix, InputXbar::HashTable hash_table); + virtual void gen_ixbar_inputs(std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table) = 0; + virtual void get_sources(int bit, std::vector &) const = 0; + std::vector get_sources(int bit) const { + std::vector rv; + get_sources(bit, rv); + return rv; + } + virtual int width() = 0; + virtual int input_size() = 0; + virtual bool match_phvref(const Phv::Ref &ref) { return false; } + virtual bool operator==(const HashExpr &) const = 0; + void find_input(Phv::Ref what, std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table); + bool operator!=(const HashExpr &a) const { return !operator==(a); } + virtual void dbprint(std::ostream &out) const {} + virtual Phv::Ref *get_ghost_slice() { return nullptr; } + virtual ~HashExpr() {} + + private: + void generate_ixbar_inputs_with_gaps(const std::multimap &what, + std::vector &inputs, InputXbar *ix, + InputXbar::HashTable hash_table); +}; + +extern void dump(const HashExpr *); +extern 
void dump(const HashExpr &); + +#endif /* BACKENDS_TOFINO_BF_ASM_HASHEXPR_H_ */ diff --git a/backends/tofino/bf-asm/idletime.cpp b/backends/tofino/bf-asm/idletime.cpp new file mode 100644 index 00000000000..297988c4621 --- /dev/null +++ b/backends/tofino/bf-asm/idletime.cpp @@ -0,0 +1,217 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "misc.h" + +void IdletimeTable::setup(VECTOR(pair_t) & data) { + setup_layout(layout, data); + for (auto &kv : MapIterChecked(data, true)) { + if (kv.key == "precision") { + if (CHECKTYPE(kv.value, tINT)) { + precision = kv.value.i; + if (precision != 1 && precision != 2 && precision != 3 && precision != 6) + error(kv.value.lineno, "Invalid idletime precision %d", precision); + } + } else if (kv.key == "sweep_interval") { + if (CHECKTYPE(kv.value, tINT)) sweep_interval = kv.value.i; + } else if (kv.key == "notification") { + if (kv.value == "disable") + disable_notification = true; + else if (kv.value == "two_way") + two_way_notification = true; + else if (kv.value != "enable") + error(kv.value.lineno, "Unknown notification style '%s'", value_desc(kv.value)); + } else if (kv.key == "per_flow_enable") { + per_flow_enable = get_bool(kv.value); + } else if (kv.key == "context_json") { + setup_context_json(kv.value); + } else if (kv.key == 
"row" || kv.key == "column" || kv.key == "bus") { + /* already done in setup_layout */ + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + alloc_rams(false, stage->mapram_use); + for (auto &r : layout) { + if (!r.bus.count(Layout::IDLE_BUS)) continue; + int &idle_bus = r.bus.at(Layout::IDLE_BUS); + if (idle_bus >= IDLETIME_BUSSES) { + error(r.lineno, "bus %d invalid", idle_bus); + continue; + } + if (r.row >= 4 && idle_bus < 10) + idle_bus += 10; + else if (r.row < 4 && idle_bus >= 10) + error(r.lineno, "idletime bus %d not accessable on row %d", idle_bus, r.row); + if (Table *old = stage->idletime_bus_use[idle_bus]) { + if (old != this) + error(r.lineno, + "Table %s trying to use idletime bus %d which is already in " + "use by table %s", + name(), idle_bus, old->name()); + } else { + stage->idletime_bus_use[idle_bus] = this; + } + } +} + +void IdletimeTable::pass1() { + LOG1("### Idletime table " << name() << " pass1 " << loc()); + alloc_vpns(); +} + +void IdletimeTable::pass2() { LOG1("### Idletime table " << name() << " pass2 " << loc()); } + +void IdletimeTable::pass3() { LOG1("### Idletime table " << name() << " pass3 " << loc()); } + +// This is the same as AttachedTable::json_memunit, but IdletimeTable is not a derived class +// of AttachedTable, so we duplicate it +int IdletimeTable::json_memunit(const MemUnit &r) const { + if (r.stage >= 0) { + return r.stage * Target::SRAM_STRIDE_STAGE() + r.row * Target::SRAM_STRIDE_ROW() + + r.col * Target::SRAM_STRIDE_COLUMN(); + } else if (r.row >= 0) { + // per-stage logical sram + return r.row * Target::SRAM_LOGICAL_UNITS_PER_ROW() + r.col; + } else { + // lamb + return r.col; + } +} + +static int precision_bits[] = {0, 0, 1, 2, 0, 0, 3}; + +template +void IdletimeTable::write_merge_regs_vt(REGS ®s, int type, int bus) { + auto &merge = regs.rams.match.merge; + merge.mau_payload_shifter_enable[type][bus].idletime_adr_payload_shifter_en = 1; + 
merge.mau_idletime_adr_mask[type][bus] = + (~1U << precision_bits[precision]) & ((1U << IDLETIME_ADDRESS_BITS) - 1); + merge.mau_idletime_adr_default[type][bus] = + (1U << IDLETIME_ADDRESS_PER_FLOW_ENABLE_START_BIT) | ((1 << precision_bits[precision]) - 1); +} + +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void IdletimeTable::write_merge_regs, + (mau_regs & regs, int type, int bus), + { write_merge_regs_vt(regs, type, bus); }) + +int IdletimeTable::precision_shift() const { return precision_bits[precision] + 1; } +int IdletimeTable::direct_shiftcount() const { return 67 - precision_bits[precision]; } + +template +void IdletimeTable::write_regs_vt(REGS ®s) { + LOG1("### Idletime table " << name() << " write_regs " << loc()); + auto &map_alu = regs.rams.map_alu; + auto &adrdist = regs.rams.match.adrdist; + int minvpn = 1000000, maxvpn = -1; + for (Layout &logical_row : layout) + for (auto v : logical_row.vpns) { + if (v < minvpn) minvpn = v; + if (v > maxvpn) maxvpn = v; + } + // regs.cfg_regs.mau_cfg_lt_has_idle |= 1 << logical_id; + for (Layout &row : layout) { + int idle_bus = row.bus.at(Layout::IDLE_BUS); + auto &map_alu_row = map_alu.row[row.row]; + auto &adrmux = map_alu_row.adrmux; + auto vpn = row.vpns.begin(); + for (auto &memunit : row.memunits) { + int col = memunit.col; + BUG_CHECK(memunit.stage == INT_MIN && memunit.row == row.row, "bogus %s in row %d", + memunit.desc(), row.row); + setup_muxctl(map_alu_row.vh_xbars.adr_dist_idletime_adr_xbar_ctl[col], idle_bus % 10); + auto &mapram_cfg = adrmux.mapram_config[col]; + // auto &mapram_ctl = adrmux.mapram_ctl[col]; + if (disable_notification) mapram_cfg.idletime_disable_notification = 1; + if (two_way_notification) mapram_cfg.two_way_idletime_notification = 1; + if (per_flow_enable) mapram_cfg.per_flow_idletime = 1; + mapram_cfg.idletime_bitwidth = precision_bits[precision]; + mapram_cfg.mapram_type = MapRam::IDLETIME; + mapram_cfg.mapram_logical_table = logical_id; + mapram_cfg.mapram_vpn_members = 0; // FIXME 
+ mapram_cfg.mapram_vpn = *vpn++; + if (gress == INGRESS) + mapram_cfg.mapram_ingress = 1; + else + mapram_cfg.mapram_egress = 1; + mapram_cfg.mapram_enable = 1; + if ((precision == 1) || (precision == 2)) { + mapram_cfg.mapram_parity_generate = 1; + mapram_cfg.mapram_parity_check = 1; + } else { + if ((precision != 3) && (precision != 6)) + error(lineno, "Unknown idletime precision = %d", precision); + mapram_cfg.mapram_ecc_generate = 1; + mapram_cfg.mapram_ecc_check = 1; + } + auto &adrmux_ctl = adrmux.ram_address_mux_ctl[1][col]; + adrmux_ctl.map_ram_wadr_mux_select = MapRam::Mux::IDLETIME; + adrmux_ctl.map_ram_wadr_mux_enable = 1; + adrmux_ctl.map_ram_radr_mux_select_smoflo = 1; + adrmux_ctl.ram_ofo_stats_mux_select_statsmeter = 1; + adrmux_ctl.ram_stats_meter_adr_mux_select_idlet = 1; + setup_muxctl(adrmux.idletime_logical_to_physical_sweep_grant_ctl[col], logical_id); + setup_muxctl(adrmux.idletime_physical_to_logical_req_inc_ctl[col], logical_id); + unsigned clear_val = ~(~0U << precision); + if (per_flow_enable || precision == 1) clear_val &= ~1U; + for (unsigned i = 0; i < 8U / precision; i++) + adrmux.idletime_cfg_rd_clear_val[col].set_subfield(clear_val, i * precision, + precision); + if (gress) + regs.cfg_regs.mau_cfg_mram_thread[col / 3U] |= 1U << (col % 3U * 8U + row.row); + } + adrdist.adr_dist_idletime_adr_oxbar_ctl[idle_bus / 4].set_subfield(logical_id | 0x10, + 5 * (idle_bus % 4), 5); + } + // don't enable initially -- runtime will enable + // adrdist.idletime_sweep_ctl[logical_id].idletime_en = 1; + adrdist.idletime_sweep_ctl[logical_id].idletime_sweep_offset = minvpn; + adrdist.idletime_sweep_ctl[logical_id].idletime_sweep_size = layout_size() - 1; + adrdist.idletime_sweep_ctl[logical_id].idletime_sweep_remove_hole_pos = 0; // TODO + adrdist.idletime_sweep_ctl[logical_id].idletime_sweep_remove_hole_en = 0; // TODO + adrdist.idletime_sweep_ctl[logical_id].idletime_sweep_interval = sweep_interval; + auto &idle_dump_ctl = 
regs.cfg_regs.idle_dump_ctl[logical_id]; + idle_dump_ctl.idletime_dump_offset = minvpn; + idle_dump_ctl.idletime_dump_size = maxvpn; + idle_dump_ctl.idletime_dump_remove_hole_pos = 0; // TODO + idle_dump_ctl.idletime_dump_remove_hole_en = 0; // TODO + adrdist.movereg_idle_ctl[logical_id].movereg_idle_ctl_size = precision_bits[precision]; + adrdist.movereg_idle_ctl[logical_id].movereg_idle_ctl_direct = 1; + adrdist.movereg_ad_direct[MoveReg::IDLE] |= 1 << logical_id; + adrdist.idle_bubble_req[timing_thread(gress)].bubble_req_1x_class_en |= 1 << logical_id; +} +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void IdletimeTable::write_regs, (mau_regs & regs), + { write_regs_vt(regs); }) + +void IdletimeTable::gen_stage_tbl_cfg(json::map &out) const { + unsigned number_entries = layout_size() * (8U / precision) * SRAM_DEPTH; + json::map &tbl = out["idletime_stage_table"] = json::map(); + tbl["stage_number"] = stage->stageno; + tbl["size"] = number_entries; + tbl["stage_table_type"] = "idletime"; + tbl["precision"] = precision; + tbl["disable_notification"] = disable_notification; + tbl["two_way_notification"] = two_way_notification; + // ?? + tbl["logical_table_id"] = match_table->logical_id; + tbl["enable_pfe"] = per_flow_enable; + add_pack_format(tbl, 11, 1, 8U / precision); + tbl["memory_resource_allocation"] = gen_memory_resource_allocation_tbl_cfg("map_ram", layout); +} diff --git a/backends/tofino/bf-asm/input_xbar.cpp b/backends/tofino/bf-asm/input_xbar.cpp new file mode 100644 index 00000000000..cdee0a81cc8 --- /dev/null +++ b/backends/tofino/bf-asm/input_xbar.cpp @@ -0,0 +1,1137 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "input_xbar.h" + +#include + +#include + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "hashexpr.h" +#include "lib/log.h" +#include "lib/range.h" +#include "misc.h" +#include "power_ctl.h" + +// template specialization declarations +#include "backends/tofino/bf-asm/jbay/input_xbar.h" +#include "backends/tofino/bf-asm/tofino/input_xbar.h" + +void HashCol::dbprint(std::ostream &out) const { + out << "HashCol: " << " lineno: " << lineno << " bit: " << bit << " data: " << data + << " valid: " << valid; + if (fn) out << " fn: " << *fn << std::endl; +} + +DynamicIXbar::DynamicIXbar(const Table *tbl, const pair_t &data) { + if (CHECKTYPE(data.key, tINT)) { + bit = data.key.i; + if (bit < 0 || bit >= Target::DYNAMIC_CONFIG_INPUT_BITS()) + error(data.key.lineno, "Invalid dynamic config bit %d", bit); + } + if (CHECKTYPE2(data.value, tMAP, tMATCH)) { + if (data.value.type == tMAP) { + for (auto &kv : data.value.map) + if (CHECKTYPE(kv.value, tMATCH)) + match_phv.emplace_back(Phv::Ref(tbl->gress, tbl->stage->stageno, data.key), + data.value.m); + } else { + match = data.value.m; + } + } +} + +int InputXbar::group_max_index(Group::type_t t) const { + switch (t) { + case Group::EXACT: + return EXACT_XBAR_GROUPS; + case Group::TERNARY: + return TCAM_XBAR_GROUPS; + case Group::BYTE: + return BYTE_XBAR_GROUPS; + default: + BUG("invalid group type for %s: %s", Target::name(), group_type(t)); + } + return 0; +} + 
+InputXbar::Group InputXbar::group_name(bool tern, const value_t &key) const { + if (CHECKTYPE(key, tCMD)) { + int index = 1; + if (key[0] != "group" && (key[1] == "group" || key[1] == "table")) ++index; + if (PCHECKTYPE(key.vec.size == index + 1, key[index], tINT)) { + index = key[index].i; + if (key[0] == "group") return Group(tern ? Group::TERNARY : Group::EXACT, index); + if (key[0] == "exact" && key[1] == "group") return Group(Group::EXACT, index); + if (key[0] == "ternary" && key[1] == "group") return Group(Group::TERNARY, index); + if (key[0] == "byte" && key[1] == "group") return Group(Group::BYTE, index); + } + } + return Group(Group::INVALID, 0); +} + +int InputXbar::group_size(Group::type_t t) const { + switch (t) { + case Group::EXACT: + return EXACT_XBAR_GROUP_SIZE; + case Group::TERNARY: + return TCAM_XBAR_GROUP_SIZE; + case Group::BYTE: + return BYTE_XBAR_GROUP_SIZE; + default: + BUG("invalid group type for %s: %s", Target::name(), group_type(t)); + } + return 0; +} + +const char *InputXbar::group_type(Group::type_t t) const { + switch (t) { + case Group::EXACT: + return "exact"; + case Group::TERNARY: + return "ternary"; + case Group::BYTE: + return "byte"; + case Group::GATEWAY: + return "gateway"; + case Group::XCMP: + return "xcmp"; + default: + return ""; + } +} + +void InputXbar::parse_group(Table *t, Group gr, const value_t &value) { + BUG_CHECK(gr.index >= 0, "invalid group"); + auto &group = groups[gr]; + if (value.type == tVEC) { + for (auto ® : value.vec) group.emplace_back(Phv::Ref(t->gress, t->stage->stageno, reg)); + } else if (value.type == tMAP) { + for (auto ® : value.map) { + if (!CHECKTYPE2(reg.key, tINT, tRANGE)) continue; + int lo = -1, hi = -1; + if (reg.key.type == tINT) { + lo = reg.key.i; + } else { + lo = reg.key.range.lo; + hi = reg.key.range.hi; + } + if (lo < 0 || lo >= group_size(gr.type)) { + error(reg.key.lineno, "Invalid offset for %s group", group_type(gr.type)); + } else if (gr.type == Group::TERNARY && lo >= 40) { 
+ if (hi >= lo) hi -= 40; + groups[Group(Group::BYTE, gr.index / 2)].emplace_back( + Phv::Ref(t->gress, t->stage->stageno, reg.value), lo - 40, hi); + } else { + group.emplace_back(Phv::Ref(t->gress, t->stage->stageno, reg.value), lo, hi); + } + } + } else { + group.emplace_back(Phv::Ref(t->gress, t->stage->stageno, value)); + } +} + +void InputXbar::parse_hash_group(HashGrp &hash_group, const value_t &value) { + if (value.type == tINT && (unsigned)value.i < Target::EXACT_HASH_TABLES()) { + hash_group.tables |= 1U << value.i; + return; + } + if (!CHECKTYPE2(value, tVEC, tMAP)) return; + const VECTOR(value_t) *tbl = 0; + if (value.type == tMAP) { + for (auto &el : MapIterChecked(value.map)) { + if (el.key == "seed") { + if (!CHECKTYPE2(el.value, tINT, tBIGINT)) continue; + if (el.value.type == tBIGINT) { + int shift = 0; + for (int i = 0; i < el.value.bigi.size; ++i) { + if (shift >= 64) { + error(el.key.lineno, "Invalid seed %s too large", + value_desc(&el.value)); + break; + } + hash_group.seed |= el.value.bigi.data[i] << shift; + shift += CHAR_BIT * sizeof(el.value.bigi.data[i]); + } + } else { + hash_group.seed |= el.value.i & 0xFFFFFFFF; + } + } else if (el.key == "table") { + if (el.value.type == tINT) { + if (el.value.i < 0 || el.value.i >= Target::EXACT_HASH_TABLES()) + error(el.value.lineno, "invalid hash group descriptor"); + else + hash_group.tables |= 1U << el.value.i; + } else if (CHECKTYPE(el.value, tVEC)) { + tbl = &el.value.vec; + } + } else if (el.key == "seed_parity") { + if (el.value.type == tSTR && el.value == "true") hash_group.seed_parity = true; + } else { + error(el.key.lineno, "invalid hash group descriptor"); + } + } + } else { + tbl = &value.vec; + } + if (tbl) { + for (auto &v : *tbl) { + if (!CHECKTYPE(v, tINT)) continue; + if (v.i < 0 || v.i >= Target::EXACT_HASH_TABLES()) { + error(v.lineno, "invalid hash group descriptor"); + } else { + hash_group.tables |= 1U << v.i; + } + } + } +} + +void InputXbar::parse_hash_table(Table *t, 
HashTable ht, const value_t &value) { + if (!CHECKTYPE(value, tMAP)) return; + for (auto &c : value.map) { + if (c.key.type == tINT) { + setup_hash(hash_tables[ht], ht, t->gress, t->stage->stageno, c.value, c.key.lineno, + c.key.i, c.key.i); + } else if (c.key.type == tRANGE) { + setup_hash(hash_tables[ht], ht, t->gress, t->stage->stageno, c.value, c.key.lineno, + c.key.range.lo, c.key.range.hi); + } else if (CHECKTYPEM(c.key, tCMD, "hash column decriptor")) { + if (c.key.vec.size != 2 || c.key[0] != "valid" || c.key[1].type != tINT || + options.target != TOFINO) { + error(c.key.lineno, "Invalid hash column descriptor"); + continue; + } + int col = c.key[1].i; + if (col < 0 || col >= 52) { + error(c.key.lineno, "Hash column out of range"); + continue; + } + if (!CHECKTYPE(c.value, tINT)) continue; + if (hash_tables[ht][col].valid) + error(c.key.lineno, "Hash table %d column %d valid duplicated", ht.index, col); + else if (c.value.i >= 0x10000) + error(c.value.lineno, "Hash valid value out of range"); + else + hash_tables[ht][col].valid = c.value.i; + } + } +} + +void InputXbar::setup_hash(std::map &hash_table, HashTable ht, gress_t gress, + int stage, value_t &what, int lineno, int lo, int hi) { + if (lo < 0 || lo >= hash_num_columns(ht) || hi < 0 || hi >= hash_num_columns(ht)) { + error(lineno, "Hash column out of range"); + return; + } + if (lo == hi) { + if (what.type == tINT || what.type == tBIGINT) { + hash_table[lo].data = get_bitvec(what, 64, "Hash column value out of range"); + return; + } else if ((what.type == tSTR) && (what == "parity")) { + options.disable_gfm_parity = false; + hash_table_parity[ht] = lo; + return; + } + } else if (what.type == tINT && what.i == 0) { + for (int i = lo; i <= hi; ++i) { + hash_table[i].data.setraw(what.i); + } + return; + } + HashExpr *fn = HashExpr::create(gress, stage, what); // TODO Set the crcSize. 
+ if (!fn) return; + fn->build_algorithm(); + int width = fn->width(); + if (width && width != abs(hi - lo) + 1) + error(what.lineno, "hash expression width mismatch (%d != %d)", width, abs(hi - lo) + 1); + int bit = 0; + int errlo = -1; + bool fn_assigned = false; + for (int col : Range(lo, hi)) { + if (hash_table[col].data || hash_table[col].fn) { + if (errlo < 0) errlo = col; + } else { + if (errlo >= 0) { + if (errlo == col - 1) { + error(lineno, "%s column %d duplicated", ht.toString().c_str(), errlo); + } else { + error(lineno, "%s column %d..%d duplicated", ht.toString().c_str(), errlo, + col - 1); + } + errlo = -1; + } + hash_table[col].lineno = what.lineno; + hash_table[col].fn = fn; + hash_table[col].bit = bit++; + fn_assigned = true; + } + } + + if (!fn_assigned) delete fn; + + if (errlo >= 0) { + error(lineno, "%s column %d..%d duplicated", ht.toString().c_str(), errlo, hi); + } +} + +void InputXbar::input(Table *t, bool tern, const VECTOR(pair_t) & data) { + for (auto &kv : data) { + if ((kv.key.type == tSTR) && (kv.key == "random_seed")) { + random_seed = kv.value.i; + continue; + } + if (kv.key.type == tCMD && kv.key.vec.size == 2 && kv.key[1] == "unit" && + parse_unit(t, kv)) { + continue; + } + if (auto grp = group_name(tern, kv.key)) { + if (grp.index >= group_max_index(grp.type)) { + error(kv.key.lineno, "invalid group descriptor"); + continue; + } + parse_group(t, grp, kv.value); + } else if (kv.key.type == tCMD && kv.key[0] == "hash") { + if (!CHECKTYPE(kv.key.vec.back(), tINT)) continue; + int index = kv.key.vec.back().i; + if (kv.key[1] == "group") { + if (index >= Target::EXACT_HASH_GROUPS()) { + error(kv.key.lineno, "invalid hash group descriptor"); + continue; + } + if (hash_groups[index].lineno >= 0) { + // FIXME -- should be an error? 
but the compiler generates it this way + warning(kv.key.lineno, "duplicate hash group %d, will merge with", index); + warning(hash_groups[index].lineno, "previous definition here"); + } + hash_groups[index].lineno = kv.key.lineno; + parse_hash_group(hash_groups[index], kv.value); + } else if (index >= Target::EXACT_HASH_TABLES()) { + error(kv.key.lineno, "invalid hash descriptor"); + } else { + parse_hash_table(t, HashTable(HashTable::EXACT, index), kv.value); + } + } else if (kv.key.type == tCMD && kv.key[1] == "hash" && parse_hash(t, kv)) { + continue; + } else { + error(kv.key.lineno, "expecting a group or hash descriptor"); + } + } +} + +std::unique_ptr InputXbar::create(Table *table, const value_t *key) { + if (key && key->type != tSTR) + error(key->lineno, "%s does not support dynamic key mux", Target::name()); + return std::unique_ptr(new InputXbar(table, key ? key->lineno : -1)); +} + +std::unique_ptr InputXbar::create(Table *table, bool tern, const value_t &key, + const VECTOR(pair_t) & data) { + auto rv = create(table, &key); + rv->input(table, tern, data); + return rv; +} + +unsigned InputXbar::tcam_width() { + unsigned words = 0, bytes = 0; + for (auto &group : groups) { + if (group.first.type != Group::TERNARY) { + if (group.first.type == Group::BYTE) ++bytes; + continue; + } + unsigned in_word = 0, in_byte = 0; + for (auto &input : group.second) { + if (input.lo < 40) in_word = 1; + if (input.lo >= 40 || input.hi >= 40) in_byte = 1; + } + words += in_word; + bytes += in_byte; + } + if (bytes * 2 > words) error(lineno, "Too many byte groups in tcam input xbar"); + return words; +} + +int InputXbar::tcam_byte_group(int idx) { + for (auto &group : groups) { + if (group.first.type != Group::TERNARY) continue; + for (auto &input : group.second) + if (input.lo >= 40 || input.hi >= 40) { + if (--idx < 0) return group.first.index / 2; + break; + } + } + return -1; +} + +int InputXbar::tcam_word_group(int idx) { + for (auto &group : groups) { + if 
(group.first.type != Group::TERNARY) continue; + for (auto &input : group.second) + if (input.lo < 40) { + if (--idx < 0) return group.first.index; + break; + } + } + return -1; +} + +const std::map &InputXbar::get_hash_table(HashTable id) { + for (auto &ht : hash_tables) + if (ht.first == id) return ht.second; + warning(lineno, "%s does not exist in table %s", id.toString().c_str(), table->name()); + static const std::map empty_hash_table = {}; + return empty_hash_table; +} + +bool InputXbar::conflict(const std::vector &a, const std::vector &b) { + for (auto &i1 : a) { + if (i1.lo < 0) continue; + for (auto &i2 : b) { + if (i2.lo < 0) continue; + if (i2.lo > i1.hi || i1.lo > i2.hi) continue; + if (i1.what->reg != i2.what->reg) return true; + if (i1.lo - i1.what->lo != i2.lo - i2.what->lo) return true; + } + } + return false; +} + +bool InputXbar::conflict(const std::map &a, const std::map &b, + int *col) { + for (auto &acol : a) { + if (auto bcol = ::getref(b, acol.first)) { + if (acol.second.data != bcol->data || acol.second.valid != bcol->valid) { + if (col) *col = acol.first; + return true; + } + } + } + return false; +} + +bool InputXbar::conflict(const HashGrp &a, const HashGrp &b) { + if (a.tables != b.tables) return true; + if (a.seed && b.seed && a.seed != b.seed) return true; + return false; +} + +uint64_t InputXbar::hash_columns_used(HashTable hash) { + uint64_t rv = 0; + if (hash_tables.count(hash)) + for (auto &col : hash_tables[hash]) rv |= UINT64_C(1) << col.first; + return rv; +} + +/* FIXME -- this is questionable, but the compiler produces hash groups that conflict + * FIXME -- so we try to tag ones that may be ok as merely warnings */ +bool InputXbar::can_merge(HashGrp &a, HashGrp &b) { + unsigned both = a.tables & b.tables; + uint64_t both_cols = 0, a_cols = 0, b_cols = 0; + for (unsigned i = 0; i < 16; i++) { + unsigned mask = 1U << i; + if (!((a.tables | b.tables) & mask)) continue; + for (InputXbar *other : table->stage->hash_table_use[i]) { 
+ if (both & mask) both_cols |= other->hash_columns_used(i); + if (a.tables & mask) a_cols |= other->hash_columns_used(i); + if (b.tables & mask) b_cols |= other->hash_columns_used(i); + for (auto htp : hash_table_parity) { + if (other->hash_table_parity.count(htp.first) && + other->hash_table_parity.at(htp.first) != htp.second) + return false; + } + } + } + a_cols &= ~both_cols; + b_cols &= ~both_cols; + if (a_cols & b_cols) return false; + if ((a_cols & b.seed & ~a.seed) || (b_cols & a.seed & ~b.seed)) return false; + if (a.tables && b.tables) { + a.tables |= b.tables; + b.tables |= a.tables; + } + if (a.seed && b.seed) { + a.seed |= b.seed; + b.seed |= a.seed; + } + return true; +} + +static int tcam_swizzle_offset[4][4] = { + {0, +1, -2, -1}, + {+3, 0, +1, -2}, + {+2, -1, 0, -3}, + {+1, +2, -1, 0}, +}; + +// FIXME -- when swizlling 16 bit PHVs, there are 2 places we could copy from, but +// FIXME -- we only consider the closest/easiest +static int tcam_swizzle_16[2][2]{{0, -1}, {+1, 0}}; + +int InputXbar::tcam_input_use(int out_byte, int phv_byte, int phv_size) { + int rv = out_byte; + BUG_CHECK(phv_byte >= 0 && phv_byte < phv_size / 8); + switch (phv_size) { + case 8: + break; + case 32: + rv += tcam_swizzle_offset[out_byte & 3][phv_byte]; + break; + case 16: + rv += tcam_swizzle_16[out_byte & 1][phv_byte]; + break; + default: + BUG(); + } + return rv; +} + +void InputXbar::tcam_update_use(TcamUseCache &use) { + if (use.ixbars_added.count(this)) return; + use.ixbars_added.insert(this); + for (auto &group : groups) { + if (group.first.type == Group::EXACT) continue; + for (auto &input : group.second) { + if (input.lo < 0) continue; + int group_base = (group.first.index * 11 + 1) / 2U; + int half_byte = 5 + 11 * (group.first.index / 2U); + if (group.first.type == Group::BYTE) { + group_base = 5 + 11 * group.first.index; + half_byte = -1; + } + int group_byte = input.lo / 8; + for (int phv_byte = input.what->lo / 8; phv_byte <= input.what->hi / 8; + phv_byte++, 
group_byte++) { + BUG_CHECK(group_byte <= 5); + int out_byte = group_byte == 5 ? half_byte : group_base + group_byte; + int in_byte = tcam_input_use(out_byte, phv_byte, input.what->reg.size); + use.tcam_use.emplace(in_byte, std::pair(input, phv_byte)); + } + } + } +} + +void InputXbar::check_input(InputXbar::Group group, Input &input, TcamUseCache &use) { + if (group.type == Group::EXACT) { + if (input.lo % input.what->reg.size != input.what->lo) + error(input.what.lineno, "%s misaligned on input_xbar", input.what.name()); + return; + } + unsigned bit_align_mask = input.lo >= 40 ? 3 : 7; + unsigned byte_align_mask = (input.what->reg.size - 1) >> 3; + int group_base = (group.index * 11 + 1) / 2U; + int half_byte = 5 + 11 * (group.index / 2U); + if (group.type == Group::BYTE) { + bit_align_mask = 3; + group_base = 5 + 11 * group.index; + half_byte = -1; + } + int group_byte = input.lo / 8; + if ((input.lo ^ input.what->lo) & bit_align_mask) { + error(input.what.lineno, "%s misaligned on input_xbar", input.what.name()); + return; + } + for (int phv_byte = input.what->lo / 8; phv_byte <= input.what->hi / 8; + phv_byte++, group_byte++) { + BUG_CHECK(group_byte <= 5); + int out_byte = group_byte == 5 ? 
half_byte : group_base + group_byte; + int in_byte = tcam_input_use(out_byte, phv_byte, input.what->reg.size); + if (in_byte < 0 || in_byte >= TCAM_XBAR_INPUT_BYTES) { + error(input.what.lineno, "%s misaligned on input_xbar", input.what.name()); + break; + } + auto *tbl = table->stage->tcam_ixbar_input[in_byte]; + if (tbl) { + BUG_CHECK(tbl->input_xbar.size() == 1, "%s does not have one input xbar", tbl->name()); + tbl->input_xbar[0]->tcam_update_use(use); + } + if (use.tcam_use.count(in_byte)) { + if (use.tcam_use.at(in_byte).first.what->reg != input.what->reg || + use.tcam_use.at(in_byte).second != phv_byte) { + error(input.what.lineno, "Use of tcam ixbar for %s", input.what.name()); + error(use.tcam_use.at(in_byte).first.what.lineno, "...conflicts with %s", + use.tcam_use.at(in_byte).first.what.name()); + break; + } + } else { + use.tcam_use.emplace(in_byte, std::pair(input, phv_byte)); + table->stage->tcam_ixbar_input[in_byte] = tbl; + } + } +} + +bool InputXbar::copy_existing_hash(HashTable ht, std::pair &col) { + for (InputXbar *other : table->stage->hash_table_use[ht.index]) { + if (other == this) continue; + if (other->hash_tables.count(ht)) { + auto &o = other->hash_tables.at(ht); + if (o.count(col.first)) { + auto ocol = o.at(col.first); + if (ocol.fn && *ocol.fn == *col.second.fn) { + col.second.data = ocol.data; + return true; + } + } + } + } + return false; +} + +void InputXbar::gen_hash_column(std::pair &col, + std::pair> &hash) { + col.second.fn->gen_data(col.second.data, col.second.bit, this, hash.first); +} + +void InputXbar::pass1() { + TcamUseCache tcam_use; + tcam_use.ixbars_added.insert(this); + if (random_seed >= 0) srandom(random_seed); + for (auto &group : groups) { + for (auto &input : group.second) { + if (!input.what.check()) continue; + if (input.what->reg.ixbar_id() < 0) + error(input.what.lineno, "%s not accessable in input xbar", input.what->reg.name); + table->stage->match_use[table->gress][input.what->reg.uid] = 1; + if (input.lo < 
0 && group.first.type == Group::BYTE) input.lo = input.what->lo % 8U; + if (input.lo >= 0) { + if (input.hi >= 0) { + if (input.size() != input.what->size()) + error(input.what.lineno, "Input xbar size doesn't match register size"); + } else { + input.hi = input.lo + input.what->size() - 1; + } + if (input.lo >= group_size(group.first.type)) + error(input.what.lineno, "placing %s off the top of the input xbar", + input.what.name()); + } + check_input(group.first, input, tcam_use); + } + auto &use = table->stage->ixbar_use; + for (InputXbar *other : use[group.first]) { + if (other->groups.count(group.first) && + conflict(other->groups.at(group.first), group.second)) { + error(lineno, "Input xbar group %d conflict in stage %d", group.first.index, + table->stage->stageno); + warning(other->lineno, "conflicting group definition here"); + } + } + use[group.first].push_back(this); + } + for (auto &hash : hash_tables) { + bool ok = true; + HashExpr *prev = 0; + for (auto &col : hash.second) { + if (col.second.fn && col.second.fn != prev) + ok = (prev = col.second.fn)->check_ixbar(this, hash.first); + if (ok && col.second.fn && !copy_existing_hash(hash.first, col)) { + gen_hash_column(col, hash); + } + } + bool add_to_use = true; + for (InputXbar *other : table->stage->hash_table_use[hash.first.uid()]) { + if (other == this) { + add_to_use = false; + continue; + } + int column; + if (other->hash_tables.count(hash.first) && + conflict(other->hash_tables[hash.first], hash.second, &column)) { + error(hash.second.at(column).lineno, "%s column %d conflict in stage %d", + hash.first.toString().c_str(), column, table->stage->stageno); + error(other->hash_tables[hash.first].at(column).lineno, + "conflicting hash definition here"); + } + } + if (add_to_use) table->stage->hash_table_use[hash.first.uid()].push_back(this); + } + for (auto &group : hash_groups) { + bool add_to_use = true; + for (InputXbar *other : table->stage->hash_group_use[group.first]) { + if (other == this) { + 
add_to_use = false; + break; + } + if (other->hash_groups.count(group.first) && + conflict(other->hash_groups[group.first], group.second)) { + if (can_merge(other->hash_groups[group.first], group.second)) + warning(group.second.lineno, + "Input xbar hash group %d mergeable conflict " + "in stage %d", + group.first, table->stage->stageno); + else + error(group.second.lineno, "Input xbar hash group %d conflict in stage %d", + group.first, table->stage->stageno); + warning(other->hash_groups[group.first].lineno, + "conflicting hash group definition here"); + } + } + if (add_to_use) table->stage->hash_group_use[group.first].push_back(this); + } +} + +void InputXbar::add_use(unsigned &byte_use, std::vector &inputs) { + for (auto &i : inputs) { + if (i.lo < 0) continue; + for (int byte = i.lo / 8; byte <= i.hi / 8; byte++) byte_use |= 1 << byte; + ; + } +} + +const InputXbar::Input *InputXbar::GroupSet::find(Phv::Slice sl) const { + for (InputXbar *i : use) + if (auto rv = i->find(sl, group)) return rv; + return 0; +} + +std::vector InputXbar::GroupSet::find_all(Phv::Slice sl) const { + std::vector rv; + for (const InputXbar *i : use) { + auto vec = i->find_all(sl, group); + rv.insert(rv.end(), vec.begin(), vec.end()); + } + return rv; +} + +void InputXbar::GroupSet::dbprint(std::ostream &out) const { + std::map byte_use; + for (const InputXbar *ixbar : use) { + if (ixbar->groups.count(group)) { + for (auto &i : ixbar->groups.at(group)) { + if (i.lo < 0) continue; + for (int byte = i.lo / 8; byte <= i.hi / 8; byte++) byte_use[byte] = &i; + } + } + } + const InputXbar::Input *prev = 0; + for (auto &in : byte_use) { + if (prev == in.second) continue; + if (prev) out << ", "; + prev = in.second; + out << prev->what << ':' << prev->lo << ".." 
<< prev->hi; + } +} + +void InputXbar::pass2() { + auto &use = table->stage->ixbar_use; + for (auto &group : groups) { + unsigned bytes_in_use = 0; + for (auto &input : group.second) { + if (input.lo >= 0) continue; + if (auto *at = GroupSet(use, group.first).find(*input.what)) { + input.lo = at->lo; + input.hi = at->hi; + LOG1(input.what << " found in bytes " << at->lo / 8 << ".." << at->hi / 8 << " of " + << group.first << " in stage " << table->stage->stageno); + continue; + } + if (bytes_in_use == 0) + for (InputXbar *other : table->stage->ixbar_use[group.first]) + if (other->groups.count(group.first)) + add_use(bytes_in_use, other->groups.at(group.first)); + int need = input.what->hi / 8U - input.what->lo / 8U + 1; + unsigned mask = (1U << need) - 1; + int max = (group_size(group.first.type) + 7) / 8 - need; + for (int i = 0; i <= max; i++, mask <<= 1) + if (!(bytes_in_use & mask)) { + input.lo = i * 8 + input.what->lo % 8U; + input.hi = (i + need - 1) * 8 + input.what->hi % 8U; + bytes_in_use |= mask; + LOG1("Putting " << input.what << " in bytes " << i << ".." 
<< i + need - 1 + << " of " << group.first << " in stage " + << table->stage->stageno); + break; + } + if (input.lo < 0) { + error(input.what.lineno, "No space in input xbar %s group %d for %s", + group_type(group.first.type), group.first.index, input.what.name()); + LOG1("Failed to put " << input.what << " into " << group.first << " in stage " + << table->stage->stageno); + LOG1(" inuse: " << GroupSet(use, group.first)); + } + } + } + for (auto &hash : hash_tables) { + for (auto &col : hash.second) { + if (!col.second.data && col.second.fn) { + gen_hash_column(col, hash); + } + } + } +} + +template +void InputXbar::write_regs(REGS ®s) { + LOG1("### Input xbar " << table->name() << " write_regs " << table->loc()); + auto &xbar = regs.dp.xbar_hash.xbar; + auto gress = timing_thread(table->gress); + for (auto &group : groups) { + if (group.second.empty()) continue; + LOG1(" # Input xbar group " << group.first); + unsigned group_base = 0; + unsigned half_byte = 0; + unsigned bytes_used = 0; + switch (group.first.type) { + case Group::EXACT: + group_base = group.first.index * 16U; + break; + case Group::TERNARY: + group_base = 128 + (group.first.index * 11 + 1) / 2U; + half_byte = 133 + 11 * (group.first.index / 2U); + xbar.mau_match_input_xbar_ternary_match_enable[gress] |= + 1 << (group.first.index) / 2U; + break; + case Group::BYTE: + group_base = 133 + 11 * group.first.index; + xbar.mau_match_input_xbar_ternary_match_enable[gress] |= 1 << (group.first.index); + break; + default: + BUG(); + } + for (auto &input : group.second) { + BUG_CHECK(input.lo >= 0); + unsigned word_group = 0, word_index = 0, swizzle_mask = 0; + bool hi_enable = false; + switch (input.what->reg.size) { + case 8: + word_group = (input.what->reg.ixbar_id() - 64) / 8U; + word_index = (input.what->reg.ixbar_id() - 64) % 8U + (word_group & 4) * 2; + swizzle_mask = 0; + break; + case 16: + word_group = (input.what->reg.ixbar_id() - 128) / 12U; + word_index = + (input.what->reg.ixbar_id() - 128) % 
12U + 16 + (word_group & 4) * 3; + swizzle_mask = 1; + break; + case 32: + word_group = input.what->reg.ixbar_id() / 8U; + word_index = input.what->reg.ixbar_id() % 8U; + hi_enable = word_group & 4; + swizzle_mask = 3; + break; + default: + BUG(); + } + word_group &= 3; + unsigned phv_byte = input.what->lo / 8U; + unsigned phv_size = input.what->reg.size / 8U; + for (unsigned byte = input.lo / 8U; byte <= input.hi / 8U; byte++, phv_byte++) { + bytes_used |= 1U << byte; + unsigned i = group_base + byte; + if (half_byte && byte == 5) i = half_byte; + if (i % phv_size != phv_byte) { + if (group.first.type != Group::EXACT) { + int off; + if (phv_size == 2) + off = (i & 2) ? -1 : 1; + else + off = tcam_swizzle_offset[i & 3][phv_byte]; + xbar.tswizzle.tcam_byte_swizzle_ctl[(i & 0x7f) / 4U].set_subfield( + off & 3U, 2 * (i % 4U), 2); + i += off; + } else { + error(input.what.lineno, "misaligned phv access on input_xbar"); + } + } + if (input.what->reg.ixbar_id() < 64) { + BUG_CHECK(input.what->reg.size == 32); + xbar.match_input_xbar_32b_ctl[word_group][i].match_input_xbar_32b_ctl_address = + word_index; + if (hi_enable) + xbar.match_input_xbar_32b_ctl[word_group][i] + .match_input_xbar_32b_ctl_hi_enable = 1; + else + xbar.match_input_xbar_32b_ctl[word_group][i] + .match_input_xbar_32b_ctl_lo_enable = 1; + } else { + xbar.match_input_xbar_816b_ctl[word_group][i] + .match_input_xbar_816b_ctl_address = word_index; + xbar.match_input_xbar_816b_ctl[word_group][i].match_input_xbar_816b_ctl_enable = + 1; + } + if ((i ^ phv_byte) & swizzle_mask) + error(input.what.lineno, "Need tcam swizzle for %s", + input.what.toString().c_str()); + } + auto &power_ctl = regs.dp.match_input_xbar_din_power_ctl; + // we do in fact want mau_id, not ixbar_id here! 
+ set_power_ctl_reg(power_ctl, input.what->reg.mau_id()); + } + if (group.first.type == Group::EXACT) { + unsigned enable = 0; + if (bytes_used & 0xff) enable |= 1; + if (bytes_used & 0xff00) enable |= 2; + enable <<= group.first.index * 2; + regs.dp.mau_match_input_xbar_exact_match_enable[gress].rewrite(); + regs.dp.mau_match_input_xbar_exact_match_enable[gress] |= enable; + } + } + auto &hash = regs.dp.xbar_hash.hash; + for (auto &ht : hash_tables) { + if (ht.second.empty()) continue; + LOG1(" # Input xbar hash table " << ht.first); + write_galois_matrix(regs, ht.first, ht.second); + } + for (auto &hg : hash_groups) { + LOG1(" # Input xbar hash group " << hg.first); + int grp = hg.first; + if (hg.second.tables) { + hash.parity_group_mask[grp][0] = hg.second.tables & 0xff; + hash.parity_group_mask[grp][1] = (hg.second.tables >> 8) & 0xff; + regs.dp.mau_match_input_xbar_exact_match_enable[gress].rewrite(); + regs.dp.mau_match_input_xbar_exact_match_enable[gress] |= hg.second.tables; + } + if (hg.second.seed) { + for (int bit = 0; bit < 52; ++bit) { + if ((hg.second.seed >> bit) & 1) { + hash.hash_seed[bit] |= UINT64_C(1) << grp; + } + } + } + if (gress == INGRESS) + regs.dp.hashout_ctl.hash_group_ingress_enable |= 1 << grp; + else + regs.dp.hashout_ctl.hash_group_egress_enable |= 1 << grp; + // Set hash parity check if enabled. 
The hash parity column data is set + // in pass2 + if (hg.second.tables && !options.disable_gfm_parity) { + // Enable check if parity bit is set on all tables in hash group + int parity_bit = -1; + for (int index : bitvec(hg.second.tables)) { + HashTable ht(HashTable::EXACT, index); + if (!hash_table_parity.count(ht)) { + continue; + } else { + if (parity_bit == -1) { + parity_bit = hash_table_parity[ht]; + } else { + if (hash_table_parity[ht] != parity_bit) + error(hg.second.lineno, + "Hash tables within a hash group " + "do not have the same parity bit - %d", + grp); + } + } + } + if (parity_bit >= 0) { + regs.dp.hashout_ctl.hash_parity_check_enable |= 1 << grp; + // Hash seed must have even parity for the group. Loop through + // all bits set on the group for hash seed to determine if the + // parity bit must be set + int seed_parity = 0; + for (int bit = 0; bit < 52; ++bit) { + auto seed_bit = (hash.hash_seed[bit] >> grp) & 0x1; + seed_parity ^= seed_bit; + } + if (seed_parity) { // flip parity bit setup on group for even parity + if (!hg.second.seed_parity) + warning(hg.second.lineno, + "hash group %d has parity enabled, but setting seed_parity" + " is disabled, changing seed to even parity", + grp); + hash.hash_seed[parity_bit] ^= (1 << grp); + } + } + } + } +} + +template void InputXbar::write_regs(Target::Tofino::mau_regs &); +template void InputXbar::write_regs(Target::JBay::mau_regs &); + +template +void InputXbar::write_xmu_regs(REGS ®s) { + BUG("no XMU regs for %s", Target::name()); +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void InputXbar::write_xmu_regs, mau_regs &) + +const InputXbar::Input *InputXbar::find(Phv::Slice sl, Group grp, Group *found) const { + const InputXbar::Input *rv = nullptr; + if (groups.count(grp)) { + for (auto &in : groups.at(grp)) { + if (in.lo < 0) continue; + if (in.what->reg.uid != sl.reg.uid) continue; + if (in.what->lo / 8U > sl.lo / 8U) continue; + if (in.what->hi / 8U < sl.hi / 8U) continue; + rv = ∈ + if 
(in.what->lo > sl.lo) continue; + if (in.what->hi < sl.hi) continue; + if (found) *found = grp; + return ∈ + } + } else if (grp.index == -1) { + for (auto &g : Keys(groups)) { + if (g.type != grp.type) continue; + if ((rv = find(sl, g))) { + if (found) *found = g; + return rv; + } + } + } + return rv; +} + +int InputXbar::find_offset(const MatchSource *, Group, int) const { + BUG("find_offset should not be needed on %s", Target::name()); +} + +std::vector InputXbar::find_all(Phv::Slice sl, Group grp) const { + std::vector rv; + if (groups.count(grp)) { + for (auto &in : groups.at(grp)) { + if (in.lo < 0) continue; + if (in.what->reg.uid != sl.reg.uid) continue; + if (in.what->lo / 8U > sl.lo / 8U) continue; + if (in.what->hi / 8U < sl.hi / 8U) continue; + rv.push_back(&in); + } + } else if (grp.index == -1) { + for (auto &g : Keys(groups)) { + if (g.type != grp.type) continue; + auto tmp = find_all(sl, g); + rv.insert(rv.end(), tmp.begin(), tmp.end()); + } + } + return rv; +} + +/** + * InputXbar::find_hash_inputs: find all of the ixbar inputs that feed a particular phv slice + * to a hash table + * @param sl the PHV container slice we're interested in + * @param hash_table which hash table we want the input for (-1 for all hash tables) + */ +std::vector InputXbar::find_hash_inputs(Phv::Slice sl, + HashTable ht) const { + /* code for tofino1/2 -- all hash tables take input from exact ixbar groups, with + * two hash tables per group (even in lower bits and odd in upper bits) + */ + BUG_CHECK(ht.type == HashTable::EXACT, "not an exact hash table: %s", ht.toString().c_str()); + auto rv = find_all(sl, Group(Group::EXACT, ht.index >= 0 ? 
ht.index / 2 : -1)); + if (ht.index >= 0) { + unsigned upper = ht.index % 2; + for (auto it = rv.begin(); it != rv.end();) { + unsigned bit = (*it)->lo + (sl.lo - (*it)->what->lo); + if (bit / 64 != upper || (bit + sl.size() - 1) / 64 != upper) + it = rv.erase(it); + else + ++it; + } + } + return rv; +} + +bitvec InputXbar::hash_group_bituse(int grp) const { + bitvec rv; + unsigned tables = 0; + for (auto &g : hash_groups) { + if (grp == -1 || static_cast(g.first) == grp) { + tables |= g.second.tables; + rv |= g.second.seed; + } + } + for (auto &tbl : hash_tables) { + if (tbl.first.type != HashTable::EXACT) continue; + if (!((tables >> tbl.first.index) & 1)) continue; + // Skip parity bit if set on hash table + auto hash_parity_bit = -1; + if (hash_table_parity.count(tbl.first)) { + hash_parity_bit = hash_table_parity.at(tbl.first); + } + for (auto &col : tbl.second) { + if (col.first == hash_parity_bit) continue; + rv[col.first] = 1; + } + } + return rv; +} + +// Used by LPF/WRED meters to determine the bytemask input +bitvec InputXbar::bytemask() { + bitvec bytemask; + // Only one ixbar group allowed for a meter input + if (match_group() == -1) return bytemask; + for (auto group : groups) { + auto &inputs = group.second; + for (auto &input : inputs) { + int byte_lo = input.lo / 8; + int byte_hi = input.hi / 8; + int byte_size = byte_hi - byte_lo + 1; + bytemask.setrange(byte_lo, byte_size); + } + } + return bytemask; +} + +std::vector InputXbar::hash_column(int col, int grp) const { + unsigned tables = 0; + std::vector rv; + for (auto &g : hash_groups) + if (grp == -1 || static_cast(g.first) == grp) tables |= g.second.tables; + for (auto &tbl : hash_tables) { + if (tbl.first.type != HashTable::EXACT) continue; + if (!((tables >> tbl.first.index) & 1)) continue; + if (const HashCol *c = getref(tbl.second, col)) rv.push_back(c); + } + return rv; +} + +bool InputXbar::log_hashes(std::ofstream &out) const { + bool logged = false; + for (auto &ht : hash_tables) { + // 
ht.first is HashTable + // ht.second is std::map, key is col + if (ht.second.empty()) continue; + out << std::endl << ht.first << std::endl; + logged = true; + for (auto &col : ht.second) { + // col.first is hash result bit + // col.second is bits XOR'd in + out << "result[" << col.first << "] = "; + out << get_seed_bit(ht.first.index / 2, col.first); + for (const auto &bit : col.second.data) { + if (auto ref = get_hashtable_bit(ht.first, bit)) { + std::string field_name = ref.name(); + auto field_bit = remove_name_tail_range(field_name) + ref.lobit(); + out << " ^ " << field_name << "[" << field_bit << "]"; + } + } + out << std::endl; + } + } + return logged; +} + +std::string InputXbar::HashTable::toString() const { + std::stringstream tmp; + tmp << *this; + return tmp.str(); +} + +unsigned InputXbar::HashTable::uid() const { + switch (type) { + case EXACT: + BUG_CHECK(index < Target::EXACT_HASH_TABLES(), "index too large: %s", + toString().c_str()); + return index; + case XCMP: + return index + Target::EXACT_HASH_TABLES(); + default: + BUG("invalid type: %s", toString().c_str()); + } +} diff --git a/backends/tofino/bf-asm/input_xbar.h b/backends/tofino/bf-asm/input_xbar.h new file mode 100644 index 00000000000..f5d64a18d8c --- /dev/null +++ b/backends/tofino/bf-asm/input_xbar.h @@ -0,0 +1,367 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_INPUT_XBAR_H_ +#define BACKENDS_TOFINO_BF_ASM_INPUT_XBAR_H_ + +#include + +#include "backends/tofino/bf-utils/dynamic_hash/dynamic_hash.h" +#include "constants.h" +#include "lib/ordered_map.h" +#include "phv.h" + +class Table; +class HashExpr; + +struct HashCol { + int lineno = -1; + HashExpr *fn = 0; + int bit = 0; + bitvec data; + unsigned valid = 0; // Used only in Tofino + void dbprint(std::ostream &out) const; +}; + +inline std::ostream &operator<<(std::ostream &out, HashCol &col) { + col.dbprint(out); + return out; +} + +struct DynamicIXbar { + int bit = -1; + std::vector> match_phv; + match_t match; + + DynamicIXbar() = default; + DynamicIXbar(const DynamicIXbar &) = default; + DynamicIXbar(DynamicIXbar &&) = default; + DynamicIXbar &operator=(const DynamicIXbar &) = default; + DynamicIXbar &operator=(DynamicIXbar &&) = default; + DynamicIXbar(const Table *, const pair_t &); +}; + +class InputXbar { + public: + struct Group { + short index; + enum type_t { INVALID, EXACT, TERNARY, BYTE, GATEWAY, XCMP } type; + Group() : index(-1), type(INVALID) {} + Group(Group::type_t t, int i) : index(i), type(t) {} + explicit operator bool() const { return type != INVALID; } + bool operator==(const Group &a) const { return type == a.type && index == a.index; } + bool operator<(const Group &a) const { + return (type << 16) + index < (a.type << 16) + a.index; + } + }; + struct HashTable { + short index; + enum type_t { INVALID, EXACT, XCMP } type; + HashTable() : index(-1), type(INVALID) {} + HashTable(type_t t, int i) : index(i), type(t) {} + explicit operator bool() const { return type != INVALID; } + bool operator==(const HashTable &a) const { return type == a.type && index == a.index; } + bool operator<(const HashTable &a) const { + return (type << 16) + index < (a.type << 16) + a.index; + } + std::string toString() const; + unsigned uid() const; + }; + + protected: + struct Input { + 
Phv::Ref what; + int lo, hi; + explicit Input(const Phv::Ref &a) : what(a), lo(-1), hi(-1) {} + Input(const Phv::Ref &a, int s) : what(a), lo(s), hi(-1) {} + Input(const Phv::Ref &a, int l, int h) : what(a), lo(l), hi(h) {} + unsigned size() const { return hi - lo + 1; } + }; + struct HashGrp { + int lineno = -1; + unsigned tables = 0; // Bit set for table index + uint64_t seed = 0; + bool seed_parity = false; // Parity to be set on the seed value + }; + Table *table; + ordered_map> groups; + std::map> hash_tables; + // Map of hash table index to parity bit set on the table + std::map hash_table_parity; + std::map hash_groups; + static bool conflict(const std::vector &a, const std::vector &b); + static bool conflict(const std::map &, const std::map &, int * = 0); + static bool conflict(const HashGrp &a, const HashGrp &b); + bool copy_existing_hash(HashTable ht, std::pair &col); + uint64_t hash_columns_used(HashTable hash); + uint64_t hash_columns_used(unsigned id) { + BUG_CHECK(id < Target::EXACT_HASH_TABLES(), "%d out of range for exact hash", id); + return hash_columns_used(HashTable(HashTable::EXACT, id)); + } + bool can_merge(HashGrp &a, HashGrp &b); + void add_use(unsigned &byte_use, std::vector &a); + virtual int hash_num_columns(HashTable ht) const { return 52; } + virtual int group_max_index(Group::type_t t) const; + virtual Group group_name(bool ternary, const value_t &value) const; + virtual int group_size(Group::type_t t) const; + const char *group_type(Group::type_t t) const; + void parse_group(Table *t, Group gr, const value_t &value); + virtual bool parse_hash(Table *t, const pair_t &kv) { return false; } + void parse_hash_group(HashGrp &hash_group, const value_t &value); + void parse_hash_table(Table *t, HashTable ht, const value_t &value); + virtual bool parse_unit(Table *t, const pair_t &kv) { return false; } + void setup_hash(std::map &, HashTable ht, gress_t, int stage, value_t &, + int lineno, int lo, int hi); + struct TcamUseCache { + std::map> 
tcam_use; + std::set ixbars_added; + }; + virtual void check_input(Group group, Input &input, TcamUseCache &tcam_use); + int tcam_input_use(int out_byte, int phv_byte, int phv_size); + void tcam_update_use(TcamUseCache &use); + void gen_hash_column(std::pair &col, + std::pair> &hash); + + struct GroupSet : public IHasDbPrint { + Group group; + const std::vector &use; + GroupSet(const std::vector &u, Group g) : group(g), use(u) {} + GroupSet(ordered_map> &u, Group g) : group(g), use(u[g]) {} + void dbprint(std::ostream &) const; + const Input *find(Phv::Slice sl) const; + std::vector find_all(Phv::Slice sl) const; + }; + + InputXbar() = delete; + InputXbar(const InputXbar &) = delete; + void input(Table *table, bool ternary, const VECTOR(pair_t) & data); + InputXbar(Table *table, int lineno) : table(table), lineno(lineno) {} + + public: + const int lineno; + int random_seed = -1; + static std::unique_ptr create(Table *table, const value_t *key = nullptr); + static std::unique_ptr create(Table *table, bool tern, const value_t &key, + const VECTOR(pair_t) & data); + void pass1(); + virtual void pass2(); + template + void write_regs(REGS ®s); + template + void write_xmu_regs(REGS ®s); + template + void write_galois_matrix(REGS ®s, HashTable id, const std::map &mat); + bool have_exact() const { + for (auto &grp : groups) + if (grp.first.type == Group::EXACT) return true; + return false; + } + bool have_ternary() const { + for (auto &grp : groups) + if (grp.first.type != Group::EXACT) return true; + return false; + } + int hash_group() const { + /* used by gateways to get the associated hash group */ + if (hash_groups.size() != 1) return -1; + return hash_groups.begin()->first; + } + bitvec hash_group_bituse(int grp = -1) const; + std::vector hash_column(int col, int grp = -1) const; + int match_group() { + /* used by gateways and stateful to get the associated match group */ + if (groups.size() != 1 || groups.begin()->first.type != Group::EXACT) return -1; + return 
groups.begin()->first.index; + } + bitvec bytemask(); + /* functions for tcam ixbar that take into account funny byte/word group stuff */ + unsigned tcam_width(); + int tcam_byte_group(int n); + int tcam_word_group(int n); + std::map> &get_hash_tables() { return hash_tables; } + const std::map &get_hash_table(HashTable id); + const std::map &get_hash_table(unsigned id = 0) { + return get_hash_table(HashTable(HashTable::EXACT, id)); + } + + // which Group provides the input for a given HashTable + virtual Group hashtable_input_group(HashTable ht) const { + BUG_CHECK(ht.type == HashTable::EXACT, "not an exact hash table"); + return Group(Group::EXACT, ht.index / 2); + } + virtual Phv::Ref get_hashtable_bit(HashTable id, unsigned bit) const { + BUG_CHECK(id.type == HashTable::EXACT, "not an exact hash table"); + return get_group_bit(Group(Group::EXACT, id.index / 2), bit + 64 * (id.index & 0x1)); + } + Phv::Ref get_hashtable_bit(unsigned id, unsigned bit) const { + return get_hashtable_bit(HashTable(HashTable::EXACT, id), bit); + } + Phv::Ref get_group_bit(Group grp, unsigned bit) const { + if (groups.count(grp)) + for (auto &in : groups.at(grp)) + if (bit >= unsigned(in.lo) && bit <= unsigned(in.hi)) + return Phv::Ref(in.what, bit - in.lo, bit - in.lo); + return Phv::Ref(); + } + std::string get_field_name(int bit) { + for (auto &g : groups) { + for (auto &p : g.second) { + if (bit <= p.hi && bit >= p.lo) return p.what.name(); + } + } + return ""; + } + bool is_p4_param_bit_in_hash(std::string p4_param_name, unsigned bit) { + for (auto &g : groups) { + for (auto &p : g.second) { + std::string phv_field_name = p.what.name(); + auto phv_field_lobit = remove_name_tail_range(phv_field_name); + phv_field_lobit += p.what.fieldlobit(); + auto phv_field_hibit = phv_field_lobit + p.size() - 1; + if (p4_param_name == phv_field_name && bit <= phv_field_hibit && + bit >= phv_field_lobit) + return true; + } + } + return false; + } + unsigned get_seed_bit(unsigned group, unsigned 
bit) const { + if (hash_groups.count(group)) return ((hash_groups.at(group).seed >> bit) & 0x1); + return 0; + } + HashGrp *get_hash_group(unsigned group = -1) { return ::getref(hash_groups, group); } + HashGrp *get_hash_group_from_hash_table(int hash_table) { + if (hash_table < 0 || hash_table >= Target::EXACT_HASH_TABLES()) return nullptr; + for (auto &hg : hash_groups) { + if (hg.second.tables & (1U << hash_table)) return &hg.second; + } + return nullptr; + } + bool log_hashes(std::ofstream &out) const; + virtual unsigned exact_physical_ids() const { return -1; } + + class all_iter { + decltype(groups)::const_iterator outer, outer_end; + bool inner_valid; + std::vector::const_iterator inner; + void mk_inner_valid() { + if (!inner_valid) { + if (outer == outer_end) return; + inner = outer->second.begin(); + } + while (inner == outer->second.end()) { + if (++outer == outer_end) return; + inner = outer->second.begin(); + } + inner_valid = true; + } + struct iter_deref : public std::pair { + explicit iter_deref(const std::pair &a) + : std::pair(a) {} + iter_deref *operator->() { return this; } + }; + + public: + all_iter(decltype(groups)::const_iterator o, decltype(groups)::const_iterator oend) + : outer(o), outer_end(oend), inner_valid(false) { + mk_inner_valid(); + } + bool operator==(const all_iter &a) { + if (outer != a.outer) return false; + if (inner_valid != a.inner_valid) return false; + return inner_valid ? 
inner == a.inner : true; + } + all_iter &operator++() { + if (inner_valid && ++inner == outer->second.end()) { + ++outer; + inner_valid = false; + mk_inner_valid(); + } + return *this; + } + std::pair operator*() { + return std::pair(outer->first, *inner); + } + iter_deref operator->() { return iter_deref(**this); } + }; + all_iter begin() const { return all_iter(groups.begin(), groups.end()); } + all_iter end() const { return all_iter(groups.end(), groups.end()); } + + const Input *find(Phv::Slice sl, Group grp, Group *found = nullptr) const; + const Input *find_exact(Phv::Slice sl, int group) const { + return find(sl, Group(Group::EXACT, group)); + } + virtual int find_offset(const MatchSource *, Group grp, int offset) const; + int find_gateway_offset(const MatchSource *ms, int offset) const { + return find_offset(ms, Group(Group::GATEWAY, 0), offset); + } + int find_match_offset(const MatchSource *ms, int offset = -1) const { + return find_offset(ms, Group(Group::EXACT, -1), offset); + } + + std::vector find_all(Phv::Slice sl, Group grp) const; + virtual std::vector find_hash_inputs(Phv::Slice sl, HashTable ht) const; + virtual int global_bit_position_adjust(HashTable ht) const { + BUG_CHECK(ht.type == HashTable::EXACT, "not an exact hash table"); + return (ht.index / 2) * 128; + } + virtual bitvec global_column0_extract( + HashTable ht, const hash_column_t matrix[PARITY_GROUPS_DYN][HASH_MATRIX_WIDTH_DYN]) const { + BUG_CHECK(ht.type == HashTable::EXACT, "not an exact hash table"); + return bitvec(matrix[ht.index][0].column_value); + } + virtual void setup_match_key_cfg(const MatchSource *) {} // noop for tofino1/2 +}; + +inline std::ostream &operator<<(std::ostream &out, InputXbar::Group gr) { + switch (gr.type) { + case InputXbar::Group::EXACT: + out << "exact"; + break; + case InputXbar::Group::TERNARY: + out << "ternary"; + break; + case InputXbar::Group::BYTE: + out << "byte"; + break; + case InputXbar::Group::GATEWAY: + out << "gateway"; + break; + case 
InputXbar::Group::XCMP: + out << "xcmp"; + break; + default: + out << "(gr.type) << ">"; + } + return out << " ixbar group " << gr.index; +} + +inline std::ostream &operator<<(std::ostream &out, InputXbar::HashTable ht) { + switch (ht.type) { + case InputXbar::HashTable::EXACT: + out << "exact"; + break; + case InputXbar::HashTable::XCMP: + out << "xcmp"; + break; + default: + out << "(ht.type) << ">"; + } + return out << " hashtable " << ht.index; +} + +#endif /* BACKENDS_TOFINO_BF_ASM_INPUT_XBAR_H_ */ diff --git a/backends/tofino/bf-asm/instruction.cpp b/backends/tofino/bf-asm/instruction.cpp new file mode 100644 index 00000000000..415930d8d24 --- /dev/null +++ b/backends/tofino/bf-asm/instruction.cpp @@ -0,0 +1,1738 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "instruction.h" + +#include "action_bus.h" +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "depositfield.h" +#include "phv.h" +#include "power_ctl.h" + +namespace { +constexpr int RotationBits = 16; +} + +std::multimap + Instruction::Decode::opcode[Instruction::NUM_SETS]; + +Instruction::Decode::Decode(const char *name, int set, bool ts) : type_suffix(ts) { + targets = ~0U; + for (auto d : ValuesForKey(opcode[set], name)) { + BUG_CHECK(!(d->targets & 1)); + targets &= ~d->targets; + } + BUG_CHECK(targets > 1); + opcode[set].emplace(name, this); +} +Instruction::Decode::Decode(const char *name, target_t target, int set, bool ts) : type_suffix(ts) { + targets = 1 << target; + for (auto d : ValuesForKey(opcode[set], name)) { + if (d->targets & 1) { + d->targets &= ~targets; + BUG_CHECK(d->targets > 1); + } + } + opcode[set].emplace(name, this); +} +Instruction::Decode::Decode(const char *name, std::set target, int set, bool ts) + : type_suffix(ts), targets(0) { + for (auto t : target) targets |= 1 << t; + BUG_CHECK(targets > 1); + for (auto d : ValuesForKey(opcode[set], name)) { + if (d->targets & 1) { + d->targets &= ~targets; + BUG_CHECK(d->targets > 1); + } + } + opcode[set].emplace(name, this); +} + +Instruction *Instruction::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) { + for (auto d : ValuesForKey(Instruction::Decode::opcode[tbl->instruction_set()], op[0].s)) { + if ((d->targets >> Target::register_set()) & 1) { + auto inst = d->decode(tbl, act, op); + if (!inst) continue; + return inst; + } + } + if (auto p = strchr(op[0].s, '.')) { + std::string opname(op[0].s, p - op[0].s); + for (auto d : ValuesForKey(Instruction::Decode::opcode[tbl->instruction_set()], opname)) { + if (((d->targets >> options.target) & 1) && d->type_suffix) { + auto inst = d->decode(tbl, act, 
op); + if (!inst) continue; + return inst; + } + } + } + return 0; +} + +namespace VLIW { +static const int group_size[] = {32, 32, 32, 32, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16}; + +struct Operand : public IHasDbPrint { + /** A source operand to a VLIW instruction -- this can be a variety of things, so we + * have a pointer to an abstract base class and a number of derived concrete classes for + * the different kinds of operands. When we parse the operand, the type may be determined, + * or if it is just a name, we will have to wait to a later pass to resolve what the + * name refers to. At that point, the `Named' object created in parsing will be replaced + * with the actual operand type */ + static const int ACTIONBUS_OPERAND = 0x20; + struct Base { + int lineno; + explicit Base(int line) : lineno(line) {} + Base(const Base &a) : lineno(a.lineno) {} + virtual ~Base() {} + virtual Base *clone() = 0; + virtual Base *lookup(Base *&ref) { return this; } + virtual bool check() { return true; } + virtual int phvGroup() { return -1; } + virtual int bits(int group, int dest_size = -1) = 0; + virtual unsigned bitoffset(int group) const { return 0; } + virtual void dbprint(std::ostream &) const = 0; + virtual bool equiv(const Base *) const = 0; + virtual bool phvRead(std::function) { return false; } + /** pass1 called as part of pass1 processing of stage + * @param tbl table containing the action with the instruction with this operand + * @param group mau PHV group of the ALU (dest) for this instruction */ + virtual void pass1(Table *tbl, int group) {} + /** pass2 called as part of pass2 processing of stage + * @param group mau PHV group of the ALU (dest) for this instruction */ + virtual void pass2(int group) {} + } *op; + struct Const : Base { + int64_t value; + Const(int line, int64_t v) : Base(line), value(v) {} + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return value == a->value; + } else { + return false; + } + } + Const *clone() 
override { return new Const(*this); } + int32_t bits(int group, int dest_size = -1) override { + // assert(value <= 0xffffffffLL); + int32_t val = value; + if (val > 0 && ((val >> (group_size[group] - 1)) & 1)) + val |= UINT64_MAX << group_size[group]; + int minconst = Target::MINIMUM_INSTR_CONSTANT(); + + if (dest_size != -1) { // DepositField::encode() calling. + auto rotConst = + DepositField::discoverRotation(val, group_size[group], 8, minconst - 1); + if (rotConst.rotate) + return (rotConst.value + 24) | (rotConst.rotate << RotationBits); + } + + if (val >= minconst && val < 8) return val + 24; + error(lineno, "constant value %" PRId64 " out of range for immediate", value); + return -1; + } + void dbprint(std::ostream &out) const override { out << value; } + }; + struct Phv : Base { + ::Phv::Ref reg; + Phv(int line, gress_t g, int stage, const value_t &n) : Base(line), reg(g, stage, n) {} + Phv(int line, gress_t g, int stage, const std::string &n, int l, int h) + : Base(line), reg(g, stage, line, n, l, h) {} + explicit Phv(const ::Phv::Ref &r) : Base(r.lineno), reg(r) {} + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return reg == a->reg; + } else { + return false; + } + } + Phv *clone() override { return new Phv(*this); } + bool check() override { + if (!reg.check()) return false; + if (reg->reg.mau_id() < 0) { + error(reg.lineno, "%s not accessable in mau", reg->reg.name); + return false; + } + return true; + } + int phvGroup() override { return reg->reg.mau_id() / ::Phv::mau_groupsize(); } + int bits(int group, int dest_size = -1) override { + if (group != phvGroup()) { + error(lineno, "registers in an instruction must all be in the same phv group"); + return -1; + } + return reg->reg.mau_id() % ::Phv::mau_groupsize(); + } + unsigned bitoffset(int group) const override { return reg->lo; } + void pass1(Table *tbl, int) override { + tbl->stage->action_use[tbl->gress][reg->reg.uid] = true; + } + void dbprint(std::ostream 
&out) const override { out << reg; } + bool phvRead(std::function fn) override { + fn(*reg); + return true; + } + }; + struct Action : Base { + /* source referring to either an action data or immediate field OR an attached table + * output. All of these are accessed via the action data bus */ + std::string name; + std::string p4name; + TableOutputModifier mod = TableOutputModifier::NONE; + Table *table; + Table::Format::Field *field; + int lo, hi; + + Action(int line, const std::string &n, Table *tbl, Table::Format::Field *f, unsigned l, + unsigned h) + : Base(line), name(n), table(tbl), field(f), lo(l), hi(h) {} + Action(int line, const std::string &n, TableOutputModifier mod, Table *tbl, unsigned l, + unsigned h) + : Base(line), name(n), mod(mod), table(tbl), field(nullptr), lo(l), hi(h) {} + Action(int line, const std::string &n, Table *tbl, Table::Format::Field *f, unsigned l, + unsigned h, const std::string &m) + : Base(line), name(n), p4name(m), table(tbl), field(f), lo(l), hi(h) {} + Action(int line, const std::string &n, TableOutputModifier mod, Table *tbl, unsigned l, + unsigned h, const std::string &m) + : Base(line), name(n), p4name(m), mod(mod), table(tbl), field(nullptr), lo(l), hi(h) {} + bool equiv(const Base *a_) const override { + auto *a = dynamic_cast(a_); + if (!a || lo != a->lo || hi != a->hi) return false; + if (name == a->name && table == a->table && field == a->field && mod == a->mod) + return true; + if (field != a->field && (!field || !a->field)) return false; + int b1 = field ? table->find_on_actionbus(field, lo, hi, 0) + : table->find_on_actionbus(name, mod, lo, hi, 0); + int b2 = a->field ? 
a->table->find_on_actionbus(a->field, lo, hi, 0) + : a->table->find_on_actionbus(a->name, mod, lo, hi, 0); + return b1 == b2 && b1 >= 0; + } + Action *clone() override { return new Action(*this); } + int bits(int group, int dest_size = -1) override { + int size = group_size[group] / 8U; + BUG_CHECK(lo >= 0 && hi >= 0); + unsigned lo = this->lo, hi = this->hi; + if (dest_size > 0) { + // override size based on destination size for deposit-field + hi = lo + dest_size - 1; + unsigned mask = group_size[group] - 1; // group size is power of 2 (8, 16, or 32) + if ((hi | mask) != (lo | mask)) { + // crosses slot boundary, so is a wrap-around rotated source -- need all of it + lo &= ~mask; + hi = lo | mask; + } + } + int byte = field ? table->find_on_actionbus(field, lo, hi, size) + : table->find_on_actionbus(name, mod, lo, hi, size); + if (byte < 0) { + if (this->lo > 0 || (field && this->hi + 1 < int(field->size))) + error(lineno, "%s(%d..%d) is not on the action bus", name.c_str(), lo, hi); + else + error(lineno, "%s is not on the action bus", name.c_str()); + return -1; + } + int byte_value = byte; + if (size == 2) byte -= 32; + if (byte < 0 || byte > 32 * size) + error(lineno, "action bus entry %d(%s) out of range for %d-bit access", byte_value, + name.c_str(), size * 8); + // else if (byte % size != 0) + // error(lineno, "action bus entry %d(%s) misaligned for %d-bit access", + // byte_value, name.c_str(), size*8); + else + return ACTIONBUS_OPERAND + byte / size; + return -1; + } + void pass1(Table *tbl, int group) override { + if (field) field->flags |= Table::Format::Field::USED_IMMED; + if (lo >= 0 && hi >= 0 && lo / group_size[group] != hi / group_size[group]) { + error(lineno, + "action bus slice (%d..%d) can't fit in a single slot for %d bit " + "access", + lo, hi, group_size[group]); + // chop it down to be in range (avoid error cascade) + hi = lo | (group_size[group] - 1); + } + } + void pass2(int group) override { + int bits = group_size[group]; + unsigned 
bytes = bits / 8U; + if (lo < 0) lo = 0; + if (hi < 0) hi = lo + bits - 1; + if (hi > lo + bits - 1) { + warning(lineno, "%s(%d..%d) larger than %d bit access", name.c_str(), lo, hi, bits); + hi = lo + bits - 1; + } + if ((lo ^ hi) & ~(bits - 1)) + error(lineno, "%s(%d..%d) can't be accessed by %d bit PHV", name.c_str(), lo, hi, + bits); + if (field && table->find_on_actionbus(field, lo, hi, bytes) < 0) { + int immed_offset = 0; + if (table->format && table->format->immed) + immed_offset = table->format->immed->bit(0); + int l = field->bit(lo) - immed_offset, h = field->bit(hi) - immed_offset; + if (l % bits != 0 && l / bits != h / bits) + error(lineno, "%s misaligned for action bus", name.c_str()); + table->need_on_actionbus(field, lo, hi, bytes); + } else if (!field && table->find_on_actionbus(name, mod, lo, hi, bytes) < 0) { + if (auto *tbl = ::get(Table::all, name)) + table->need_on_actionbus(tbl, mod, lo, hi, bytes); + else + error(lineno, "Can't find any operand named %s", name.c_str()); + } + } + unsigned bitoffset(int group) const override { + int size = group_size[group] / 8U; + int byte = field ? table->find_on_actionbus(field, lo, hi, size) + : table->find_on_actionbus(name, lo, hi, size); + return 8 * (byte % size) + lo % 8; + } + void dbprint(std::ostream &out) const override { + out << name << mod << '(' << lo << ".." 
<< hi << ')'; + if (field) + out << '[' << field->bits[0].lo << ':' << field->size << ", " << field->group + << ']'; + } + }; + struct RawAction : Base { + int index; + unsigned offset; + + RawAction(int line, int idx, unsigned off) : Base(line), index(idx), offset(off) {} + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return index == a->index && offset == a->offset; + } else { + return false; + } + } + RawAction *clone() override { return new RawAction(*this); } + int bits(int group, int dest_size = -1) override { return ACTIONBUS_OPERAND + index; } + unsigned bitoffset(int group) const override { return offset; } + void dbprint(std::ostream &out) const override { out << 'A' << index; } + }; + struct HashDist : Base { + Table *table; + std::vector units; + int lo = -1, hi = -1; + + HashDist(int line, Table *t) : Base(line), table(t) {} + HashDist(int line, Table *t, int unit) : Base(line), table(t) { units.push_back(unit); } + unsigned bitoffset(int group) const override { return lo >= 0 ? lo : 0; } + static HashDist *parse(Table *tbl, const VECTOR(value_t) & v) { + if (v.size < 2 || v[0] != "hash_dist") return nullptr; + auto *rv = new HashDist(v[0].lineno, tbl); + for (int i = 1; i < v.size; ++i) { + if (v[i].type == tRANGE && rv->lo == -1) { + rv->lo = v[i].range.lo; + rv->hi = v[i].range.hi; + } else if (CHECKTYPE(v[i], tINT)) { + rv->units.push_back(v[i].i); + } else { + delete rv; + return nullptr; + } + } + return rv; + } + + HashDistribution *find_hash_dist(int unit) const { + if (auto rv = table->find_hash_dist(unit)) return rv; + for (auto *mtab : table->get_match_tables()) + if (auto rv = mtab->find_hash_dist(unit)) return rv; + return nullptr; + } + bool equiv(const Base *a_) const override { + auto *a = dynamic_cast(a_); + if (!a || units != a->units || lo != a->lo || hi != a->hi) return false; + if (table == a->table) return true; + int elo = this->lo < 0 ? 0 : lo; + int ehi = this->hi < 0 ? 
15 : hi; + for (auto unit : units) { + int b1 = table->find_on_actionbus(find_hash_dist(unit), elo, ehi, 0); + int b2 = a->table->find_on_actionbus(a->find_hash_dist(unit), elo, ehi, 0); + if (b1 != b2 || b1 < 0) return false; + } + return true; + } + HashDist *clone() override { return new HashDist(*this); } + void pass2(int group) override { + if (units.size() > 2) { + error(lineno, "Can't use more than 2 hash_dist units together in an action"); + return; + } + int size = group_size[group] / 8U; + if (lo < 0) lo = 0; + if (hi < 0) hi = 8 * size - 1; + if ((lo ^ hi) & ~(8 * size - 1)) + error(lineno, "hash dist slice(%d..%d) can't be accessed by %d bit PHV", lo, hi, + 8 * size); + if (units.size() == 2) { + if (size != 4) + error(lineno, "Can't combine hash_dist units in %d bit operation", size * 8); + auto xbar_use = HashDistribution::IMMEDIATE_LOW; + for (auto u : units) { + if (auto hd = find_hash_dist(u)) + hd->xbar_use |= xbar_use; + else + error(lineno, "No hash dist %d in table %s", u, table->name()); + xbar_use = HashDistribution::IMMEDIATE_HIGH; + } + } else if (auto hd = find_hash_dist(units.at(0))) { + if (hd->xbar_use & HashDistribution::IMMEDIATE_HIGH) { + if (size == 4) { + lo += 16; + hi += 16; + } + } else { + hd->xbar_use |= HashDistribution::IMMEDIATE_LOW; + } + } else { + error(lineno, "No hash dist %d in table %s", units.at(0), table->name()); + } + int lo = this->lo; + for (auto u : units) { + if (auto hd = find_hash_dist(u)) { + if (table->find_on_actionbus(hd, lo, hi, size) < 0) + table->need_on_actionbus(hd, lo, hi, size); + lo += 16; + } + } + } + int bits(int group, int dest_size = -1) override { + int size = group_size[group] / 8U; + auto hd = find_hash_dist(units.at(0)); + if (!hd) error(lineno, "could not find hash dist"); + int byte = table->find_on_actionbus(hd, lo, hi, size); + if (byte < 0) { + error(lineno, "hash dist %d is not on the action bus", (hd ? 
hd->id : -1)); + return -1; + } + if (units.size() == 2) { + auto hd1 = find_hash_dist(units.at(1)); + if (!hd1) error(lineno, "could not find hash dist"); + if (table->find_on_actionbus(ActionBusSource(hd, hd1), lo + 16, hi, size) < 0) + error(lineno, "hash dists %d and %d not contiguous on the action bus", + (hd ? hd->id : -1), (hd1 ? hd1->id : -1)); + } + if (size == 2) byte -= 32; + if (byte >= 0 && byte < 32 * size) return ACTIONBUS_OPERAND + byte / size; + error(lineno, "action bus entry %d(hash_dist %d) out of range for %d-bit access", + size == 2 ? byte + 32 : byte, hd->id, size * 8); + return -1; + } + void dbprint(std::ostream &out) const override { + out << "hash_dist("; + const char *sep = ""; + for (auto u : units) { + out << sep << u; + sep = ", "; + } + out << ")"; + } + }; + struct RandomGen : Base { + Table *table; + RandomNumberGen rng; + int lo = 0, hi = -1; + RandomGen(Table *t, const VECTOR(value_t) & v) : Base(v[0].lineno), table(t), rng(0) { + if (v.size > 1 && CHECKTYPE(v[1], tINT)) rng.unit = v[1].i; + if (rng.unit < 0 || rng.unit > 1) error(v[0].lineno, "invalid random number generator"); + if (v.size > 2 && CHECKTYPE(v[2], tRANGE)) { + lo = v[2].range.lo; + hi = v[2].range.hi; + if (lo < 0 || hi > 31 || hi < lo) + error(v[2].lineno, "invalid random number generator slice"); + } + } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return rng == a->rng && lo == a->lo && hi == a->hi; + } else { + return false; + } + } + RandomGen *clone() override { return new RandomGen(*this); } + void pass2(int group) override { + unsigned size = group_size[group]; + if (hi < 0) hi = lo + 8 * size - 1; + if ((lo ^ hi) & ~(8 * size - 1)) + error(lineno, "invalid slice(%d..%d) of rng %d for use with %d bit PHV", lo, hi, + rng.unit, size); + if (table->find_on_actionbus(rng, lo, hi, size / 8U)) + table->need_on_actionbus(rng, lo, hi, size / 8U); + } + int bits(int group, int dest_size = -1) override { + int size = 
group_size[group] / 8U; + int byte = table->find_on_actionbus(rng, lo, hi, size); + if (byte < 0) { + error(lineno, "rng %d is not on the action bus", rng.unit); + return -1; + } + if (size == 2) byte -= 32; + if (byte >= 0 && byte < 32 * size) return ACTIONBUS_OPERAND + byte / size; + error(lineno, "action bus entry %d(rng %d) out of range for %d-bit access", + size == 2 ? byte + 32 : byte, rng.unit, size * 8); + return -1; + } + unsigned bitoffset(int group) const override { return lo; } + void dbprint(std::ostream &out) const override { + out << "rng " << rng.unit << '(' << lo << ".." << hi << ')'; + } + }; + struct Named : Base { + std::string name; + std::string p4name; + TableOutputModifier mod = TableOutputModifier::NONE; + int lo, hi; + Table *tbl; + std::string action; + + Named(int line, const std::string &n, int l, int h, Table *t, const std::string &act) + : Base(line), name(n), lo(l), hi(h), tbl(t), action(act) {} + Named(int line, const std::string &n, TableOutputModifier m, int l, int h, Table *t, + const std::string &act) + : Base(line), name(n), mod(m), lo(l), hi(h), tbl(t), action(act) {} + Named(int line, const std::string &n, int l, int h, Table *t, const std::string &act, + std::string &m) + : Base(line), name(n), p4name(m), lo(l), hi(h), tbl(t), action(act) {} + Named(int line, const std::string &n, TableOutputModifier mod, int l, int h, Table *t, + const std::string &act, std::string &m) + : Base(line), name(n), p4name(m), mod(mod), lo(l), hi(h), tbl(t), action(act) {} + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return name == a->name && lo == a->lo && hi == a->hi && tbl == a->tbl && + action == a->action; + } else { + return false; + } + } + Base *lookup(Base *&ref) override; + Named *clone() override { return new Named(*this); } + bool check() override { + BUG(); + return true; + } + int phvGroup() override { + BUG(); + return -1; + } + int bits(int group, int dest_size = -1) override { + BUG(); + 
return 0; + } + unsigned bitoffset(int group) const override { + BUG(); + return 0; + } + void pass1(Table *, int) override { BUG(); } + void dbprint(std::ostream &out) const override { + out << name; + if (lo >= 0) { + out << '(' << lo; + if (hi >= 0 && hi != lo) out << ".. " << hi; + out << ')'; + } + out << '[' << tbl->name() << ':' << action << ']'; + } + }; + Operand() : op(0) {} + Operand(const Operand &a) : op(a.op ? a.op->clone() : 0) {} + Operand(Operand &&a) : op(a.op) { a.op = 0; } + Operand &operator=(const Operand &a) { + if (&a != this) { + delete op; + op = a.op ? a.op->clone() : 0; + } + return *this; + } + Operand &operator=(Operand &&a) { + if (&a != this) { + delete op; + op = a.op; + a.op = 0; + } + return *this; + } + ~Operand() { delete op; } + Operand(Table *tbl, const Table::Actions::Action *act, const value_t &v); + Operand(gress_t gress, int stage, const value_t &v) : op(new Phv(v.lineno, gress, stage, v)) {} + explicit Operand(const ::Phv::Ref &r) : op(new Phv(r)) {} + bool valid() const { return op != 0; } + bool operator==(Operand &a) { + return op == a.op || (op && a.op && op->lookup(op)->equiv(a.op->lookup(a.op))); + } + unsigned bitoffset(int group) { return op->lookup(op)->bitoffset(group); } + bool check() { return op && op->lookup(op) ? 
op->check() : false; } + int phvGroup() { return op->lookup(op)->phvGroup(); } + bool phvRead(std::function fn) { + return op->lookup(op)->phvRead(fn); + } + int bits(int group, int dest_size = -1) { return op->lookup(op)->bits(group, dest_size); } + void dbprint(std::ostream &out) const { op->dbprint(out); } + Base *operator->() { return op->lookup(op); } + template + T *to() { + return dynamic_cast(op->lookup(op)); + } +}; + +static void parse_slice(const VECTOR(value_t) & vec, int idx, int &lo, int &hi) { + if (PCHECKTYPE2(vec.size == idx + 1, vec[idx], tINT, tRANGE)) { + if (vec[idx].type == tINT) { + lo = hi = vec[idx].i; + } else { + lo = vec[idx].range.lo; + hi = vec[idx].range.hi; + } + } +} + +Operand::Operand(Table *tbl, const Table::Actions::Action *act, const value_t &v) : op(0) { + if (v.type == tINT) { + op = new Const(v.lineno, v.i); + } else if (CHECKTYPE2(v, tSTR, tCMD)) { + std::string name = v.type == tSTR ? v.s : v[0].s; + std::string p4name = name; + TableOutputModifier mod = TableOutputModifier::NONE; + int lo = -1, hi = -1; + if (v.type == tCMD) { + if (v == "hash_dist" && (op = HashDist::parse(tbl, v.vec))) return; + if (v == "rng" && (op = new RandomGen(tbl, v.vec))) return; + if (v.vec.size > 1 && (v[1] == "color" || v[1] == "address")) { + if (v[1] == "color") mod = TableOutputModifier::Color; + if (v[1] == "address") mod = TableOutputModifier::Address; + if (v[1].type == tCMD) + parse_slice(v[1].vec, 1, lo, hi); + else if (v.vec.size > 2) + parse_slice(v.vec, 2, lo, hi); + } else { + parse_slice(v.vec, 1, lo, hi); + } + } + name = act->alias_lookup(v.lineno, name, lo, hi); + if (name == "hash_dist" && lo == hi) { + auto hd = new HashDist(v.lineno, tbl, lo); + if (v.type == tCMD && v[1].type == tRANGE) { + hd->lo = v[1].range.lo; + hd->hi = v[1].range.hi; + } + op = hd; + return; + } + op = new Named(v.lineno, name, mod, lo, hi, tbl, act->name, p4name); + } +} + +auto Operand::Named::lookup(Base *&ref) -> Base * { + int slot, len = -1; + 
if (tbl->action) tbl = tbl->action; + int lo = this->lo >= 0 ? this->lo : 0; + if (auto *field = tbl->lookup_field(name, action)) { + if (!options.match_compiler) { + /* FIXME -- The glass compiler generates refs past the end of action table fields + * like these, and just accesses whatever bits happen to be there. So we + * supress these error checks for compatibility (ex: tests/action_bus1.p4) */ + if ((unsigned)lo >= field->size) { + error(lineno, "Bit %d out of range for field %s", lo, name.c_str()); + ref = 0; + } else if (hi >= 0 && (unsigned)hi >= field->size) { + error(lineno, "Bit %d out of range for field %s", hi, name.c_str()); + ref = 0; + } + } + if (ref) { + ref = new Action(lineno, name, tbl, field, lo, hi >= 0 ? hi : field->size - 1, p4name); + } + } else if (tbl->find_on_actionbus(name, mod, lo, hi >= 0 ? hi : 7, 0, &len) >= 0) { + ref = new Action(lineno, name, mod, tbl, lo, hi >= 0 ? hi : len - 1, p4name); + } else if (::Phv::get(tbl->gress, tbl->stage->stageno, name)) { + ref = new Phv(lineno, tbl->gress, tbl->stage->stageno, name, lo, hi); + } else if (sscanf(name.c_str(), "A%d%n", &slot, &len) >= 1 && + len == static_cast(name.size()) && slot >= 0 && slot < 32) { + ref = new RawAction(lineno, slot, lo); + } else if (name == "hash_dist" && (lo == hi || hi < 0)) { + ref = new HashDist(lineno, tbl, lo); + } else if (Table::all->count(name)) { + ref = new Action(lineno, name, mod, tbl, lo, hi, p4name); + } else { + ref = new Phv(lineno, tbl->gress, tbl->stage->stageno, name, this->lo, hi); + } + if (ref != this) delete this; + return ref; +} + +struct VLIWInstruction : Instruction { + explicit VLIWInstruction(int l) : Instruction(l) {} + virtual int encode() = 0; + template + void write_regs_2(REGS ®s, Table *tbl, Table::Actions::Action *act); + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +// target specific template specializations +#include "jbay/instruction.cpp" // NOLINT(build/include) +#include 
"tofino/instruction.cpp" // NOLINT(build/include) + +struct AluOP : VLIWInstruction { + enum special_flags { + Commutative = 1, + IgnoreSrc1 = 2, + IgnoreSrc2 = 4, + IgnoreSrcs = 6, + CanSliceWithConst = 8 + }; + const struct Decode : Instruction::Decode { + std::string name; + unsigned opcode; + const Decode *swap_args; + int flags = 0; + Decode(const char *n, unsigned opc, int flgs = 0, const char *alias_name = 0) + : Instruction::Decode(n), + name(n), + opcode(opc), + swap_args(flgs & Commutative ? this : 0), + flags(flgs) { + if (alias_name) alias(alias_name); + } + Decode(const char *n, target_t targ, unsigned opc, int flgs = 0) + : Instruction::Decode(n, targ), + name(n), + opcode(opc), + swap_args(flgs & Commutative ? this : 0), + flags(flgs) {} + Decode(const char *n, std::set targ, unsigned opc, int flgs = 0, + const char *alias_name = 0) + : Instruction::Decode(n, targ), + name(n), + opcode(opc), + swap_args(flgs & Commutative ? this : 0), + flags(flgs) { + if (alias_name) alias(alias_name); + } + Decode(const char *n, unsigned opc, int flgs, Decode *sw, const char *alias_name = 0) + : Instruction::Decode(n), name(n), opcode(opc), swap_args(sw), flags(flgs) { + if (sw && !sw->swap_args) sw->swap_args = this; + if (alias_name) alias(alias_name); + } + Decode(const char *n, unsigned opc, Decode *sw, const char *alias_name = 0) + : Instruction::Decode(n), name(n), opcode(opc), swap_args(sw) { + if (sw && !sw->swap_args) sw->swap_args = this; + if (alias_name) alias(alias_name); + } + Decode(const char *n, target_t targ, unsigned opc, Decode *sw, const char *alias_name = 0) + : Instruction::Decode(n, targ), name(n), opcode(opc), swap_args(sw) { + if (sw && !sw->swap_args) sw->swap_args = this; + if (alias_name) alias(alias_name); + } + Decode(const char *n, std::set targ, unsigned opc, Decode *sw, + const char *alias_name = 0) + : Instruction::Decode(n, targ), name(n), opcode(opc), swap_args(sw) { + if (sw && !sw->swap_args) sw->swap_args = this; + if 
(alias_name) alias(alias_name); + } + Decode(const char *n, std::set targ, unsigned opc, int flgs, Decode *sw, + const char *alias_name = 0) + : Instruction::Decode(n, targ), name(n), opcode(opc), swap_args(sw), flags(flgs) { + if (sw && !sw->swap_args) sw->swap_args = this; + if (alias_name) alias(alias_name); + } + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + Phv::Ref dest; + Operand src1, src2; + bool ignoreSrc1 = false, ignoreSrc2 = false; + AluOP(const Decode *op, Table *tbl, const Table::Actions::Action *act, const value_t &d, + const value_t &s1, const value_t &s2) + : VLIWInstruction(d.lineno), + opc(op), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src1(tbl, act, s1), + src2(tbl, act, s2) {} + std::string name() override { return opc->name; } + Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *tbl, Table::Actions::Action *) override { + if (!ignoreSrc1) src1->pass2(slot / Phv::mau_groupsize()); + if (!ignoreSrc2) src2->pass2(slot / Phv::mau_groupsize()); + } + int encode() override; + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { + bool rv = false; + if (!ignoreSrc1) rv |= src1.phvRead(fn); + if (!ignoreSrc2) rv |= src2.phvRead(fn); + return rv; + } + void dbprint(std::ostream &out) const override { + out << "INSTR: " << opc->name << ' ' << dest << ", " << src1 << ", " << src2; + } +}; + +struct AluOP3Src : AluOP { + struct Decode : AluOP::Decode { + Decode(const char *n, unsigned opc) : AluOP::Decode(n, opc) {} + Decode(const char *n, target_t t, unsigned opc) : AluOP::Decode(n, t, opc) {} + Decode(const char *n, std::set t, unsigned opc) : AluOP::Decode(n, t, opc) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + }; + Operand src3; + AluOP3Src(const Decode *op, Table *tbl, const Table::Actions::Action *act, const value_t &d, + 
const value_t &s1, const value_t &s2, const value_t &s3) + : AluOP(op, tbl, act, d, s1, s2), src3(tbl, act, s3) {} + Instruction *pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *tbl, Table::Actions::Action *); +}; + +Instruction *AluOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + AluOP *rv; + if (op.size == 4) { + rv = new AluOP(this, tbl, act, op.data[1], op.data[2], op.data[3]); + } else if (op.size == 3) { + if (!(flags & IgnoreSrc1) && (flags & IgnoreSrc2)) { + rv = new AluOP(this, tbl, act, op.data[1], op.data[2], op.data[2]); + rv->ignoreSrc2 = true; + } else { + rv = new AluOP(this, tbl, act, op.data[1], op.data[1], op.data[2]); + rv->ignoreSrc1 = (flags & IgnoreSrc1) != 0; + } + } else if (op.size == 3 && (flags & IgnoreSrc1) && (flags & IgnoreSrc2)) { + rv = new AluOP(this, tbl, act, op.data[1], op.data[1], op.data[1]); + rv->ignoreSrc1 = rv->ignoreSrc2 = true; + } else { + error(op[0].lineno, "%s requires 2 or 3 operands", op[0].s); + return 0; + } + if (!rv->src1.valid()) + error(op[2].lineno, "invalid src1"); + else if (!rv->src2.valid()) + error(op[3].lineno, "invalid src2"); + else + return rv; + delete rv; + return 0; +} +Instruction *AluOP3Src::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 5) { + if (op.size < 3 || op.size > 5) { + error(op[0].lineno, "%s requires 2, 3 or 4 operands", op[0].s); + return 0; + } else { + } + return AluOP::Decode::decode(tbl, act, op); + } + auto rv = new AluOP3Src(this, tbl, act, op.data[1], op.data[2], op.data[3], op.data[4]); + if (!rv->src1.valid()) + error(op[2].lineno, "invalid src1"); + else if (!rv->src2.valid()) + error(op[3].lineno, "invalid src2"); + else if (!rv->src3.valid()) + error(op[3].lineno, "invalid src3"); + else + return rv; + delete rv; + return 0; +} + +static bool will_pad_with_zeros(const Phv::Slice &dest, Table::Actions::Action *, + Operand::Action 
*ad) { + if (ad->lo != dest.lo || ad->hi != dest.hi) { + // need to line up with the destination, if it doesn't reject + // FIXME could we rotate the data in the field if everything else was ok? The + // compiler should have done that already + return false; + } + if (ad->field->bits.size() != 1) { + // punt for split fields. Not sure this can ever happen + return false; + } + // If Operand::Action is for immediate, check if the immediate is at the top end of the + // immediate overhead. The immediate extract mask in these cases will set the additional bits to + // zero (zero extend). Hence we dont need to check if the size is the same as destination + // register size. + // This check will be false for cases when Operand::Action is not immediate as immed_size will + // be zero + if (ad->field->immed_bit(ad->field->size) == ad->field->fmt->immed_size) return true; + if (ad->field->size < dest.reg.size) { + // field not big enough + return false; + } + // FIXME -- should check that the action has no other uses of this AD operand that uses + // other bits? 
Not trivial to do + return true; +} + +Instruction *AluOP::pass1(Table *tbl, Table::Actions::Action *act) { + if (!dest.check()) return this; + if (!ignoreSrc1 && !src1.check()) return this; + if (!ignoreSrc2 && !src2.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->reg.type != Phv::Register::NORMAL) { + error(dest.lineno, "%s dest can't be dark or mocha phv", opc->name.c_str()); + return this; + } + slot = dest->reg.mau_id(); + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + if (!ignoreSrc1) src1->pass1(tbl, slot / Phv::mau_groupsize()); + if (!ignoreSrc2) src2->pass1(tbl, slot / Phv::mau_groupsize()); + if (!ignoreSrc2 && src2.phvGroup() < 0 && opc->swap_args) { + std::swap(src1, src2); + std::swap(ignoreSrc1, ignoreSrc2); + opc = opc->swap_args; + } + if (!ignoreSrc2 && src2.phvGroup() < 0) error(lineno, "src2 must be phv register"); + if (dest->lo || dest->hi != dest->reg.size - 1) { + if ((opc->flags & CanSliceWithConst) && Operand(dest) == src2) { + // special case -- bitwise op wih dest==src2 and src1 is a constant or action + // data that is padded with 0s can just operate on the whole container to get + // the right result + auto *k = src1.to(); + if (k && k->value >= 0 && (k->value << dest->lo) < 8) { + k->value <<= dest->lo; + // FIXME -- should rewrite dest and src2 to refer to the whole container for + // strict correctness? 
We don't actually look at the slice after this so maybe ok + return this; + } + auto *ad = src1.to(); + if (ad && will_pad_with_zeros(*dest, act, ad)) return this; + } + error(lineno, "ALU ops cannot operate on slices"); + } + return this; +} +Instruction *AluOP3Src::pass1(Table *tbl, Table::Actions::Action *act) { + AluOP::pass1(tbl, act); + src3->pass1(tbl, slot / Phv::mau_groupsize()); + if (!src3.to()) error(lineno, "src3 must be on the action bus"); + return this; +} +void AluOP3Src::pass2(Table *tbl, Table::Actions::Action *act) { + AluOP::pass2(tbl, act); + src3->pass2(slot / Phv::mau_groupsize()); + if (auto s1 = src1.to()) { + auto s3 = src3.to(); + if (s1->bits(slot / Phv::mau_groupsize()) + 1 != s3->bits(slot / Phv::mau_groupsize())) + error(lineno, "src1 and src3 must be adjacent on the action bus"); + } else { + error(lineno, "src1 must be on the action bus"); + } +} + +int AluOP::encode() { + int rv = (opc->opcode << 6); + if (!ignoreSrc1) rv |= src1.bits(slot / Phv::mau_groupsize()); + rv <<= Target::INSTR_SRC2_BITS(); + if (!ignoreSrc2) rv |= src2.bits(slot / Phv::mau_groupsize()); + return rv; +} +bool AluOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return opc == a->opc && dest == a->dest && src1 == a->src1 && src2 == a->src2 && + ignoreSrc1 == a->ignoreSrc1 && ignoreSrc2 == a->ignoreSrc2; + } else { + return false; + } +} + +struct LoadConst : VLIWInstruction { + struct Decode : Instruction::Decode { + Decode(const char *n, std::set targ) : Instruction::Decode(n, targ) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + }; + Phv::Ref dest; + int src; + LoadConst(Table *tbl, const Table::Actions::Action *act, const value_t &d, int s) + : VLIWInstruction(d.lineno), dest(tbl->gress, tbl->stage->stageno + 1, d), src(s) {} + LoadConst(int line, Phv::Ref &d, int v) : VLIWInstruction(line), dest(d), src(v) {} + std::string name() override { return ""; } + 
Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *, Table::Actions::Action *) override {} + int encode() override { return Target::encodeConst(src); } + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { return false; } + void dbprint(std::ostream &out) const override { out << "INSTR: set " << dest << ", " << src; } +}; + +Instruction *LoadConst::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 3) { + error(op[0].lineno, "%s requires 2 operands", op[0].s); + return 0; + } + if (!CHECKTYPE(op[2], tINT)) return 0; + return new LoadConst(tbl, act, op[1], op[2].i); +} + +Instruction *LoadConst::pass1(Table *tbl, Table::Actions::Action *) { + if (!dest.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->reg.type != Phv::Register::NORMAL) { + error(dest.lineno, "load-const dest can't be dark or mocha phv"); + return this; + } + if (dest->lo || dest->hi != dest->reg.size - 1) { + error(lineno, "load-const cannot operate on slices"); + return this; + } + slot = dest->reg.mau_id(); + int size = Phv::reg(slot)->size; + BUG_CHECK(size > 0, "bad register size"); + int minval = ~0u << (size - 1); + if (size > 21) { + size = 21; + minval = 0; + } + // For an 8 or 16 bit PHV, the constant to load is 8 (or 16) bits, so + // there's no need for sign extension to deal with a negative value. For + // 32 bit PHVs, the constant is 21 bits and zero-extended to 32 bits, so + // must be positive. 
+ if (src >= (1 << size) || src < minval) error(lineno, "Constant value %d out of range", src); + src &= (1 << size) - 1; + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + return this; +} + +bool LoadConst::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return dest == a->dest && src == a->src; + } else { + return false; + } +} + +struct CondMoveMux : VLIWInstruction { + const struct Decode : Instruction::Decode { + std::string name; + unsigned opcode, cond_size; + bool src2opt; + Decode(const char *name, unsigned opc, unsigned csize, bool s2opt, const char *alias_name) + : Instruction::Decode(name), name(name), opcode(opc), cond_size(csize), src2opt(s2opt) { + alias(alias_name); + } + Decode(const char *name, target_t targ, unsigned opc, unsigned csize, bool s2opt, + const char *alias_name) + : Instruction::Decode(name, targ), + name(name), + opcode(opc), + cond_size(csize), + src2opt(s2opt) { + alias(alias_name); + } + Decode(const char *name, std::set targ, unsigned opc, unsigned csize, bool s2opt, + const char *alias_name) + : Instruction::Decode(name, targ), + name(name), + opcode(opc), + cond_size(csize), + src2opt(s2opt) { + alias(alias_name); + } + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + Phv::Ref dest; + Operand src1, src2; + unsigned cond = 0; + CondMoveMux(Table *tbl, const Decode *op, const Table::Actions::Action *act, const value_t &d, + const value_t &s) + : VLIWInstruction(d.lineno), + opc(op), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src1(tbl, act, s), + src2(tbl->gress, tbl->stage->stageno, d) {} + CondMoveMux(Table *tbl, const Decode *op, const Table::Actions::Action *act, const value_t &d, + const value_t &s1, const value_t &s2) + : VLIWInstruction(d.lineno), + opc(op), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src1(tbl, act, s1), + src2(tbl, act, s2) {} + std::string name() { return opc->name; } + Instruction 
*pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *tbl, Table::Actions::Action *) { + src1->pass2(slot / Phv::mau_groupsize()); + src2->pass2(slot / Phv::mau_groupsize()); + } + int encode(); + bool equiv(Instruction *a_); + bool phvRead(std::function fn) { + bool rv = false; + if (cond & 1) { + fn(*dest); + rv = true; + } + rv |= src1.phvRead(fn); + if (!opc->src2opt || (cond & 4)) rv |= src2.phvRead(fn); + return rv; + } + void dbprint(std::ostream &out) const { + out << "INSTR: cmov " << dest << ", " << src1 << ", " << src2; + } +}; + +Instruction *CondMoveMux::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 5 && (op.size != 4 || !src2opt)) { + error(op[0].lineno, "%s requires %s4 operands", op[0].s, src2opt ? "3 or " : ""); + return 0; + } + if (!CHECKTYPE(op[op.size - 1], tINT)) { + if (op[op.size - 1].i < 0 || op[op.size - 1].i >= (1 << cond_size)) { + error(op[op.size - 1].lineno, "%s condition must be %d-bit constant", op[0].s, + cond_size); + return 0; + } + } + CondMoveMux *rv; + if (op.size == 5) + rv = new CondMoveMux(tbl, this, act, op[1], op[2], op[3]); + else + rv = new CondMoveMux(tbl, this, act, op[1], op[2]); + rv->cond = op[op.size - 1].i; + if (!rv->src1.valid()) + error(op[2].lineno, "invalid src1"); + else if (!rv->src2.valid()) + error(op[3].lineno, "invalid src2"); + else + return rv; + delete rv; + return 0; +} + +Instruction *CondMoveMux::pass1(Table *tbl, Table::Actions::Action *) { + if (!dest.check() || !src1.check() || !src2.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->reg.type != Phv::Register::NORMAL) { + error(dest.lineno, "%s dest can't be dark or mocha phv", opc->name.c_str()); + return this; + } + slot = dest->reg.mau_id(); + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + src1->pass1(tbl, slot / Phv::mau_groupsize()); + 
src2->pass1(tbl, slot / Phv::mau_groupsize()); + return this; +} +int CondMoveMux::encode() { + int rv = (cond << 11) | (opc->opcode << 6) | src1.bits(slot / Phv::mau_groupsize()); + rv <<= Target::INSTR_SRC2_BITS(); + /* funny cond test on src2 is to match the compiler output -- if we're not testing + * src2 validity, what we specify as src2 is irrelevant */ + return rv | (cond & 0x40 ? src2.bits(slot / Phv::mau_groupsize()) : 0); +} +bool CondMoveMux::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return opc == a->opc && dest == a->dest && src1 == a->src1 && src2 == a->src2 && + cond == a->cond; + } else { + return false; + } +} + +/** + * This instruction represents the Byte-Rotate-Merge instruction described in the + * uArch section 14.1.6.5 Byte-rotate-merge section. + */ +struct ByteRotateMerge : VLIWInstruction { + struct Decode : Instruction::Decode { + Decode() : Instruction::Decode("byte_rotate_merge") { alias("byte-rotate-merge"); } + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const; + }; + Phv::Ref dest; + Operand src1, src2; + int src1_shift, src2_shift; + bitvec byte_mask; + ByteRotateMerge(Table *tbl, const Table::Actions::Action *act, const value_t &d, + const value_t &s1, const value_t &s2, int s1s, int s2s, int bm) + : VLIWInstruction(d.lineno), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src1(tbl, act, s1), + src2(tbl, act, s2), + src1_shift(s1s), + src2_shift(s2s), + byte_mask(bm) {} + + std::string name() { return "byte_rotate_merge"; } + Instruction *pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *tbl, Table::Actions::Action *) { + src1->pass2(slot / Phv::mau_groupsize()); + src2->pass2(slot / Phv::mau_groupsize()); + } + int encode(); + bool equiv(Instruction *a_); + bool phvRead(std::function fn) { + return src1.phvRead(fn) | src2.phvRead(fn); + } + void dbprint(std::ostream &out) const { + out << "INSTR: byte_rotate_merge " << dest << ", " << src1 
<< ", " << src2 << " " + << byte_mask; + } +}; + +/** + * Unlike deposit-field, because of the non-contiguity of both sources possibly, the + * full instruction with both sources, shifts and byte mask are required + */ +Instruction *ByteRotateMerge::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 7) { + error(op[0].lineno, "%s requires 6 operands", op[0].s); + return 0; + } + if (!CHECKTYPE(op[4], tINT) || !CHECKTYPE(op[5], tINT) || !CHECKTYPE(op[6], tINT)) { + error(op[0].lineno, "%s requires operands 3-5 to be ints", op[0].s); + return 0; + } + + ByteRotateMerge *rv = + new ByteRotateMerge(tbl, act, op[1], op[2], op[3], op[4].i, op[5].i, op[6].i); + if (!rv->src1.valid()) + error(op[2].lineno, "invalid src1"); + else if (!rv->src2.valid()) + error(op[3].lineno, "invalid src2"); + else + return rv; + delete rv; + return 0; +} + +/** + * The shifts at most can be container.size / 8 and the byte mask bit count can be at most + * container.size / 8. 
+ */ +Instruction *ByteRotateMerge::pass1(Table *tbl, Table::Actions::Action *) { + if (!dest.check() || !src1.check() || !src2.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->reg.type != Phv::Register::NORMAL) { + error(dest.lineno, "byte-rotate-merge dest can't be dark or mocha phv"); + return this; + } + if (dest->reg.size == 8) { + error(dest.lineno, "byte-rotate-merge invalid on 8 bit containers"); + return this; + } + if (byte_mask.max().index() > dest->reg.size / 8) { + error(dest.lineno, "byte-rotate-merge mask beyond container size bounds"); + return this; + } + if (src1_shift > dest->reg.size / 8) { + error(dest.lineno, "byte-rotate-merge src1_shift beyond container size bounds"); + return this; + } + if (src2_shift > dest->reg.size / 8) { + error(dest.lineno, "byte-rotate-merge src2_shift beyond container size bounds"); + return this; + } + slot = dest->reg.mau_id(); + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + src1->pass1(tbl, slot / Phv::mau_groupsize()); + src2->pass1(tbl, slot / Phv::mau_groupsize()); + src2->pass1(tbl, slot / Phv::mau_groupsize()); + if (src2.phvGroup() < 0) { + std::swap(src1, src2); + std::swap(src1_shift, src2_shift); + byte_mask = bitvec(0, dest->reg.size / 8) - byte_mask; + } + if (src2.phvGroup() < 0) error(lineno, "src2 must be phv register"); + return this; +} + +int ByteRotateMerge::encode() { + int bits = (0xa << 6) | src1.bits(slot / Phv::mau_groupsize()); + bits |= (byte_mask.getrange(0, 4)) << 10; + bits |= (src1_shift << 17); + bits |= (src2_shift << 15); + bits <<= Target::INSTR_SRC2_BITS(); + return bits | src2.bits(slot / Phv::mau_groupsize()); +} + +bool ByteRotateMerge::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return dest == a->dest && src1 == a->src1 && src2 == a->src2 && byte_mask == a->byte_mask && + src1_shift == a->src1_shift && src2_shift == a->src2_shift; + } else { + 
return false; + } +} + +struct Set; + +struct DepositField : VLIWInstruction { + struct Decode : Instruction::Decode { + Decode() : Instruction::Decode("deposit_field") { alias("deposit-field"); } + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + }; + Phv::Ref dest; + Operand src1, src2; + DepositField(Table *tbl, const Table::Actions::Action *act, const value_t &d, const value_t &s) + : VLIWInstruction(d.lineno), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src1(tbl, act, s), + src2(tbl->gress, tbl->stage->stageno, d) {} + DepositField(Table *tbl, const Table::Actions::Action *act, const value_t &d, const value_t &s1, + const value_t &s2) + : VLIWInstruction(d.lineno), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src1(tbl, act, s1), + src2(tbl, act, s2) {} + DepositField(Table *tbl, const Set &); + std::string name() { return "deposit_field"; } + Instruction *pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *tbl, Table::Actions::Action *) { + src1->pass2(slot / Phv::mau_groupsize()); + src2->pass2(slot / Phv::mau_groupsize()); + } + int encode(); + bool equiv(Instruction *a_); + bool phvRead(std::function fn) { + return src1.phvRead(fn) | src2.phvRead(fn); + } + void dbprint(std::ostream &out) const { + out << "INSTR: deposit_field " << dest << ", " << src1 << ", " << src2; + } +}; + +Instruction *DepositField::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 4 && op.size != 3) { + error(op[0].lineno, "%s requires 2 or 3 operands", op[0].s); + return 0; + } + DepositField *rv; + if (op.size == 4) + rv = new DepositField(tbl, act, op[1], op[2], op[3]); + else + rv = new DepositField(tbl, act, op[1], op[2]); + if (!rv->src1.valid()) + error(op[2].lineno, "invalid src1"); + else if (!rv->src2.valid()) + error(op[3].lineno, "invalid src2"); + else + return rv; + delete rv; + return 0; +} + +Instruction 
*DepositField::pass1(Table *tbl, Table::Actions::Action *act) { + if (!dest.check() || !src1.check() || !src2.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->reg.type != Phv::Register::NORMAL) { + error(dest.lineno, "deposit-field dest can't be dark or mocha phv"); + return this; + } + slot = dest->reg.mau_id(); + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + src1->pass1(tbl, slot / Phv::mau_groupsize()); + src2->pass1(tbl, slot / Phv::mau_groupsize()); + return this; +} +int DepositField::encode() { + // If src1 is an Operand::Const (and we pass a valid dest_size), + // we will recieve the combined rotation + bits from DepositField::discoverRotation(). + // Otherwise the top 'RotationBits' will be zero. + int rotConst = src1.bits(slot / Phv::mau_groupsize(), dest.size()); + unsigned rot = rotConst >> RotationBits; + rot += dest->reg.size - dest->lo + src1.bitoffset(slot / Phv::mau_groupsize()); + rot %= dest->reg.size; + int bits = rotConst & ((1U << RotationBits) - 1); + bits |= (1 << 6); + bits |= dest->hi << 7; + bits |= rot << 12; + switch (Phv::reg(slot)->size) { + case 8: + bits |= (dest->lo & 3) << 10; + bits |= (dest->lo & ~3) << 13; + break; + case 16: + bits |= (dest->lo & 1) << 11; + bits |= (dest->lo & ~1) << 15; + break; + case 32: + bits |= dest->lo << 17; + break; + default: + BUG(); + } + bits <<= Target::INSTR_SRC2_BITS(); + return bits | src2.bits(slot / Phv::mau_groupsize()); +} +bool DepositField::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return dest == a->dest && src1 == a->src1 && src2 == a->src2; + } else { + return false; + } +} + +struct Set : VLIWInstruction { + struct Decode : Instruction::Decode { + std::string name; + Decode(const char *n, std::set targ) : Instruction::Decode(n, targ), name(n) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const 
override; + }; + Phv::Ref dest; + Operand src; + static AluOP::Decode *opA; + Set(Table *tbl, const Table::Actions::Action *act, const value_t &d, const value_t &s) + : VLIWInstruction(d.lineno), + dest(tbl->gress, tbl->stage->stageno + 1, d), + src(tbl, act, s) {} + std::string name() { return "set"; } + Instruction *pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *tbl, Table::Actions::Action *) { src->pass2(slot / Phv::mau_groupsize()); } + int encode(); + bool equiv(Instruction *a_); + bool phvRead(std::function fn) { return src.phvRead(fn); } + void dbprint(std::ostream &out) const { out << "INSTR: set " << dest << ", " << src; } +}; + +DepositField::DepositField(Table *tbl, const Set &s) + : VLIWInstruction(s), dest(s.dest), src1(s.src), src2(::Phv::Ref(s.dest->reg, tbl->gress)) {} + +Instruction *Set::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 3) { + error(op[0].lineno, "%s requires 2 operands", op[0].s); + return 0; + } + Set *rv = new Set(tbl, act, op[1], op[2]); + if (!rv->src.valid()) + error(op[2].lineno, "invalid src"); + else + return rv; + delete rv; + return 0; +} + +Instruction *Set::pass1(Table *tbl, Table::Actions::Action *act) { + if (!dest.check() || !src.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->lo || dest->hi != dest->reg.size - 1) + return (new DepositField(tbl, *this))->pass1(tbl, act); + if (auto *k = src.to()) { + if (dest->reg.type == Phv::Register::DARK) { + error(dest.lineno, "can't set dark phv to a constant"); + return this; + } + int minsignconst = Target::MINIMUM_INSTR_CONSTANT(); + // Translate large value with negative value, e.g. 
0xFFFE -> -2 on 16-bit PHV + int64_t maxvalue = 1LL << dest->reg.size; + int64_t delta = k->value - maxvalue; + if (delta >= minsignconst) k->value = delta; + if (k->value < minsignconst || k->value >= 8) + return (new LoadConst(lineno, dest, k->value))->pass1(tbl, act); + } + slot = dest->reg.mau_id(); + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + src->pass1(tbl, slot / Phv::mau_groupsize()); + return this; +} + +int Set::encode() { + int rv = src.bits(slot / Phv::mau_groupsize()); + switch (dest->reg.type) { + case Phv::Register::NORMAL: + rv |= (opA->opcode << 6); + rv <<= Target::INSTR_SRC2_BITS(); + rv |= (slot & 0xf); + break; + case Phv::Register::MOCHA: + rv |= 0x40; + break; + case Phv::Register::DARK: + rv |= 0x20; + break; + default: + BUG(); + } + return rv; +} + +bool Set::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return dest == a->dest && src == a->src; + } else { + return false; + } +} + +struct NulOP : VLIWInstruction { + const struct Decode : Instruction::Decode { + std::string name; + unsigned opcode; + Decode(const char *n, unsigned opc) : Instruction::Decode(n), name(n), opcode(opc) {} + Decode(const char *n, target_t targ, unsigned opc) + : Instruction::Decode(n, targ), name(n), opcode(opc) {} + Decode(const char *n, std::set targ, unsigned opc) + : Instruction::Decode(n, targ), name(n), opcode(opc) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + Phv::Ref dest; + NulOP(Table *tbl, const Table::Actions::Action *act, const Decode *o, const value_t &d) + : VLIWInstruction(d.lineno), opc(o), dest(tbl->gress, tbl->stage->stageno + 1, d) {} + std::string name() { return opc->name; } + Instruction *pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *, Table::Actions::Action *) {} + int encode(); + bool equiv(Instruction *a_); + bool phvRead(std::function fn) { return false; } + void dbprint(std::ostream &out) const { out 
<< "INSTR: " << opc->name << " " << dest; } +}; + +Instruction *NulOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != 2) { + error(op[0].lineno, "%s requires 1 operand", op[0].s); + return 0; + } + return new NulOP(tbl, act, this, op[1]); +} + +Instruction *NulOP::pass1(Table *tbl, Table::Actions::Action *) { + if (!dest.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + slot = dest->reg.mau_id(); + if (opc->opcode || !options.match_compiler) { + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + } + return this; +} +int NulOP::encode() { return opc->opcode; } +bool NulOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return opc == a->opc && dest == a->dest; + } else { + return false; + } +} + +struct ShiftOP : VLIWInstruction { + const struct Decode : Instruction::Decode { + std::string name; + unsigned opcode; + bool use_src1; + Decode(const char *n, std::set targ, unsigned opc, bool funnel = false) + : Instruction::Decode(n, targ), name(n), opcode(opc), use_src1(funnel) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + Phv::Ref dest; + Operand src1, src2; + int shift = 0; + ShiftOP(const Decode *d, Table *tbl, const Table::Actions::Action *act, const value_t *ops) + : VLIWInstruction(ops->lineno), + opc(d), + dest(tbl->gress, tbl->stage->stageno + 1, ops[0]), + src1(tbl, act, ops[1]), + src2(tbl, act, ops[2]) { + if (opc->use_src1) { + if (CHECKTYPE(ops[3], tINT)) shift = ops[3].i; + } else { + src2 = src1; + if (CHECKTYPE(ops[2], tINT)) shift = ops[2].i; + } + } + std::string name() { return opc->name; } + Instruction *pass1(Table *tbl, Table::Actions::Action *); + void pass2(Table *tbl, Table::Actions::Action *) { + src1->pass2(slot / Phv::mau_groupsize()); + src2->pass2(slot / 
Phv::mau_groupsize()); + } + int encode(); + bool equiv(Instruction *a_); + bool phvRead(std::function fn) { + return src1.phvRead(fn) | src2.phvRead(fn); + } + void dbprint(std::ostream &out) const { + out << "INSTR: " << opc->name << ' ' << dest << ", " << src1 << ", " << shift; + } +}; + +Instruction *ShiftOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + if (op.size != (use_src1 ? 5 : 4)) { + error(op[0].lineno, "%s requires %d operands", op[0].s, use_src1 ? 4 : 3); + return 0; + } + ShiftOP *rv = new ShiftOP(this, tbl, act, op.data + 1); + if (!rv->src1.valid()) + error(op[2].lineno, "invalid src1"); + else if (!rv->src2.valid()) + error(op[3].lineno, "invalid src2"); + else if (rv->shift < 0 || rv->shift > 0x1f) + error(op[3].lineno, "invalid shift"); + else + return rv; + delete rv; + return 0; +} + +Instruction *ShiftOP::pass1(Table *tbl, Table::Actions::Action *) { + if (!dest.check() || !src1.check() || !src2.check()) return this; + if (dest->reg.mau_id() < 0) { + error(dest.lineno, "%s not accessable in mau", dest->reg.name); + return this; + } + if (dest->reg.type != Phv::Register::NORMAL) { + error(dest.lineno, "%s dest can't be dark or mocha phv", opc->name.c_str()); + return this; + } + if (dest->lo) { + error(lineno, "shift ops cannot operate on slices"); + return this; + } + slot = dest->reg.mau_id(); + tbl->stage->action_set[tbl->gress][dest->reg.uid] = true; + src1->pass1(tbl, slot / Phv::mau_groupsize()); + src2->pass1(tbl, slot / Phv::mau_groupsize()); + if (src2.phvGroup() < 0) error(lineno, "src%s must be phv register", opc->use_src1 ? 
"2" : ""); + return this; +} +int ShiftOP::encode() { + int rv = (shift << 12) | (opc->opcode << 6); + if (opc->use_src1 || options.match_compiler) rv |= src1.bits(slot / Phv::mau_groupsize()); + rv <<= Target::INSTR_SRC2_BITS(); + return rv | src2.bits(slot / Phv::mau_groupsize()); +} +bool ShiftOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) { + return opc == a->opc && dest == a->dest && src1 == a->src1 && src2 == a->src2 && + shift == a->shift; + } else { + return false; + } +} + +static std::set tofino12 = std::set({ + TOFINO, + JBAY, +}); + +// lifted from MAU uArch 15.1.6 +// If the operation is commutative operand swap is enabled +// OPNAME OPCODE +static AluOP::Decode opADD("add", tofino12, 0x23e, AluOP::Commutative), // NOLINT + opADDC("addc", tofino12, 0x2be, AluOP::Commutative), // NOLINT + opSUB("sub", tofino12, 0x33e), // NOLINT + opSUBC("subc", tofino12, 0x3be), // NOLINT + opSADDU("saddu", tofino12, 0x03e, AluOP::Commutative), // NOLINT + opSADDS("sadds", tofino12, 0x07e, AluOP::Commutative), // NOLINT + opSSUBU("ssubu", tofino12, 0x0be), // NOLINT + opSSUBS("ssubs", tofino12, 0x0fe), // NOLINT + opMINU("minu", tofino12, 0x13e, AluOP::Commutative), // NOLINT + opMINS("mins", tofino12, 0x17e, AluOP::Commutative), // NOLINT + opMAXU("maxu", tofino12, 0x1be, AluOP::Commutative), // NOLINT + opMAXS("maxs", tofino12, 0x1fe, AluOP::Commutative), // NOLINT + opSETZ("setz", tofino12, 0x01e, AluOP::Commutative + AluOP::IgnoreSrcs), // NOLINT + opNOR("nor", tofino12, 0x05e, AluOP::Commutative), // NOLINT + opANDCA("andca", tofino12, 0x09e, AluOP::CanSliceWithConst), // NOLINT + opANDCB("andcb", tofino12, 0x11e, &opANDCA), // NOLINT + opNOTB("notb", tofino12, 0x15e, AluOP::IgnoreSrc1, "not"), // NOLINT + opNOTA("nota", tofino12, 0x0de, AluOP::IgnoreSrc2, &opNOTB), // NOLINT + opXOR("xor", tofino12, 0x19e, AluOP::Commutative + AluOP::CanSliceWithConst), // NOLINT + opNAND("nand", tofino12, 0x1de, AluOP::Commutative), // NOLINT + opAND("and", 
tofino12, 0x21e, AluOP::Commutative), // NOLINT + opXNOR("xnor", tofino12, 0x25e, AluOP::Commutative), // NOLINT + opB("alu_b", tofino12, 0x29e, AluOP::IgnoreSrc1), // NOLINT + opORCA("orca", tofino12, 0x2de), // NOLINT + opA("alu_a", tofino12, 0x31e, AluOP::IgnoreSrc2, &opB), // NOLINT + opORCB("orcb", tofino12, 0x35e, &opORCA), // NOLINT + opOR("or", tofino12, 0x39e, AluOP::Commutative + AluOP::CanSliceWithConst), // NOLINT + opSETHI("sethi", tofino12, 0x3de, AluOP::Commutative + AluOP::IgnoreSrcs); // NOLINT +static LoadConst::Decode opLoadConst("load-const", tofino12); // NOLINT +static Set::Decode opSet("set", tofino12); // NOLINT +static NulOP::Decode opNoop("noop", tofino12, 0x0); // NOLINT +static ShiftOP::Decode opSHL("shl", tofino12, 0x0c, false), // NOLINT + opSHRS("shrs", tofino12, 0x1c, false), // NOLINT + opSHRU("shru", tofino12, 0x14, false), // NOLINT + opFUNSHIFT("funnel-shift", tofino12, 0x4, true); // NOLINT +static DepositField::Decode opDepositField; +static ByteRotateMerge::Decode opByteRotateMerge; + +AluOP::Decode *Set::opA = &VLIW::opA; + +static AluOP3Src::Decode tf_opBMSET("bitmasked-set", TOFINO, 0x2e); // NOLINT +static CondMoveMux::Decode tf_opCondMove("cmov", TOFINO, 0x16, true, 5, + "conditional-move"); // NOLINT +static CondMoveMux::Decode tf_opCondMux("cmux", TOFINO, 0x6, false, 2, + "conditional-mux"); // NOLINT +static NulOP::Decode tf_opInvalidate("invalidate", TOFINO, 0x3800); // NOLINT + +static std::set jb_targets = std::set({ + JBAY, +}); + +static AluOP3Src::Decode jb_opBMSET("bitmasked-set", jb_targets, 0x0e); // NOLINT +static CondMoveMux::Decode jb_opCondMove("cmov", jb_targets, 0x6, true, 5, + "conditional-move"); // NOLINT +static AluOP::Decode jb_opGTEQU("gtequ", jb_targets, 0x02e), // NOLINT + jb_opGTEQS("gteqs", jb_targets, 0x06e), // NOLINT + jb_opLTU("ltu", jb_targets, 0x0ae), // NOLINT + jb_opLTS("lts", jb_targets, 0x0ee), // NOLINT + jb_opLEQU("lequ", jb_targets, 0x12e, &jb_opGTEQU), // NOLINT + 
jb_opLEQS("leqs", jb_targets, 0x16e, &jb_opGTEQS), // NOLINT + jb_opGTU("gtu", jb_targets, 0x1ae, &jb_opLTU), // NOLINT + jb_opGTS("gts", jb_targets, 0x1ee, &jb_opLTS), // NOLINT + jb_opEQ("eq", jb_targets, 0x22e, AluOP::Commutative), // NOLINT + jb_opNEQ("neq", jb_targets, 0x2ae, AluOP::Commutative), // NOLINT + jb_opEQ64("eq64", jb_targets, 0x26e, AluOP::Commutative), // NOLINT + jb_opNEQ64("neq64", jb_targets, 0x2ee, AluOP::Commutative); // NOLINT + +std::unique_ptr genNoopFill(Table *tbl, Table::Actions::Action *act, const char *op, + int slot) { + VECTOR(value_t) args; + VECTOR_init(args, 3); + args.add(op).add(Phv::reg(slot)->name).add(Phv::reg(slot)->name); + std::unique_ptr rv(Instruction::decode(tbl, act, args)); + VECTOR_fini(args); + return rv; +} + +} // end namespace VLIW + +void dump(const Instruction &inst) { std::cout << inst << std::endl; } diff --git a/backends/tofino/bf-asm/instruction.h b/backends/tofino/bf-asm/instruction.h new file mode 100644 index 00000000000..48d2eaa721b --- /dev/null +++ b/backends/tofino/bf-asm/instruction.h @@ -0,0 +1,74 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_INSTRUCTION_H_ +#define BACKENDS_TOFINO_BF_ASM_INSTRUCTION_H_ + +#include + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/tables.h" + +struct Instruction : public IHasDbPrint { + int lineno; + int slot; + explicit Instruction(int l) : lineno(l), slot(-1) {} + virtual ~Instruction() {} + virtual Instruction *pass1(Table *, Table::Actions::Action *) = 0; + virtual std::string name() = 0; + virtual void pass2(Table *, Table::Actions::Action *) = 0; + virtual void dbprint(std::ostream &) const = 0; + virtual bool equiv(Instruction *a) = 0; + bool equiv(const std::unique_ptr &a) { return equiv(a.get()); } + virtual bool salu_output() const { return false; } + virtual bool salu_alu() const { return false; } + virtual bool phvRead(std::function) = 0; + bool phvRead() { + return phvRead([](const Phv::Slice &sl) {}); + } +#define VIRTUAL_TARGET_METHODS(TARGET) \ + virtual void write_regs(Target::TARGET::mau_regs &, Table *, Table::Actions::Action *) = 0; + FOR_ALL_REGISTER_SETS(VIRTUAL_TARGET_METHODS) +#undef VIRTUAL_TARGET_METHODS +#define DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS(TARGET) \ + void write_regs(Target::TARGET::mau_regs ®s, Table *tbl, Table::Actions::Action *act) \ + override; + static Instruction *decode(Table *, const Table::Actions::Action *, const VECTOR(value_t) &); + + enum instruction_set_t { VLIW_ALU = 0, STATEFUL_ALU = 1, NUM_SETS = 2 }; + struct Decode { + static std::multimap opcode[NUM_SETS]; + bool type_suffix; + unsigned targets; + explicit Decode(const char *name, int set = VLIW_ALU, bool ts = false); + Decode(const char *name, target_t target, int set = VLIW_ALU, bool ts = false); + Decode(const char *name, std::set target, int set = VLIW_ALU, bool ts = false); + const Decode &alias(const char *name, int set = VLIW_ALU, bool ts = false) { + opcode[set].emplace(name, this); + return *this; + } + virtual Instruction 
*decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const = 0; + }; +}; + +namespace VLIW { +std::unique_ptr genNoopFill(Table *tbl, Table::Actions::Action *act, const char *op, + int slot); +} + +#endif /* BACKENDS_TOFINO_BF_ASM_INSTRUCTION_H_ */ diff --git a/backends/tofino/bf-asm/j2b.cpp b/backends/tofino/bf-asm/j2b.cpp new file mode 100644 index 00000000000..3888f460d62 --- /dev/null +++ b/backends/tofino/bf-asm/j2b.cpp @@ -0,0 +1,48 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include + +#include "bson.h" + +int main(int ac, char **av) { + if (ac != 3) { + std::cerr << "usage " << av[0] << " " << std::endl; + return 1; + } + std::ifstream in(av[1]); + if (!in) { + std::cerr << "failed to open " << av[1] << std::endl; + return 1; + } + json::obj *data = nullptr; + if (!(in >> data)) { + std::cerr << "failed to read json" << std::endl; + return 1; + } + std::ofstream out(av[2]); + if (!out) { + std::cerr << "failed to open " << av[2] << std::endl; + return 1; + } + if (!(out << json::binary(data))) { + std::cerr << "failed to write bson" << std::endl; + return 1; + } + return 0; +} diff --git a/backends/tofino/bf-asm/jbay/CMakeLists.txt b/backends/tofino/bf-asm/jbay/CMakeLists.txt new file mode 100644 index 00000000000..c285f9ade6f --- /dev/null +++ b/backends/tofino/bf-asm/jbay/CMakeLists.txt @@ -0,0 +1,56 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# +# SPDX-License-Identifier: Apache-2.0 + +set (GEN_JBAY + memories.jbay_mem + memories.pipe_addrmap + memories.prsr_mem_main_rspec + regs.dprsr_reg + regs.epb_prsr4_reg + regs.ipb_prsr4_reg + regs.jbay_reg + regs.mau_addrmap + regs.pipe_addrmap + regs.pmerge_reg + regs.prsr_reg_main_rspec + ) + +foreach(f IN LISTS GEN_JBAY) + list (APPEND GEN_JBAY_SRCS ${BFASM_BINARY_DIR}/gen/jbay/${f}.cpp) + list (APPEND GEN_JBAY_HDRS ${BFASM_BINARY_DIR}/gen/jbay/${f}.h) +endforeach() + +add_custom_command(OUTPUT ${GEN_JBAY_HDRS} ${GEN_JBAY_SRCS} + COMMAND ${BFASM_WALLE} --schema chip.schema --generate-cpp template_objects.yaml -o ${BFASM_BINARY_DIR}/gen/jbay + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS template_objects.yaml chip.schema ${WALLE_SOURCES} + COMMENT "Generating cpp code for jbay from jbay/chip.schema") + +set_source_files_properties(${GEN_JBAY_SRCS} ${GEN_JBAY_HDRS} PROPERTIES GENERATED TRUE) + +set (BFAS_JBAY_SRCS + jbay/gateway.cpp + jbay/input_xbar.cpp + jbay/stateful.cpp + jbay/parser.cpp + PARENT_SCOPE + ) + +add_library (regs_jbay ${GEN_JBAY_SRCS}) +target_link_libraries (regs_jbay p4ctoolkit) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(regs_jbay PUBLIC -Wno-error -Wno-unused-parameter -Wno-unused-variable -Wno-type-limits -Wno-sign-compare) diff --git a/backends/tofino/bf-asm/jbay/chip.schema b/backends/tofino/bf-asm/jbay/chip.schema new file mode 100644 index 00000000000..5afef775e2d Binary files /dev/null and b/backends/tofino/bf-asm/jbay/chip.schema differ diff --git a/backends/tofino/bf-asm/jbay/counter.h b/backends/tofino/bf-asm/jbay/counter.h new file mode 100644 index 00000000000..0b6655964ab --- /dev/null +++ b/backends/tofino/bf-asm/jbay/counter.h @@ -0,0 +1,121 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JBAY_COUNTER_H_ +#define BACKENDS_TOFINO_BF_ASM_JBAY_COUNTER_H_ + +template +void CounterTable::setup_teop_regs_2(REGS ®s, int stats_group_index) { + BUG_CHECK(teop >= 0 && teop < 4); + BUG_CHECK(gress == EGRESS); + + auto &adrdist = regs.rams.match.adrdist; + + if (!teop_initialized) { + // assume this stage driving teop + auto delay = stage->pipelength(gress) - stage->pred_cycle(gress) - 7; + adrdist.teop_bus_ctl[teop].teop_bus_ctl_delay = delay; + adrdist.teop_bus_ctl[teop].teop_bus_ctl_delay_en = 1; + adrdist.teop_bus_ctl[teop].teop_bus_ctl_stats_en = 1; + + adrdist.stats_to_teop_adr_oxbar_ctl[teop].enabled_2bit_muxctl_select = stats_group_index; + adrdist.stats_to_teop_adr_oxbar_ctl[teop].enabled_2bit_muxctl_enable = 1; + teop_initialized = true; + } + + adrdist.teop_to_stats_adr_oxbar_ctl[stats_group_index].enabled_2bit_muxctl_select = teop; + adrdist.teop_to_stats_adr_oxbar_ctl[stats_group_index].enabled_2bit_muxctl_enable = 1; + + // count all tEOP events + adrdist.dp_teop_stats_ctl[stats_group_index].dp_teop_stats_ctl_err = 0; + // XXX is this always 2? 
+ adrdist.dp_teop_stats_ctl[stats_group_index].dp_teop_stats_ctl_rx_shift = 2; + adrdist.dp_teop_stats_ctl[stats_group_index].dp_teop_stats_ctl_rx_en = 1; + + auto &stats = regs.rams.map_alu.stats_wrap[stats_group_index].stats; + stats.statistics_ctl_teop_en = 1; +} + +template +void CounterTable::write_alu_vpn_range_2(REGS ®s) { + auto &adrdist = regs.rams.match.adrdist; + int minvpn, sparevpn; + + // Used to validate the BFA VPN configuration + std::set vpn_processed; + bitvec vpn_range; + + // Get Spare VPN + layout_vpn_bounds(minvpn, sparevpn, false); + + for (int home_row : home_rows) { + bool block_start = false; + bool block_end = false; + int min = 1000000; + int max = -1; + for (Layout &logical_row : layout) { + // Block Start with the home row and End with the Spare VPN + if (logical_row.row == home_row) block_start = true; + + if (block_start) { + for (auto v : logical_row.vpns) { + if (v == sparevpn) { + block_end = true; + break; + } + if (vpn_processed.count(v)) + error(home_lineno, "Multiple instance of the VPN %d detected", v); + else + vpn_processed.insert(v); + + if (v < min) min = v; + if (v > max) max = v; + } + } + if (block_end) { + BUG_CHECK(min != 1000000 && max != -1); + + bitvec block_range(min, max - min + 1); + if (vpn_range.intersects(block_range)) + error(home_lineno, "Overlapping of VPN range detected"); + else + vpn_range |= block_range; + + adrdist.mau_stats_alu_vpn_range[home_row / 4].stats_vpn_base = min; + adrdist.mau_stats_alu_vpn_range[home_row / 4].stats_vpn_limit = max; + adrdist.mau_stats_alu_vpn_range[home_row / 4].stats_vpn_range_check_enable = 1; + break; + } + } + BUG_CHECK(block_start && block_end); + } + + if (vpn_range != bitvec(minvpn, sparevpn - minvpn)) + error(home_lineno, "VPN range not entirely covered"); +} + +template <> +void CounterTable::setup_teop_regs(Target::JBay::mau_regs ®s, int stats_group_index) { + setup_teop_regs_2(regs, stats_group_index); +} + +template <> +void 
CounterTable::write_alu_vpn_range(Target::JBay::mau_regs ®s) { + write_alu_vpn_range_2(regs); +} + +#endif /* BACKENDS_TOFINO_BF_ASM_JBAY_COUNTER_H_ */ diff --git a/backends/tofino/bf-asm/jbay/deparser.cpp b/backends/tofino/bf-asm/jbay/deparser.cpp new file mode 100644 index 00000000000..e30402ac9b2 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/deparser.cpp @@ -0,0 +1,1092 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* deparser template specializations for jbay -- #included directly in top-level deparser.cpp */ + +#define YES(X) X +#define NO(X) + +#define JBAY_POV(GRESS, VAL, REG) \ + if (VAL.pov.size() == 1) \ + REG.pov = deparser.pov[GRESS].at(&VAL.pov.front()->reg) + VAL.pov.front()->lo; \ + else \ + error(VAL.val.lineno, "POV bit required for Tofino2"); + +#define JBAY_SIMPLE_INTRINSIC(GRESS, VAL, REG, IFSHIFT) \ + REG.phv = VAL.val->reg.deparser_id(); \ + JBAY_POV(GRESS, VAL, REG) \ + IFSHIFT(REG.shft = intrin.vals[0].val->lo;) + +#define JBAY_ARRAY_INTRINSIC(GRESS, VAL, ARRAY, REG, POV, IFSHIFT) \ + for (auto &r : ARRAY) { \ + r.REG.phv = VAL.val->reg.deparser_id(); \ + IFSHIFT(r.REG.shft = intrin.vals[0].val->lo;) \ + } \ + JBAY_POV(GRESS, VAL, POV) + +#define EI_INTRINSIC(NAME, IFSHIFT) \ + DEPARSER_INTRINSIC(JBay, EGRESS, NAME, 1) { \ + JBAY_SIMPLE_INTRINSIC(EGRESS, intrin.vals[0], regs.dprsrreg.inp.ipp.egr.m_##NAME, IFSHIFT) \ + } +#define 
HO_E_INTRINSIC(NAME, IFSHIFT) \ + DEPARSER_INTRINSIC(JBay, EGRESS, NAME, 1) { \ + JBAY_ARRAY_INTRINSIC(EGRESS, intrin.vals[0], regs.dprsrreg.ho_e, her.meta.m_##NAME, \ + regs.dprsrreg.inp.icr.egr_meta_pov.m_##NAME, IFSHIFT) \ + } +#define II_INTRINSIC(NAME, IFSHIFT) \ + DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \ + JBAY_SIMPLE_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.inp.ipp.ingr.m_##NAME, \ + IFSHIFT) \ + } +#define II_INTRINSIC_RENAME(NAME, REGNAME, IFSHIFT) \ + DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \ + JBAY_SIMPLE_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.inp.ipp.ingr.m_##REGNAME, \ + IFSHIFT) \ + } +#define HO_I_INTRINSIC(NAME, IFSHIFT) \ + DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \ + JBAY_ARRAY_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.ho_i, hir.meta.m_##NAME, \ + regs.dprsrreg.inp.icr.ingr_meta_pov.m_##NAME, IFSHIFT) \ + } +#define HO_I_INTRINSIC_RENAME(NAME, REGNAME, IFSHIFT) \ + DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \ + JBAY_ARRAY_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.ho_i, hir.meta.m_##REGNAME, \ + regs.dprsrreg.inp.icr.ingr_meta_pov.m_##REGNAME, IFSHIFT) \ + } + +EI_INTRINSIC(drop_ctl, YES) +EI_INTRINSIC(egress_unicast_port, NO) +HO_E_INTRINSIC(afc, YES) +HO_E_INTRINSIC(capture_tx_ts, YES) +HO_E_INTRINSIC(force_tx_err, YES) +HO_E_INTRINSIC(tx_pkt_has_offsets, YES) +HO_E_INTRINSIC(mirr_c2c_ctrl, YES) +HO_E_INTRINSIC(mirr_coal_smpl_len, YES) +HO_E_INTRINSIC(mirr_dond_ctrl, YES) +HO_E_INTRINSIC(mirr_epipe_port, YES) +HO_E_INTRINSIC(mirr_hash, YES) +HO_E_INTRINSIC(mirr_icos, YES) +HO_E_INTRINSIC(mirr_io_sel, YES) +HO_E_INTRINSIC(mirr_mc_ctrl, YES) +HO_E_INTRINSIC(mirr_qid, YES) +HO_E_INTRINSIC(mtu_trunc_err_f, YES) +HO_E_INTRINSIC(mtu_trunc_len, YES) + +II_INTRINSIC(copy_to_cpu, YES) +II_INTRINSIC(drop_ctl, YES) +II_INTRINSIC(egress_unicast_port, NO) +II_INTRINSIC_RENAME(egress_multicast_group_0, mgid1, NO) +II_INTRINSIC_RENAME(egress_multicast_group_1, mgid2, NO) +II_INTRINSIC(pgen, YES) 
+II_INTRINSIC(pgen_len, YES) +II_INTRINSIC(pgen_addr, YES) +HO_I_INTRINSIC(afc, YES) +HO_I_INTRINSIC(bypss_egr, YES) +HO_I_INTRINSIC(copy_to_cpu_cos, YES) +HO_I_INTRINSIC(ct_disable, YES) +HO_I_INTRINSIC(ct_mcast, YES) +HO_I_INTRINSIC(deflect_on_drop, YES) +HO_I_INTRINSIC(icos, YES) +HO_I_INTRINSIC(mirr_c2c_ctrl, YES) +HO_I_INTRINSIC(mirr_coal_smpl_len, YES) +HO_I_INTRINSIC(mirr_dond_ctrl, YES) +HO_I_INTRINSIC(mirr_epipe_port, YES) +HO_I_INTRINSIC(mirr_hash, YES) +HO_I_INTRINSIC(mirr_icos, YES) +HO_I_INTRINSIC(mirr_io_sel, YES) +HO_I_INTRINSIC(mirr_mc_ctrl, YES) +HO_I_INTRINSIC(mirr_qid, YES) +HO_I_INTRINSIC(mtu_trunc_err_f, YES) +HO_I_INTRINSIC(mtu_trunc_len, YES) +HO_I_INTRINSIC(qid, YES) +HO_I_INTRINSIC(rid, YES) +HO_I_INTRINSIC_RENAME(meter_color, pkt_color, YES) +HO_I_INTRINSIC_RENAME(xid, xid_l1, YES) +HO_I_INTRINSIC_RENAME(yid, xid_l2, YES) +HO_I_INTRINSIC_RENAME(hash_lag_ecmp_mcast_0, hash1, YES) +HO_I_INTRINSIC_RENAME(hash_lag_ecmp_mcast_1, hash2, YES) + +#undef EI_INTRINSIC +#undef HO_E_INTRINSIC +#undef II_INTRINSIC +#undef II_INTRINSIC_RENAME +#undef HO_I_INTRINSIC +#undef HO_I_INTRINSIC_RENAME + +/** Macros to build Digest::Type objects for JBay -- + * JBAY_SIMPLE_DIGEST: basic digest that appears one place in the config + * JBAY_ARRAY_DIGEST: config is replicated across Header+Output slices + * GRESS: INGRESS or EGRESS + * NAME: keyword use for this digest in the assembler + * ARRAY: Header+Ouput slice array (ho_i or ho_e, matching ingress or egress) + * TBL: config register containing the table config + * SEL: config register with the selection config + * IFID: YES or NO -- if this config needs to program id_phv + * CNT: how many patterns can be specified in the array + * REVERSE: YES or NO -- if the entries in the table are reverse (0 is last byte of header) + * IFIDX: YES or NO -- if CNT > 1 (if we index by id) + */ + +#define JBAY_SIMPLE_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \ + JBAY_COMMON_DIGEST(GRESS, NAME, TBL, SEL, IFID, 
CNT, REVERSE, IFIDX) \ + JBAY_DIGEST_TABLE(GRESS, NAME, TBL, IFID, YES, CNT, REVERSE, IFIDX) \ + } +#define JBAY_ARRAY_DIGEST(GRESS, NAME, ARRAY, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \ + JBAY_COMMON_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \ + for (auto &r : ARRAY) { \ + JBAY_DIGEST_TABLE(GRESS, NAME, r.TBL, IFID, NO, CNT, REVERSE, IFIDX) \ + } \ + } + +#define JBAY_COMMON_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \ + DEPARSER_DIGEST(JBay, GRESS, NAME, CNT, can_shift = true;) { \ + SEL.phv = data.select.val->reg.deparser_id(); \ + JBAY_POV(GRESS, data.select, SEL) \ + SEL.shft = data.shift + data.select->lo; \ + SEL.disable_ = 0; + +#define JBAY_DIGEST_TABLE(GRESS, NAME, REG, IFID, IFVALID, CNT, REVERSE, IFIDX) \ + for (auto &set : data.layout) { \ + int id = set.first >> data.shift; \ + int idx = 0; \ + int maxidx = REG IFIDX([id]).phvs.size() - 1; \ + bool first = true; \ + int last = -1; \ + for (auto ® : set.second) { \ + if (first) { \ + first = false; \ + IFID(REG IFIDX([id]).id_phv = reg->reg.deparser_id(); continue;) \ + } \ + /* The same 16b/32b container cannot appear consecutively, but 8b can. 
*/ \ + if (last == reg->reg.deparser_id() && reg->reg.size != 8) { \ + error(data.lineno, "%s: %db container %s seen in consecutive locations", #NAME, \ + reg->reg.size, reg->reg.name); \ + continue; \ + } \ + for (int i = reg->reg.size / 8; i > 0; i--) { \ + if (idx > maxidx) { \ + error(data.lineno, "%s digest limited to %d bytes", #NAME, maxidx + 1); \ + break; \ + } \ + REG IFIDX([id]).phvs[REVERSE(maxidx -) idx++] = reg->reg.deparser_id(); \ + } \ + last = reg->reg.deparser_id(); \ + } \ + IFVALID(REG IFIDX([id]).valid = 1;) \ + REG IFIDX([id]).len = idx; \ + } + +JBAY_SIMPLE_DIGEST(INGRESS, learning, regs.dprsrreg.inp.ipp.ingr.learn_tbl, + regs.dprsrreg.inp.ipp.ingr.m_learn_sel, NO, 8, YES, YES) +JBAY_ARRAY_DIGEST(INGRESS, mirror, regs.dprsrreg.ho_i, him.mirr_hdr_tbl.entry, + regs.dprsrreg.inp.ipp.ingr.m_mirr_sel, YES, 16, NO, YES) +JBAY_ARRAY_DIGEST(EGRESS, mirror, regs.dprsrreg.ho_e, hem.mirr_hdr_tbl.entry, + regs.dprsrreg.inp.ipp.egr.m_mirr_sel, YES, 16, NO, YES) +JBAY_SIMPLE_DIGEST(INGRESS, resubmit, regs.dprsrreg.inp.ipp.ingr.resub_tbl, + regs.dprsrreg.inp.ipp.ingr.m_resub_sel, NO, 8, NO, YES) +JBAY_SIMPLE_DIGEST(INGRESS, pktgen, regs.dprsrreg.inp.ipp.ingr.pgen_tbl, + regs.dprsrreg.inp.ipp.ingr.m_pgen, NO, 1, NO, NO) + +// all the jbay deparser subtrees with a dis or disable_ bit +// FIXME -- should be a way of doing this with a smart template or other metaprogramming. 
+#define JBAY_DISABLE_REGBITS(M) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_afc, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_capture_tx_ts, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_force_tx_err, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_c2c_ctrl, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_coal_smpl_len, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_dond_ctrl, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_epipe_port, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_hash, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_icos, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_io_sel, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_mc_ctrl, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_qid, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mtu_trunc_err_f, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_mtu_trunc_len, dis) \ + M(YES, regs.dprsrreg.ho_e, her.meta.m_tx_pkt_has_offsets, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_afc, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_bypss_egr, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_copy_to_cpu_cos, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_ct_disable, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_ct_mcast, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_deflect_on_drop, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_hash1, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_hash2, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_icos, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_c2c_ctrl, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_coal_smpl_len, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_dond_ctrl, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_epipe_port, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_hash, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_icos, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_io_sel, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_mc_ctrl, dis) \ + M(YES, regs.dprsrreg.ho_i, 
hir.meta.m_mirr_qid, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mtu_trunc_err_f, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_mtu_trunc_len, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_pkt_color, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_qid, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_rid, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_xid_l1, dis) \ + M(YES, regs.dprsrreg.ho_i, hir.meta.m_xid_l2, dis) \ + M(NO, , regs.dprsrreg.inp.ipp.egr.m_drop_ctl, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.egr.m_egress_unicast_port, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.egr.m_mirr_sel, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_copy_to_cpu, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_drop_ctl, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_egress_unicast_port, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_learn_sel, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_mgid1, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_mgid2, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_mirr_sel, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_pgen, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_pgen_addr, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_pgen_len, disable_) \ + M(NO, , regs.dprsrreg.inp.ipp.ingr.m_resub_sel, disable_) + +// Compiler workaround for TOF2LAB-44, skip certain chunk indices +void tof2lab44_workaround(int lineno, unsigned &chunk_index) { + if (options.tof2lab44_workaround) { + static std::set skipped_chunks = {24, 32, 40, 48, 56, 64, 72, + 80, 88, 96, 104, 112, 120}; + while (skipped_chunks.count(chunk_index)) chunk_index++; + } +} + +// INVARIANT: check_chunk is idempotent. 
+bool check_chunk(int lineno, unsigned &chunk) { + tof2lab44_workaround(lineno, chunk); + + const unsigned TOTAL_CHUNKS = Target::JBay::DEPARSER_TOTAL_CHUNKS; + static bool suppress_repeated = false; + if (chunk >= TOTAL_CHUNKS) { + if (!suppress_repeated) + error(lineno, "Ran out of chunks in field dictionary (%d)", TOTAL_CHUNKS); + suppress_repeated = true; + return false; + } + return true; +} + +/// A callback to write a PHV, constant, or checksum chunk to the field dictionary. +using WriteChunk = std::function; + +/// A callback to finish writing a PHV, constant, or checksum chunk to the field dictionary. +using FinishChunk = + std::function; + +/// A callback for writing a CLOT to the field dictionary. This increments the chunk index if the +/// CLOT spans multiple chunks. +using WriteClot = std::function; + +/// Implements common control functionality for outputting field dictionaries and field dictionary +/// slices. +template +void output_jbay_field_dictionary_helper(int lineno, POV &pov, DICT &dict, WriteChunk write_chunk, + FinishChunk finish_chunk, WriteClot write_clot) { + const unsigned CHUNK_SIZE = Target::JBay::DEPARSER_CHUNK_SIZE; + const unsigned CHUNK_GROUPS = Target::JBay::DEPARSER_CHUNK_GROUPS; + const unsigned CHUNKS_PER_GROUP = Target::JBay::DEPARSER_CHUNKS_PER_GROUP; + const unsigned CLOTS_PER_GROUP = Target::JBay::DEPARSER_CLOTS_PER_GROUP; + unsigned ch = 0, entry_n = 0, byte = 0, group = 0, clots_in_group = 0; + Phv::Slice prev_pov; + int prev = -1; + + // INVARIANT: check_chunk should be called immediately before doing anything with a chunk. + // Because check_chunk is idempotent, it is fine to call it on a chunk that has previously been + // checked. + + for (auto &ent : dict) { + auto *clot = dynamic_cast(ent.what.get()); + // FIXME -- why does the following give an error from gcc? + // auto *clot = ent.what->to(); + unsigned size = ent.what->size(); + + // Finish the current chunk if needed. 
+ if (byte && + (clot || byte + size > CHUNK_SIZE || (prev_pov && *ent.pov.front() != prev_pov))) { + finish_chunk(ch++, entry_n++, prev_pov, byte); + byte = 0; + } + if (ch / CHUNKS_PER_GROUP != group) { + // into a new group + group = ch / CHUNKS_PER_GROUP; + clots_in_group = 0; + } + if (clot) { + // Start a new group if needed. Each group has a maximum number of CLOTs that can be + // deparsed, and CLOTs cannot span multiple groups. + bool out_of_clots_in_group = clots_in_group >= CLOTS_PER_GROUP; + auto chunks_in_clot = (size + CHUNK_SIZE - 1) / CHUNK_SIZE; + bool out_of_chunks_in_group = ch % CHUNKS_PER_GROUP + chunks_in_clot > CHUNKS_PER_GROUP; + if (out_of_clots_in_group || out_of_chunks_in_group) { + // go on to the next group + ch = (ch | (CHUNKS_PER_GROUP - 1)) + 1; + group = ch / CHUNKS_PER_GROUP; + clots_in_group = 0; + } + + // Write the CLOT to the next segment in the current group. + if (chunks_in_clot == CHUNKS_PER_GROUP && (ch % CHUNKS_PER_GROUP)) + error(clot->lineno, "--tof2lab44-workaround incompatible with clot >56 bytes"); + int clot_tag = Parser::clot_tag(clot->gress, clot->tag); + int seg_tag = clots_in_group++; + write_clot(ch, entry_n, seg_tag, clot_tag, ent.pov.front(), clot); + + prev = -1; + } else { + // Phv, Constant, or Checksum + write_chunk(ch, prev_pov, prev, ent.lineno, ent.pov.front(), ent.what.get(), byte, + size); + byte += size; + prev = ent.what->encode(); + } + prev_pov = *ent.pov.front(); + } + + if (byte > 0) { + finish_chunk(ch, entry_n, prev_pov, byte); + } +} + +template +void output_jbay_field_dictionary(int lineno, REGS ®s, POV_FMT &pov_layout, POV &pov, + DICT &dict) { + // Initialize pov_layout. 
+ unsigned byte = 0; + for (auto &r : pov) { + for (int bits = 0; bits < r.first->size; bits += 8) { + if (byte > pov_layout.size()) error(lineno, "Ran out of space in POV in deparser"); + pov_layout[byte++] = r.first->deparser_id(); + } + } + while (byte < pov_layout.size()) pov_layout[byte++] = 0xff; + LOG5("jbay field dictionary:"); + + // Declare some callback functions, and then delegate to helper. + auto write_chunk = [](unsigned ch, const Phv::Slice &prev_pov, int prev, int ent_lineno, + const Phv::Ref &ent_pov, Deparser::FDEntry::Base *ent_what, unsigned byte, + unsigned size) { + // Just do an error check here. Defer actual writing to finish_chunk. + LOG5(" chunk " << ch << ": " << *ent_what << " (pov " << ent_pov << ")"); + if (dynamic_cast(ent_what) && prev_pov == *ent_pov && + int(ent_what->encode()) == prev && (size & 6)) + error(ent_lineno, "16 and 32-bit container cannot be repeatedly deparsed"); + }; + + auto finish_chunk = [&](unsigned ch, unsigned entry_n, const Phv::Slice &pov_bit, + unsigned byte) { + if (check_chunk(lineno, ch)) { + regs.chunk_info[ch].chunk_vld = 1; + regs.chunk_info[ch].pov = pov.at(&pov_bit.reg) + pov_bit.lo; + regs.chunk_info[ch].seg_vld = 0; + regs.chunk_info[ch].seg_slice = byte & 7; + regs.chunk_info[ch].seg_sel = byte >> 3; + } + }; + + auto write_clot = [&](unsigned &ch, unsigned &entry_n, int seg_tag, int clot_tag, + const Phv::Ref &pov_bit, Deparser::FDEntry::Clot *clot) { + const unsigned CHUNKS_PER_GROUP = Target::JBay::DEPARSER_CHUNKS_PER_GROUP; + const int group = ch / CHUNKS_PER_GROUP; + if (group < regs.fd_tags.size()) regs.fd_tags[group].segment_tag[seg_tag] = clot_tag; + LOG5(" chunk " << ch << ": " << *clot << " (pov " << pov_bit << ")"); + for (int i = 0; i < clot->length; i += 8, ++ch) { + // CLOTs cannot span multiple groups. 
+ BUG_CHECK(ch / CHUNKS_PER_GROUP == group || error_count > 0, "CLOT spanning groups"); + if (check_chunk(lineno, ch)) { + regs.chunk_info[ch].chunk_vld = 1; + regs.chunk_info[ch].pov = pov.at(&pov_bit->reg) + pov_bit->lo; + regs.chunk_info[ch].seg_vld = 1; + regs.chunk_info[ch].seg_sel = seg_tag; + regs.chunk_info[ch].seg_slice = i / 8U; + } + } + }; + + output_jbay_field_dictionary_helper(lineno, pov, dict, write_chunk, finish_chunk, write_clot); +} + +template +void output_jbay_field_dictionary_slice(int lineno, CHUNKS &chunk, CLOTS &clots, POV &pov, + DICT &dict, json::vector &fd_gress, + json::vector &fd_entries, gress_t gress) { + json::map fd; + json::map fd_entry; + json::vector chunk_bytes; + json::vector fd_entry_chunk_bytes; + + auto write_chunk = [&](unsigned ch, const Phv::Slice &prev_pov, int prev, int ent_lineno, + const Phv::Ref &ent_pov, Deparser::FDEntry::Base *ent_what, + unsigned byte, unsigned size) { + while (size--) { + json::map chunk_byte; + json::map fd_entry_chunk_byte; + json::map fd_entry_chunk; + chunk_byte["Byte"] = byte; + fd_entry_chunk_byte["chunk_number"] = byte; + if (ent_what->encode() < CONSTANTS_PHVID_JBAY_LOW) { + auto *phv = dynamic_cast(ent_what); + auto phv_reg = phv->reg(); + write_field_name_in_json(phv_reg, &ent_pov->reg, ent_pov->lo, chunk_byte, + fd_entry_chunk, 19, gress); + } else { + write_csum_const_in_json(ent_what->encode(), chunk_byte, fd_entry_chunk, gress); + } + fd_entry_chunk_byte["chunk"] = std::move(fd_entry_chunk); + chunk_bytes.push_back(std::move(chunk_byte)); + fd_entry_chunk_bytes.push_back(std::move(fd_entry_chunk_byte)); + if (check_chunk(lineno, ch)) { + chunk[ch].is_phv |= 1 << byte; + chunk[ch].byte_off.phv_offset[byte++] = ent_what->encode(); + } + } + }; + + auto finish_chunk = [&](unsigned ch, unsigned entry_n, const Phv::Slice &pov_bit, + unsigned byte) { + fd["Field Dictionary Number"] = entry_n; + fd["Field Dictionary Chunk"] = ch; + fd_entry["entry"] = entry_n; + // fd_entry["fde_chunk"] 
= ch; -- requires compiler_interfaces change + Deparser::write_pov_in_json(fd, fd_entry, &pov_bit.reg, pov.at(&pov_bit.reg) + pov_bit.lo, + pov_bit.lo); + if (check_chunk(lineno, ch)) { + chunk[ch].cfg.seg_vld = 0; // no CLOTs yet + chunk[ch].cfg.seg_slice = byte & 7; + chunk[ch].cfg.seg_sel = byte >> 3; + } + + fd["Content"] = std::move(chunk_bytes); + fd_entry["chunks"] = std::move(fd_entry_chunk_bytes); + fd_entries.push_back(std::move(fd_entry)); + fd_gress.push_back(std::move(fd)); + }; + + auto write_clot = [&](unsigned &ch, unsigned &entry_n, int seg_tag, int clot_tag, + const Phv::Ref &pov_bit, Deparser::FDEntry::Clot *clot) { + const unsigned CHUNKS_PER_GROUP = Target::JBay::DEPARSER_CHUNKS_PER_GROUP; + const int group = ch / CHUNKS_PER_GROUP; + if (group < clots.size()) clots[group].segment_tag[seg_tag] = clot_tag; + auto phv_repl = clot->phv_replace.begin(); + auto csum_repl = clot->csum_replace.begin(); + for (int i = 0; i < clot->length; i += 8, ++ch, ++entry_n) { + // CLOTs cannot span multiple groups. 
+ BUG_CHECK(ch / CHUNKS_PER_GROUP == group || error_count > 0, "CLOT spanning groups"); + + fd["Field Dictionary Number"] = entry_n; + fd["Field Dictionary Chunk"] = ch; + fd_entry["entry"] = entry_n; + // fd_entry["fde_chunk"] = ch; -- requires compiler_interfaces change + Deparser::write_pov_in_json(fd, fd_entry, &pov_bit->reg, + pov.at(&pov_bit->reg) + pov_bit->lo, pov_bit->lo); + + if (check_chunk(lineno, ch)) { + chunk[ch].cfg.seg_vld = 1; + chunk[ch].cfg.seg_sel = seg_tag; + chunk[ch].cfg.seg_slice = i / 8U; + } + + for (int j = 0; j < 8 && i + j < clot->length; ++j) { + json::map chunk_byte; + json::map fd_entry_chunk_byte; + json::map fd_entry_chunk; + chunk_byte["Byte"] = j; + fd_entry_chunk_byte["chunk_number"] = j; + if (phv_repl != clot->phv_replace.end() && int(phv_repl->first) <= i + j) { + // This is PHV replaced, PHV is used + chunk[ch].is_phv |= 1 << j; + chunk[ch].byte_off.phv_offset[j] = phv_repl->second->reg.deparser_id(); + auto phv_reg = &phv_repl->second->reg; + write_field_name_in_json(phv_reg, &pov_bit->reg, pov_bit->lo, chunk_byte, + fd_entry_chunk, 19, gress); + if (int(phv_repl->first + phv_repl->second->size() / 8U) <= i + j + 1) + ++phv_repl; + } else if (csum_repl != clot->csum_replace.end() && + int(csum_repl->first) <= i + j) { + if (check_chunk(lineno, ch)) { + chunk[ch].is_phv |= 1 << j; + chunk[ch].byte_off.phv_offset[j] = csum_repl->second.encode(); + } + write_csum_const_in_json(csum_repl->second.encode(), chunk_byte, fd_entry_chunk, + gress); + if (int(csum_repl->first + 2) <= i + j + 1) ++csum_repl; + } else { + if (check_chunk(lineno, ch)) chunk[ch].byte_off.phv_offset[j] = i + j; + chunk_byte["CLOT"] = clot_tag; + chunk_byte["CLOT_OFFSET"] = i + j; + fd_entry_chunk["clot_tag"] = clot_tag; + // fd_entry_chunk["clot_offset"] = i + j; requires compiler_interfaces change + } + fd_entry_chunk_byte["chunk"] = std::move(fd_entry_chunk); + chunk_bytes.push_back(std::move(chunk_byte)); + 
fd_entry_chunk_bytes.push_back(std::move(fd_entry_chunk_byte)); + } + fd["Content"] = std::move(chunk_bytes); + fd_entry["chunks"] = std::move(fd_entry_chunk_bytes); + fd_entries.push_back(std::move(fd_entry)); + fd_gress.push_back(std::move(fd)); + } + }; + + output_jbay_field_dictionary_helper(lineno, pov, dict, write_chunk, finish_chunk, write_clot); +} + +static void check_jbay_ownership(bitvec phv_use[2]) { + unsigned mask = 0; + int group = -1; + for (auto i : phv_use[INGRESS]) { + if ((i | mask) == (group | mask)) continue; + switch (Phv::reg(i)->size) { + case 8: + case 16: + mask = 3; + break; + case 32: + mask = 1; + break; + default: + BUG(); + } + group = i & ~mask; + if (phv_use[EGRESS].getrange(group, mask + 1)) { + error(0, "%s..%s used by both ingress and egress deparser", Phv::reg(group)->name, + Phv::reg(group | mask)->name); + } + } +} + +static void setup_jbay_ownership(bitvec phv_use, ubits_base &phv8, ubits_base &phv16, + ubits_base &phv32) { + std::set phv8_grps, phv16_grps, phv32_grps; + + for (auto i : phv_use) { + auto *reg = Phv::reg(i); + switch (reg->size) { + case 8: + phv8_grps.insert(1U << ((reg->deparser_id() - 64) / 4U)); + break; + case 16: + phv16_grps.insert(1U << ((reg->deparser_id() - 128) / 4U)); + break; + case 32: + phv32_grps.insert(1U << (reg->deparser_id() / 2U)); + break; + default: + BUG(); + } + } + + for (auto v : phv8_grps) phv8 |= v; + for (auto v : phv16_grps) phv16 |= v; + for (auto v : phv32_grps) phv32 |= v; +} + +static short jbay_phv2cksum[224][2] = { + // Entries 0-127 are for 32 bit PHV + // Each 32 bit PHV uses two 16b adders + // The even addresses are for [31:16], the odd addresses are for [15:0] + // Note: The current CSR description of these entries for 32 bit containers is incorrect. 
+ // 128-191 are for 8 bit PHV + // 192-287 are for 16 bit PHV + {1, 0}, {3, 2}, {5, 4}, {7, 6}, {9, 8}, {11, 10}, {13, 12}, {15, 14}, + {17, 16}, {19, 18}, {21, 20}, {23, 22}, {25, 24}, {27, 26}, {29, 28}, {31, 30}, + {33, 32}, {35, 34}, {37, 36}, {39, 38}, {41, 40}, {43, 42}, {45, 44}, {47, 46}, + {49, 48}, {51, 50}, {53, 52}, {55, 54}, {57, 56}, {59, 58}, {61, 60}, {63, 62}, + {65, 64}, {67, 66}, {69, 68}, {71, 70}, {73, 72}, {75, 74}, {77, 76}, {79, 78}, + {81, 80}, {83, 82}, {85, 84}, {87, 86}, {89, 88}, {91, 90}, {93, 92}, {95, 94}, + {97, 96}, {99, 98}, {101, 100}, {103, 102}, {105, 104}, {107, 106}, {109, 108}, {111, 110}, + {113, 112}, {115, 114}, {117, 116}, {119, 118}, {121, 120}, {123, 122}, {125, 124}, {127, 126}, + {128, -1}, {129, -1}, {130, -1}, {131, -1}, {132, -1}, {133, -1}, {134, -1}, {135, -1}, + {136, -1}, {137, -1}, {138, -1}, {139, -1}, {140, -1}, {141, -1}, {142, -1}, {143, -1}, + {144, -1}, {145, -1}, {146, -1}, {147, -1}, {148, -1}, {149, -1}, {150, -1}, {151, -1}, + {152, -1}, {153, -1}, {154, -1}, {155, -1}, {156, -1}, {157, -1}, {158, -1}, {159, -1}, + {160, -1}, {161, -1}, {162, -1}, {163, -1}, {164, -1}, {165, -1}, {166, -1}, {167, -1}, + {168, -1}, {169, -1}, {170, -1}, {171, -1}, {172, -1}, {173, -1}, {174, -1}, {175, -1}, + {176, -1}, {177, -1}, {178, -1}, {179, -1}, {180, -1}, {181, -1}, {182, -1}, {183, -1}, + {184, -1}, {185, -1}, {186, -1}, {187, -1}, {188, -1}, {189, -1}, {190, -1}, {191, -1}, + {192, -1}, {193, -1}, {194, -1}, {195, -1}, {196, -1}, {197, -1}, {198, -1}, {199, -1}, + {200, -1}, {201, -1}, {202, -1}, {203, -1}, {204, -1}, {205, -1}, {206, -1}, {207, -1}, + {208, -1}, {209, -1}, {210, -1}, {211, -1}, {212, -1}, {213, -1}, {214, -1}, {215, -1}, + {216, -1}, {217, -1}, {218, -1}, {219, -1}, {220, -1}, {221, -1}, {222, -1}, {223, -1}, + {224, -1}, {225, -1}, {226, -1}, {227, -1}, {228, -1}, {229, -1}, {230, -1}, {231, -1}, + {232, -1}, {233, -1}, {234, -1}, {235, -1}, {236, -1}, {237, -1}, {238, -1}, {239, -1}, + 
{240, -1}, {241, -1}, {242, -1}, {243, -1}, {244, -1}, {245, -1}, {246, -1}, {247, -1}, + {248, -1}, {249, -1}, {250, -1}, {251, -1}, {252, -1}, {253, -1}, {254, -1}, {255, -1}, + {256, -1}, {257, -1}, {258, -1}, {259, -1}, {260, -1}, {261, -1}, {262, -1}, {263, -1}, + {264, -1}, {265, -1}, {266, -1}, {267, -1}, {268, -1}, {269, -1}, {270, -1}, {271, -1}, + {272, -1}, {273, -1}, {274, -1}, {275, -1}, {276, -1}, {277, -1}, {278, -1}, {279, -1}, + {280, -1}, {281, -1}, {282, -1}, {283, -1}, {284, -1}, {285, -1}, {286, -1}, {287, -1}, +}; + +template +static void write_jbay_checksum_entry(ENTRIES &entry, unsigned mask, int swap, int pov, int id, + const char *reg = nullptr) { + write_checksum_entry(entry, mask, swap, id, reg); + entry.pov = pov; +} + +// Populates pov_map which maps the bit in the main POV array [127:0] +// to bit in the checksum pov array [32:0] +// The checksum pov array is 32 bits / 4 bytes - pov_cfg.byte_set[4]. +// Each element of the pov_cfg.byte_sel array maps to the byte in the main POV array +template +void jbay_csum_pov_config(Phv::Ref povRef, POV &pov_cfg, + ordered_map &pov, + std::map &pov_map, unsigned *prev_byte, + int csum_unit) { + unsigned bit = pov.at(&povRef->reg) + povRef->lo; + if (pov_map.count(bit)) return; + for (unsigned i = 0; i < (*prev_byte); ++i) { + if (pov_cfg.byte_sel[i] == bit / 8U) { + pov_map[bit] = i * 8U + bit % 8U; + break; + } + } + if (pov_map.count(bit)) return; + if (*prev_byte >= (int)pov_cfg.byte_sel.size()) { + error(povRef.lineno, "Checksum unit %d exceeds %d bytes of POV", csum_unit, + (int)pov_cfg.byte_sel.size()); + return; + } + pov_map[bit] = (*prev_byte) * 8U + bit % 8U; + pov_cfg.byte_sel[(*prev_byte)++] = bit / 8U; + return; +} + +template +void set_jbay_pov_cfg(POV &pov_cfg, std::map &pov_map, + Deparser::FullChecksumUnit &full_csum, + ordered_map &pov, int csum_unit, + unsigned *prev_byte) { + for (auto &unit_entry : full_csum.entries) { + for (auto val : unit_entry.second) { + if 
(val.pov.size() != 1) { + error(val.val.lineno, "one POV bit required for Tofino2"); + continue; + } + jbay_csum_pov_config(val.pov.front(), pov_cfg, pov, pov_map, prev_byte, csum_unit); + } + } + for (auto &val : full_csum.clot_entries) { + if (val.pov.size() != 1) { + error(val.val.lineno, "one POV bit required for Tofino2"); + continue; + } + jbay_csum_pov_config(val.pov.front(), pov_cfg, pov, pov_map, prev_byte, csum_unit); + } + for (auto &checksum_pov : full_csum.pov) { + jbay_csum_pov_config(checksum_pov.second, pov_cfg, pov, pov_map, prev_byte, csum_unit); + } + return; +} + +template +void write_jbay_full_checksum_config( + CSUM &csum, ENTRIES &phv_entries, int unit, std::set &visited, + std::array, MAX_DEPARSER_CHECKSUM_UNITS> &pov_map, + Deparser::FullChecksumUnit &full_csum, ordered_map &pov) { + for (auto &unit_entry : full_csum.entries) { + // Same partial checksum unit can be used in multiple full checksum unit. + // No need to rewrite the checksum entries multiple times for the same unit + if (visited.count(unit_entry.first)) continue; + visited.insert(unit_entry.first); + for (auto val : unit_entry.second) { + if (val.pov.size() != 1) continue; + int povbit = + pov_map[unit_entry.first].at(pov.at(&val.pov.front()->reg) + val.pov.front()->lo); + int mask = val.mask; + int swap = val.swap; + auto &remap = jbay_phv2cksum[val->reg.deparser_id()]; + write_jbay_checksum_entry(phv_entries[unit_entry.first].entry[remap[0]], mask & 3, + swap & 1, povbit, unit_entry.first, val->reg.name); + if (remap[1] >= 0) + write_jbay_checksum_entry(phv_entries[unit_entry.first].entry[remap[1]], mask >> 2, + swap >> 1, povbit, unit_entry.first, val->reg.name); + else + BUG_CHECK((mask >> 2 == 0) && (swap >> 1 == 0)); + } + } + int tag_idx = 0; + for (auto &val : full_csum.clot_entries) { + if (val.pov.size() != 1) continue; + int povbit = pov_map[unit].at(pov.at(&val.pov.front()->reg) + val.pov.front()->lo); + if (tag_idx == 16) error(-1, "Ran out of clot entries in 
deparser checksum unit %d", unit); + csum.clot_entry[tag_idx].pov = povbit; + csum.clot_entry[tag_idx].vld = 1; + csum.tags[tag_idx].tag = val.tag; + tag_idx++; + } + for (auto &checksum_pov : full_csum.pov) { + csum.phv_entry[checksum_pov.first].pov = + pov_map[unit].at(pov.at(&checksum_pov.second->reg) + checksum_pov.second->lo); + csum.phv_entry[checksum_pov.first].vld = 1; + } + csum.zeros_as_ones.en = full_csum.zeros_as_ones_en; + + // FIXME -- use/set csum.csum_constant? +} +// Engine 0: scratch[23:0] +// Engine 1: { scratch2[15:0], scratch[31:24] } +// Engine 2: { scratch[7:0] , scratch2[31:16] } +// Engine 3: scratch[31:8] +// So each engine gets a cfg_vector[23:0] +// There are 16 CLOT csums and 8 PHV csums that can be inverted: +// CLOT csum [15:0] are controlled by cfg_vector [15:0] +// PHV csums [7:0] are controlled by cfg_vector [23:16] + +template +void write_jbay_full_checksum_invert_config(SCRATCH1 &scratch1, SCRATCH2 &scratch2, + SCRATCH3 &scratch3, int unit, + Deparser::FullChecksumUnit &full_csum) { + ubits<32> value1; + ubits<32> value2; + ubits<32> value3; + for (auto checksum_unit : full_csum.checksum_unit_invert) { + if (unit == 0) { + value1 |= (1 << (16 + checksum_unit)); + } else if (unit == 1) { + value1 |= (1 << (8 + checksum_unit)); + } else if (unit == 2) { + value3 |= (1 << checksum_unit); + } else if (unit == 3) { + value3 |= (1 << (24 + checksum_unit)); + } + } + for (auto clot_tag : full_csum.clot_tag_invert) { + if (unit == 0) { + value1 |= (1 << clot_tag); + } else if (unit == 1) { + if (clot_tag > 7) { + value1 |= (1 << (clot_tag - 8)); + } else { + value3 |= (1 << (16 + clot_tag)); + } + } else if (unit == 2) { + value2 |= (1 << (16 + clot_tag)); + } else if (unit == 3) { + value3 |= (1 << (8 + clot_tag)); + } + } + if (value1 || value2 || value3) { + scratch1.value |= value1; + scratch2.value |= value2; + scratch3.value |= value3; + } + return; +} + +template +void write_jbay_constant_config(CONS &cons, const std::set &vals) { 
+ unsigned idx = 0; + for (auto v : vals) { + cons[idx] = v; + idx++; + } +} + +template <> +void Deparser::write_config(Target::JBay::deparser_regs ®s) { + regs.dprsrreg.inp.icr.disable(); // disable this whole tree + regs.dprsrreg.inp.icr.disabled_ = false; // then enable just certain subtrees + regs.dprsrreg.inp.icr.csum_engine.enable(); + regs.dprsrreg.inp.icr.egr.enable(); + regs.dprsrreg.inp.icr.egr_meta_pov.enable(); + regs.dprsrreg.inp.icr.ingr.enable(); + regs.dprsrreg.inp.icr.ingr_meta_pov.enable(); + regs.dprsrreg.inp.icr.scratch.enable(); + regs.dprsrreg.inp.icr.scratch2.enable(); + regs.dprsrreg.inp.ipp.scratch.enable(); + regs.dprsrreg.inp.iim.disable(); + regs.dprsrreg.inpslice.disable(); + for (auto &r : regs.dprsrreg.ho_i) r.out_ingr.disable(); + for (auto &r : regs.dprsrreg.ho_e) r.out_egr.disable(); + + for (auto &r : regs.dprsrreg.ho_i) + write_jbay_constant_config(r.hir.h.hdr_xbar_const.value, constants[INGRESS]); + for (auto &r : regs.dprsrreg.ho_e) + write_jbay_constant_config(r.her.h.hdr_xbar_const.value, constants[EGRESS]); + std::set visited_i; + std::array, MAX_DEPARSER_CHECKSUM_UNITS> pov_map_i; + for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS; csum_unit++) { + unsigned prev_byte = 0; + if (full_checksum_unit[INGRESS][csum_unit].clot_entries.empty() && + full_checksum_unit[INGRESS][csum_unit].entries.empty()) + continue; + set_jbay_pov_cfg(regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.csum_pov_cfg[csum_unit], + pov_map_i[csum_unit], full_checksum_unit[INGRESS][csum_unit], pov[INGRESS], + csum_unit, &prev_byte); + if (error_count > 0) break; + } + for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS && error_count == 0; + csum_unit++) { + if (full_checksum_unit[INGRESS][csum_unit].clot_entries.empty() && + full_checksum_unit[INGRESS][csum_unit].entries.empty()) + continue; + regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.thread.thread[csum_unit] = INGRESS; + write_jbay_full_checksum_config( + 
regs.dprsrreg.inp.icr.csum_engine[csum_unit], regs.dprsrreg.inp.ipp_m.i_csum.engine, + csum_unit, visited_i, pov_map_i, full_checksum_unit[INGRESS][csum_unit], pov[INGRESS]); + write_jbay_full_checksum_invert_config( + regs.dprsrreg.inp.icr.scratch, regs.dprsrreg.inp.icr.scratch2, + regs.dprsrreg.inp.ipp.scratch, csum_unit, full_checksum_unit[INGRESS][csum_unit]); + } + std::set visited_e; + std::array, MAX_DEPARSER_CHECKSUM_UNITS> pov_map_e; + for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS; csum_unit++) { + unsigned prev_byte = 0; + if (full_checksum_unit[EGRESS][csum_unit].clot_entries.empty() && + full_checksum_unit[EGRESS][csum_unit].entries.empty()) + continue; + set_jbay_pov_cfg(regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.csum_pov_cfg[csum_unit], + pov_map_e[csum_unit], full_checksum_unit[EGRESS][csum_unit], pov[EGRESS], + csum_unit, &prev_byte); + if (error_count > 0) break; + } + for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS && error_count == 0; + csum_unit++) { + if (full_checksum_unit[EGRESS][csum_unit].clot_entries.empty() && + full_checksum_unit[EGRESS][csum_unit].entries.empty()) + continue; + regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.thread.thread[csum_unit] = EGRESS; + write_jbay_full_checksum_config( + regs.dprsrreg.inp.icr.csum_engine[csum_unit], regs.dprsrreg.inp.ipp_m.i_csum.engine, + csum_unit, visited_e, pov_map_e, full_checksum_unit[EGRESS][csum_unit], pov[EGRESS]); + write_jbay_full_checksum_invert_config( + regs.dprsrreg.inp.icr.scratch, regs.dprsrreg.inp.icr.scratch2, + regs.dprsrreg.inp.ipp.scratch, csum_unit, full_checksum_unit[EGRESS][csum_unit]); + } + + output_jbay_field_dictionary(lineno[INGRESS], regs.dprsrreg.inp.icr.ingr, + regs.dprsrreg.inp.ipp.main_i.pov.phvs, pov[INGRESS], + dictionary[INGRESS]); + json::map field_dictionary_alloc; + json::vector fde_entries_i; + json::vector fde_entries_e; + json::vector fde_entries; + json::vector fd_gress; + for (auto &rslice : 
regs.dprsrreg.ho_i) { + output_jbay_field_dictionary_slice(lineno[INGRESS], rslice.him.fd_compress.chunk, + rslice.hir.h.compress_clot_sel, pov[INGRESS], + dictionary[INGRESS], fd_gress, fde_entries, INGRESS); + field_dictionary_alloc["ingress"] = std::move(fd_gress); + fde_entries_i = std::move(fde_entries); + } + output_jbay_field_dictionary(lineno[EGRESS], regs.dprsrreg.inp.icr.egr, + regs.dprsrreg.inp.ipp.main_e.pov.phvs, pov[EGRESS], + dictionary[EGRESS]); + for (auto &rslice : regs.dprsrreg.ho_e) { + output_jbay_field_dictionary_slice(lineno[EGRESS], rslice.hem.fd_compress.chunk, + rslice.her.h.compress_clot_sel, pov[EGRESS], + dictionary[EGRESS], fd_gress, fde_entries, EGRESS); + field_dictionary_alloc["egress"] = std::move(fd_gress); + fde_entries_e = std::move(fde_entries); + } + if (Log::verbosity() > 0) { + auto json_dump = open_output("logs/field_dictionary.log"); + *json_dump << &field_dictionary_alloc; + } + // Output deparser resources + report_resources_deparser_json(fde_entries_i, fde_entries_e); + + if (Phv::use(INGRESS).intersects(Phv::use(EGRESS))) { + if (!options.match_compiler) { + error(lineno[INGRESS], "Registers used in both ingress and egress in pipeline: %s", + Phv::db_regset(Phv::use(INGRESS) & Phv::use(EGRESS)).c_str()); + } else { + warning(lineno[INGRESS], "Registers used in both ingress and egress in pipeline: %s", + Phv::db_regset(Phv::use(INGRESS) & Phv::use(EGRESS)).c_str()); + } + /* FIXME -- this only (sort-of) works because 'deparser' comes first in the alphabet, + * FIXME -- so is the first section to have its 'output' method run. 
Its a hack + * FIXME -- anyways to attempt to correct broken asm that should be an error */ + Phv::unsetuse(INGRESS, phv_use[EGRESS]); + Phv::unsetuse(EGRESS, phv_use[INGRESS]); + } + + check_jbay_ownership(phv_use); + regs.dprsrreg.inp.icr.i_phv8_grp.enable(); + regs.dprsrreg.inp.icr.i_phv16_grp.enable(); + regs.dprsrreg.inp.icr.i_phv32_grp.enable(); + // regs.dprsrreg.inp.icr.scratch.enable(); + regs.dprsrreg.inp.icr.i_phv8_grp.val = 0; + regs.dprsrreg.inp.icr.i_phv16_grp.val = 0; + regs.dprsrreg.inp.icr.i_phv32_grp.val = 0; + // regs.dprsrreg.inp.icr.scratch.value = 0; + setup_jbay_ownership(phv_use[INGRESS], regs.dprsrreg.inp.icr.i_phv8_grp.val, + regs.dprsrreg.inp.icr.i_phv16_grp.val, + regs.dprsrreg.inp.icr.i_phv32_grp.val); + regs.dprsrreg.inp.icr.e_phv8_grp.enable(); + regs.dprsrreg.inp.icr.e_phv16_grp.enable(); + regs.dprsrreg.inp.icr.e_phv32_grp.enable(); + setup_jbay_ownership(phv_use[EGRESS], regs.dprsrreg.inp.icr.e_phv8_grp.val, + regs.dprsrreg.inp.icr.e_phv16_grp.val, + regs.dprsrreg.inp.icr.e_phv32_grp.val); + + for (auto &intrin : intrinsics) intrin.type->setregs(regs, *this, intrin); + + /* resubmit_mode specifies whether this pipe can perform a resubmit operation on + a packet. i.e. tell the IPB to resubmit a packet to the MAU pipeline for a second + time. If the compiler determines that no resubmit is possible, then it can set this + bit, which should lower latency in some circumstances. + 0 = Resubmit is allowed. 
1 = Resubmit is not allowed */ + bool resubmit = false; + for (auto &digest : digests) { + if (digest.type->name == "resubmit" || + digest.type->name == "resubmit_preserving_field_list") { + resubmit = true; + break; + } + } + if (resubmit) + regs.dprsrreg.inp.ipp.ingr.resubmit_mode.mode = 0; + else + regs.dprsrreg.inp.ipp.ingr.resubmit_mode.mode = 1; + + for (auto &digest : digests) digest.type->setregs(regs, *this, digest); + + /* Set learning digest mask for JBay */ + for (auto &digest : digests) { + if (digest.type->name == "learning") { + regs.dprsrreg.inp.icr.lrnmask.enable(); + for (auto &set : digest.layout) { + int id = set.first; + int len = regs.dprsrreg.inp.ipp.ingr.learn_tbl[id].len; + if (len == 0) continue; // Allow empty param list + + // Fix for TF2LAB-37s: + // This fixes a hardware limitation where the container following + // the last PHV used cannot be the same non 8 bit container as the last entry. + // E.g. For len = 5, (active entries start at index 47) + // Used - PHV[47] ... PHV[43] = 0; + // Unused - PHV[42] ... PHV[0] = 0; // Defaults to 0 + // This causes issues in hardware as container 0 is used. + // We fix by setting the default as 64 an 8 - bit container. It can be any + // other 8 bit container value. + // The hardware does not cause any issues for 8 bit conatiners. + for (int i = 47 - len; i >= 0; i--) + regs.dprsrreg.inp.ipp.ingr.learn_tbl[id].phvs[i] = 64; + // Fix for TF2LAB-37 end + + // Create a bitvec of all phv masks stacked up next to each + // other in big-endian. 'setregs' above stacks the digest fields + // in a similar manner to setup the phvs per byte on learn_tbl + // regs. 
To illustrate with an example - tna_digest.p4 (since + // this is not clear based on reg descriptions); + // + // BFA Output: + // + // learning: + // select: { B1(0..2): B0(1) } # L[0..2]b: + // ingress::ig_intr_md_for_dprsr.digest_type 0: + // - B1(0..2) # L[0..2]b: ingress::ig_intr_md_for_dprsr.digest_type + // - MW0 # ingress::hdr.ethernet.dst_addr.16-47 + // - MH1 # ingress::hdr.ethernet.dst_addr.0-15 + // - MH0(0..8) # L[0..8]b: ingress::ig_md.port + // - MW1 # ingress::hdr.ethernet.src_addr.16-47 + // - MH2 # ingress::hdr.ethernet.src_addr.0-15 + // + // PHV packing for digest, + // + // B1(7..0) | MW0 (31..24) | MW0(23..16) | MW0(15..8) | + // MW0(7..0) | MH1 (15..8) | MH1(7..0) | MH0(16..8) | + // MH0(7..0) | MW1 (31..24) | MW1(23..16) | MW1(15..8) | + // MW1(7..0) | MH2 (15..8) | MH2(7..0) | ---------- | + // + // Learn Mask Regs for above digest + // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[11] = 4294967047 (0x07ffffff) + // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[10] = 4294967295 (0xffffff01) + // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[9] = 4278321151 (0xffffffff) + // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[8] = 4294967040 (0xffffff00) + + bitvec lrnmask; + int startBit = 0; + int size = 0; + for (auto p : set.second) { + if (size > 0) lrnmask <<= p->reg.size; + auto psliceSize = p.size(); + startBit = p.lobit(); + lrnmask.setrange(startBit, psliceSize); + size += p->reg.size; + } + // Pad to a 32 bit word + auto shift = (size % 32) ? (32 - (size % 32)) : 0; + lrnmask <<= shift; + int num_words = (size + 31) / 32; + int quanta_index = 11; + for (int index = num_words - 1; index >= 0; index--) { + BUG_CHECK(quanta_index >= 0); + unsigned word = lrnmask.getrange(index * 32, 32); + regs.dprsrreg.inp.icr.lrnmask[id].mask[quanta_index--] = word; + } + } + } + } + +#define DISBALE_IF_NOT_SET(ISARRAY, ARRAY, REGS, DISABLE) \ + ISARRAY(for (auto &r : ARRAY)) if (!ISARRAY(r.) REGS.modified()) ISARRAY(r.) 
REGS.DISABLE = 1; + JBAY_DISABLE_REGBITS(DISBALE_IF_NOT_SET) + + if (options.condense_json) regs.disable_if_reset_value(); + if (error_count == 0 && options.gen_json) + regs.emit_json(*open_output("regs.deparser.cfg.json")); + TopLevel::regs()->reg_pipe.pardereg.dprsrreg.set("regs.deparser", ®s); +} + +#if 0 +namespace { +static struct JbayChecksumReg : public Phv::Register { + JbayChecksumReg(int unit) : Phv::Register("", Phv::Register::CHECKSUM, unit, + unit+CONSTANTS_PHVID_JBAY_HIGH, 16) { + snprintf(name, "csum%d", unit); } + int deparser_id() const override { return uid; } +} jbay_checksum_units[8] = { {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7} }; +} + +template<> Phv::Slice Deparser::RefOrChksum::lookup() const { + if (lo != hi || lo < 0 || lo >= Target::JBay::DEPARSER_CHECKSUM_UNITS) { + error(lineno, "Invalid checksum unit number"); + return Phv::Slice(); } + return Phv::Slice(tofino_checksum_units[lo], 0, 15); +} +#endif + +template <> +unsigned Deparser::FDEntry::Checksum::encode() { + return CONSTANTS_PHVID_JBAY_HIGH + unit; +} + +template <> +unsigned Deparser::FDEntry::Constant::encode() { + return CONSTANTS_PHVID_JBAY_LOW + Deparser::constant_idx(gress, val); +} + +template <> +void Deparser::gen_learn_quanta(Target::JBay::parser_regs ®s, json::vector &learn_quanta) {} + +template <> +void Deparser::process(Target::JBay *) { + // Chip-specific code for process method + // None for JBay +} diff --git a/backends/tofino/bf-asm/jbay/gateway.cpp b/backends/tofino/bf-asm/jbay/gateway.cpp new file mode 100644 index 00000000000..9f7da083748 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/gateway.cpp @@ -0,0 +1,99 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/jbay/gateway.h" + +#include "backends/tofino/bf-asm/stage.h" + +void Target::Tofino::GatewayTable::write_next_table_regs(Target::JBay::mau_regs ®s) { + auto &merge = regs.rams.match.merge; + if (need_next_map_lut) merge.next_table_map_en_gateway |= 1U << logical_id; + int idx = 3; + for (auto &line : table) { + BUG_CHECK(idx >= 0); + if (!line.run_table) { + if (need_next_map_lut) + merge.gateway_next_table_lut[logical_id][idx] = line.next_map_lut; + else + merge.gateway_next_table_lut[logical_id][idx] = line.next.next_table_id(); + } + --idx; + } + if (!miss.run_table) { + if (need_next_map_lut) + merge.gateway_next_table_lut[logical_id][4] = miss.next_map_lut; + else + merge.gateway_next_table_lut[logical_id][4] = miss.next.next_table_id(); + } + if (!match_table && need_next_map_lut) { + // Factor with common code in jbay/match_table.cpp write_next_table_regs + merge.next_table_map_en |= 1U << logical_id; + int i = 0; + for (auto &n : extra_next_lut) { + merge.pred_map_loca[logical_id][i].pred_map_loca_next_table = n.next_table_id(); + merge.pred_map_loca[logical_id][i].pred_map_loca_exec = + n.next_in_stage(stage->stageno) >> 1; + merge.pred_map_glob[logical_id][i].pred_map_glob_exec = + n.next_in_stage(stage->stageno + 1); + merge.pred_map_glob[logical_id][i].pred_map_long_brch |= n.long_branch_tags(); + ++i; + } + // is this needed? 
The model complains if we leave the unused slots as 0 + while (i < Target::NEXT_TABLE_SUCCESSOR_TABLE_DEPTH()) + merge.pred_map_loca[logical_id][i++].pred_map_loca_next_table = 0x1ff; + } +} + +template <> +void GatewayTable::standalone_write_regs(Target::JBay::mau_regs ®s) { + // FIXME -- factor this with JBay MatchTable::write_regs + auto &merge = regs.rams.match.merge; + if (gress == GHOST) merge.pred_ghost_thread |= 1 << logical_id; + merge.pred_glob_exec_thread[gress] |= 1 << logical_id; + if (always_run || pred.empty()) merge.pred_always_run[gress] |= 1 << logical_id; + + if (long_branch_input >= 0) + setup_muxctl(merge.pred_long_brch_lt_src[logical_id], long_branch_input); + + bool is_branch = (miss.next.next_table() != nullptr); + if (!is_branch && !need_next_map_lut) { + for (auto &line : table) { + if (line.next.next_table() != nullptr) { + is_branch = true; + break; + } + } + } + if (!is_branch) + for (auto &n : hit_next) + if (n.next_table() != nullptr) { + is_branch = true; + break; + } + if (!is_branch) + for (auto &n : extra_next_lut) + if (n.next_table() != nullptr) { + is_branch = true; + break; + } + if (is_branch) merge.pred_is_a_brch |= 1 << logical_id; + + merge.mpr_glob_exec_thread |= merge.logical_table_thread[0].logical_table_thread_egress & + ~merge.logical_table_thread[0].logical_table_thread_ingress & + ~merge.pred_ghost_thread; +} +template void GatewayTable::standalone_write_regs(Target::JBay::mau_regs ®s); diff --git a/backends/tofino/bf-asm/jbay/gateway.h b/backends/tofino/bf-asm/jbay/gateway.h new file mode 100644 index 00000000000..3700f07f9dd --- /dev/null +++ b/backends/tofino/bf-asm/jbay/gateway.h @@ -0,0 +1,27 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JBAY_GATEWAY_H_ +#define BACKENDS_TOFINO_BF_ASM_JBAY_GATEWAY_H_ + +#include "backends/tofino/bf-asm/tables.h" +#include "backends/tofino/bf-asm/tofino/gateway.h" + +template <> +void GatewayTable::standalone_write_regs(Target::JBay::mau_regs ®s); + +#endif /* BACKENDS_TOFINO_BF_ASM_JBAY_GATEWAY_H_ */ diff --git a/backends/tofino/bf-asm/jbay/input_xbar.cpp b/backends/tofino/bf-asm/jbay/input_xbar.cpp new file mode 100644 index 00000000000..e0e7e7e61b7 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/input_xbar.cpp @@ -0,0 +1,70 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/jbay/input_xbar.h" + +template <> +void InputXbar::write_galois_matrix(Target::JBay::mau_regs ®s, HashTable id, + const std::map &mat) { + int parity_col = -1; + BUG_CHECK(id.type == HashTable::EXACT, "not an exact hash table %d", id.type); + if (hash_table_parity.count(id) && !options.disable_gfm_parity) { + parity_col = hash_table_parity[id]; + } + auto &hash = regs.dp.xbar_hash.hash; + std::set gfm_rows; + for (auto &col : mat) { + int c = col.first; + // Skip parity column encoding, if parity is set overall parity is + // computed later below + if (c == parity_col) continue; + const HashCol &h = col.second; + for (int word = 0; word < 4; word++) { + unsigned data = h.data.getrange(word * 16, 16); + if (data == 0) continue; + auto &w = hash.galois_field_matrix[id.index * 4 + word][c]; + w.byte0 = data & 0xff; + w.byte1 = (data >> 8) & 0xff; + gfm_rows.insert(id.index * 4 + word); + } + } + // A GFM row can be shared by multiple tables. In most cases the columns are + // non overlapping but if they are overlapping the GFM encodings must be the + // same (e.g. ATCAM tables). The input xbar has checks to determine which + // cases are valid. + // The parity must be computed for all columns within the row and set into + // the parity column. 
+ if (parity_col >= 0) { + for (auto r : gfm_rows) { + int hp_byte0 = 0, hp_byte1 = 0; + for (auto c = 0; c < 52; c++) { + if (c == parity_col) continue; + auto &w = hash.galois_field_matrix[r][c]; + hp_byte0 ^= w.byte0; + hp_byte1 ^= w.byte1; + } + auto &w_hp = hash.galois_field_matrix[r][parity_col]; + w_hp.byte0.rewrite(); + w_hp.byte1.rewrite(); + w_hp.byte0 = hp_byte0; + w_hp.byte1 = hp_byte1; + } + } +} + +template void InputXbar::write_galois_matrix(Target::JBay::mau_regs ®s, HashTable id, + const std::map &mat); diff --git a/backends/tofino/bf-asm/jbay/input_xbar.h b/backends/tofino/bf-asm/jbay/input_xbar.h new file mode 100644 index 00000000000..45b11f3c581 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/input_xbar.h @@ -0,0 +1,27 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JBAY_INPUT_XBAR_H_ +#define BACKENDS_TOFINO_BF_ASM_JBAY_INPUT_XBAR_H_ + +#include "backends/tofino/bf-asm/input_xbar.h" + +template <> +void InputXbar::write_galois_matrix(Target::JBay::mau_regs ®s, HashTable id, + const std::map &mat); + +#endif /* BACKENDS_TOFINO_BF_ASM_JBAY_INPUT_XBAR_H_ */ diff --git a/backends/tofino/bf-asm/jbay/instruction.cpp b/backends/tofino/bf-asm/jbay/instruction.cpp new file mode 100644 index 00000000000..1eee6043d2f --- /dev/null +++ b/backends/tofino/bf-asm/jbay/instruction.cpp @@ -0,0 +1,200 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* JBay overloads for instructions #included in instruction.cpp + * WARNING -- this is included in an anonymous namespace, as VLIWInstruction is + * in that anonymous namespace */ + +template +void VLIWInstruction::write_regs_2(REGS ®s, Table *tbl, Table::Actions::Action *act) { + if (act != tbl->stage->imem_addr_use[imem_thread(tbl->gress)][act->addr]) { + LOG3("skipping " << tbl->name() << '.' 
<< act->name << " as its imem is used by " + << tbl->stage->imem_addr_use[imem_thread(tbl->gress)][act->addr]->name); + return; + } + LOG2(this); + auto &imem = regs.dp.imem; + int iaddr = act->addr / ACTION_IMEM_COLORS; + int color = act->addr % ACTION_IMEM_COLORS; + unsigned bits = encode(); + BUG_CHECK(slot >= 0); + unsigned off = slot % Phv::mau_groupsize(); + unsigned side = 0, group = 0; + switch (slot / Phv::mau_groupsize()) { + case 0: + side = 0; + group = 0; + break; + case 1: + side = 0; + group = 1; + break; + case 2: + side = 1; + group = 0; + break; + case 3: + side = 1; + group = 1; + break; + case 4: + side = 0; + group = 0; + break; + case 5: + side = 0; + group = 1; + break; + case 6: + side = 1; + group = 0; + break; + case 7: + side = 1; + group = 1; + break; + case 8: + side = 0; + group = 0; + break; + case 9: + side = 0; + group = 1; + break; + case 10: + side = 0; + group = 2; + break; + case 11: + side = 1; + group = 0; + break; + case 12: + side = 1; + group = 1; + break; + case 13: + side = 1; + group = 2; + break; + default: + BUG(); + } + + switch (Phv::reg(slot)->type) { + case Phv::Register::NORMAL: + switch (Phv::reg(slot)->size) { + case 8: + BUG_CHECK(group == 0 || group == 1); + imem.imem_subword8[side][group][off][iaddr].imem_subword8_instr = bits; + imem.imem_subword8[side][group][off][iaddr].imem_subword8_color = color; + imem.imem_subword8[side][group][off][iaddr].imem_subword8_parity = + parity(bits) ^ color; + break; + case 16: + imem.imem_subword16[side][group][off][iaddr].imem_subword16_instr = bits; + imem.imem_subword16[side][group][off][iaddr].imem_subword16_color = color; + imem.imem_subword16[side][group][off][iaddr].imem_subword16_parity = + parity(bits) ^ color; + break; + case 32: + BUG_CHECK(group == 0 || group == 1); + imem.imem_subword32[side][group][off][iaddr].imem_subword32_instr = bits; + imem.imem_subword32[side][group][off][iaddr].imem_subword32_color = color; + 
imem.imem_subword32[side][group][off][iaddr].imem_subword32_parity = + parity(bits) ^ color; + break; + default: + BUG(); + } + break; + case Phv::Register::MOCHA: + switch (Phv::reg(slot)->size) { + case 8: + BUG_CHECK(group == 0 || group == 1); + imem.imem_mocha_subword8[side][group][off - 12][iaddr] + .imem_mocha_subword_instr = bits; + imem.imem_mocha_subword8[side][group][off - 12][iaddr] + .imem_mocha_subword_color = color; + imem.imem_mocha_subword8[side][group][off - 12][iaddr] + .imem_mocha_subword_parity = parity(bits) ^ color; + break; + case 16: + imem.imem_mocha_subword16[side][group][off - 12][iaddr] + .imem_mocha_subword_instr = bits; + imem.imem_mocha_subword16[side][group][off - 12][iaddr] + .imem_mocha_subword_color = color; + imem.imem_mocha_subword16[side][group][off - 12][iaddr] + .imem_mocha_subword_parity = parity(bits) ^ color; + break; + case 32: + BUG_CHECK(group == 0 || group == 1); + imem.imem_mocha_subword32[side][group][off - 12][iaddr] + .imem_mocha_subword_instr = bits; + imem.imem_mocha_subword32[side][group][off - 12][iaddr] + .imem_mocha_subword_color = color; + imem.imem_mocha_subword32[side][group][off - 12][iaddr] + .imem_mocha_subword_parity = parity(bits) ^ color; + break; + default: + BUG(); + } + break; + case Phv::Register::DARK: + switch (Phv::reg(slot)->size) { + case 8: + BUG_CHECK(group == 0 || group == 1); + imem.imem_dark_subword8[side][group][off - 16][iaddr].imem_dark_subword_instr = + bits; + imem.imem_dark_subword8[side][group][off - 16][iaddr].imem_dark_subword_color = + color; + imem.imem_dark_subword8[side][group][off - 16][iaddr].imem_dark_subword_parity = + parity(bits) ^ color; + break; + case 16: + imem.imem_dark_subword16[side][group][off - 16][iaddr].imem_dark_subword_instr = + bits; + imem.imem_dark_subword16[side][group][off - 16][iaddr].imem_dark_subword_color = + color; + imem.imem_dark_subword16[side][group][off - 16][iaddr] + .imem_dark_subword_parity = parity(bits) ^ color; + break; + case 32: + 
BUG_CHECK(group == 0 || group == 1); + imem.imem_dark_subword32[side][group][off - 16][iaddr].imem_dark_subword_instr = + bits; + imem.imem_dark_subword32[side][group][off - 16][iaddr].imem_dark_subword_color = + color; + imem.imem_dark_subword32[side][group][off - 16][iaddr] + .imem_dark_subword_parity = parity(bits) ^ color; + break; + default: + BUG(); + } + break; + default: + BUG(); + } + + auto &power_ctl = regs.dp.actionmux_din_power_ctl; + phvRead([&](const Phv::Slice &sl) { set_power_ctl_reg(power_ctl, sl.reg.mau_id()); }); +} + +void VLIWInstruction::write_regs(Target::JBay::mau_regs ®s, Table *tbl, + Table::Actions::Action *act) { + write_regs_2(regs, tbl, act); +} diff --git a/backends/tofino/bf-asm/jbay/match_table.cpp b/backends/tofino/bf-asm/jbay/match_table.cpp new file mode 100644 index 00000000000..cd177a86af8 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/match_table.cpp @@ -0,0 +1,130 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* mau table template specializations for jbay -- #included directly in match_tables.cpp */ + +/** Write next table setup, which is JBay-specific. 
/* (doc comment continues from the previous patch line)
 * `tbl` here is the ternary indirect
 * table if there is one, or the match table otherwise */
template <>
void MatchTable::write_next_table_regs(Target::JBay::mau_regs &regs, Table *tbl) {
    auto &merge = regs.rams.match.merge;
    // Program the hit-side next-table map: local-stage and global-exec entries plus
    // long-branch tags, for both the normal hit entries and any extra LUT entries.
    if (!hit_next.empty() || !extra_next_lut.empty()) {
        merge.next_table_map_en |= (1U << logical_id);
        int i = 0;
        for (auto &n : hit_next) {
            merge.pred_map_loca[logical_id][i].pred_map_loca_next_table = n.next_table_id();
            merge.pred_map_loca[logical_id][i].pred_map_loca_exec =
                n.next_in_stage(stage->stageno) >> 1;
            merge.pred_map_glob[logical_id][i].pred_map_glob_exec =
                n.next_in_stage(stage->stageno + 1);
            merge.pred_map_glob[logical_id][i].pred_map_long_brch |= n.long_branch_tags();
            ++i;
        }
        for (auto &n : extra_next_lut) {
            merge.pred_map_loca[logical_id][i].pred_map_loca_next_table = n.next_table_id();
            merge.pred_map_loca[logical_id][i].pred_map_loca_exec =
                n.next_in_stage(stage->stageno) >> 1;
            merge.pred_map_glob[logical_id][i].pred_map_glob_exec =
                n.next_in_stage(stage->stageno + 1);
            merge.pred_map_glob[logical_id][i].pred_map_long_brch |= n.long_branch_tags();
            ++i;
        }
        // is this needed?  The model complains if we leave the unused slots as 0
        while (i < Target::NEXT_TABLE_SUCCESSOR_TABLE_DEPTH())
            merge.pred_map_loca[logical_id][i++].pred_map_loca_next_table = 0x1ff;
    }

    // Miss-side next-table configuration: address mask, miss value, and the
    // local/global exec + long-branch setup for the miss successor.
    merge.next_table_format_data[logical_id].match_next_table_adr_mask = next_table_adr_mask;
    merge.next_table_format_data[logical_id].match_next_table_adr_miss_value =
        miss_next.next_table_id();
    merge.pred_miss_exec[logical_id].pred_miss_loca_exec =
        miss_next.next_in_stage(stage->stageno) >> 1;
    merge.pred_miss_exec[logical_id].pred_miss_glob_exec =
        miss_next.next_in_stage(stage->stageno + 1);
    merge.pred_miss_long_brch[logical_id] = miss_next.long_branch_tags();
}

// Write the JBay-specific MAU match-table registers (predication / thread / branch
// configuration).  `result` is the table holding the action formats (may be this table).
template <>
void MatchTable::write_regs(Target::JBay::mau_regs &regs, int type, Table *result) {
    write_common_regs<Target::JBay>(regs, type, result);
    // FIXME -- factor this with JBay GatewayTable::standalone_write_regs
    auto &merge = regs.rams.match.merge;
    if (gress == GHOST) merge.pred_ghost_thread |= 1 << logical_id;
    merge.pred_glob_exec_thread[gress] |= 1 << logical_id;
    if (always_run || pred.empty()) merge.pred_always_run[gress] |= 1 << logical_id;

    if (long_branch_input >= 0)
        setup_muxctl(merge.pred_long_brch_lt_src[logical_id], long_branch_input);

    if (result == nullptr) result = this;

    // A table "is a branch" if any successor (miss, gateway, hit, extra LUT) names
    // a real next table, or if the action format carries a wide "next" field.
    bool is_branch = (miss_next.next_table() != nullptr);
    if (!is_branch && gateway && gateway->is_branch()) is_branch = true;
    if (!is_branch)
        for (auto &n : hit_next)
            if (n.next_table() != nullptr) {
                is_branch = true;
                break;
            }
    if (!is_branch)
        for (auto &n : extra_next_lut)
            if (n.next_table() != nullptr) {
                is_branch = true;
                break;
            }

    if (!is_branch && result->get_format_field_size("next") > 3) is_branch = true;

    // Check if any table actions have a next table miss set up
    // if yes, the pred_is_a_brch register must be set on the table to override the next table
    // configuration with this value.
    //
    // E.g.
    //  switch (mc_filter.apply().action_run) {
    //    NoAction : { // Has @defaultonly
    //      ttl_thr_check.apply();
    //    }
    //  }
    //
    // Generated bfa
    //   ...
    //   hit: [  END  ]
    //   miss:  END
    //   ...
    //   NoAction(-1, 1):
    //   - hit_allowed: { allowed: false, reason: user_indicated_default_only  }
    //   - default_only_action: { allowed: true, is_constant: true  }
    //   - handle: 0x20000015
    //   - next_table_miss:  ttl_thr_check_0
    //
    // If merge.pred_is_a_brch is not set in this usecase, the default miss configuration of 'END'
    // or 'End of Pipe' is executed and next table ttl_thr_check_0 will not be executed.
    if (!is_branch) {
        for (auto &act : *result->actions) {
            if (act.next_table_miss_ref.next_table()) {
                is_branch = true;
                break;
            }
        }
    }

    if (is_branch) merge.pred_is_a_brch |= 1 << logical_id;

    // Egress-only logical tables (not ingress, not ghost) participate in MPR global exec.
    merge.mpr_glob_exec_thread |= merge.logical_table_thread[0].logical_table_thread_egress &
                                  ~merge.logical_table_thread[0].logical_table_thread_ingress &
                                  ~merge.pred_ghost_thread;
}
// ---- patch continues: backends/tofino/bf-asm/jbay/meter.h (Apache-2.0 license header) ----
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JBAY_METER_H_ +#define BACKENDS_TOFINO_BF_ASM_JBAY_METER_H_ + +template +void MeterTable::setup_teop_regs_2(REGS ®s, int meter_group_index) { + BUG_CHECK(teop >= 0 && teop < 4); + BUG_CHECK(gress == EGRESS); + + auto &adrdist = regs.rams.match.adrdist; + if (!teop_initialized) { + // assume this stage driving teop + auto delay = stage->pipelength(gress) - stage->pred_cycle(gress) - 7; + adrdist.teop_bus_ctl[teop].teop_bus_ctl_delay = delay; + adrdist.teop_bus_ctl[teop].teop_bus_ctl_delay_en = 1; + adrdist.teop_bus_ctl[teop].teop_bus_ctl_meter_en = 1; + + adrdist.meter_to_teop_adr_oxbar_ctl[teop].enabled_2bit_muxctl_select = meter_group_index; + adrdist.meter_to_teop_adr_oxbar_ctl[teop].enabled_2bit_muxctl_enable = 1; + teop_initialized = true; + } + + adrdist.teop_to_meter_adr_oxbar_ctl[meter_group_index].enabled_2bit_muxctl_select = teop; + adrdist.teop_to_meter_adr_oxbar_ctl[meter_group_index].enabled_2bit_muxctl_enable = 1; + + // count all tEOP events + adrdist.dp_teop_meter_ctl[meter_group_index].dp_teop_meter_ctl_err = 0; + // Refer to JBAY uArch Section 6.4.4.10.8 + // + // The user of the incoming tEOP address needs to consider the original + // driver. For instance, a meter address driver will be aliged with the LSB + // of the 18b incoming address, whereas a single-entry stats driver will be + // already padded with 2 zeros. + // + // For example, dp_teop_meter_ctl.dp_teop_meter_ctl_rx_shift must be + // programmed to 2 to compensate for the single-entry stats address driver: + // + // Meter (23b) = {4b CMD+Color, ((dp_teop{6b VPN, 10b addr, 2b subword + // zeros} >> 2) + 7b zero pad)} + // + // As per above, the dp_teop_meter_ctl_rx_shift is set based on the original + // driver. For a meter address driving there is no need for any shift, + // however if a stats address is driving then it needs to be shifted by 2. 
+ // Compiler currently does not use this mechanism where a stats address is + // driving the meter, this is scope for optimization in future. + adrdist.dp_teop_meter_ctl[meter_group_index].dp_teop_meter_ctl_rx_shift = 0; + adrdist.dp_teop_meter_ctl[meter_group_index].dp_teop_meter_ctl_rx_en = 1; + + auto &meter = regs.rams.map_alu.meter_group[meter_group_index].meter; + meter.meter_ctl_teop_en = 1; +} + +template +void MeterTable::write_alu_vpn_range_2(REGS ®s) { + auto &adrdist = regs.rams.match.adrdist; + int minvpn, sparevpn; + + // Used to validate the BFA VPN configuration + std::set vpn_processed; + bitvec vpn_range; + + // Get Spare VPN + layout_vpn_bounds(minvpn, sparevpn, false); + + for (int home_row : home_rows) { + bool block_start = false; + bool block_end = false; + int min = 1000000; + int max = -1; + for (Layout &logical_row : layout) { + // Block Start with the home row and End with the Spare VPN + if (logical_row.row == home_row) block_start = true; + + if (block_start) { + for (auto v : logical_row.vpns) { + if (v == sparevpn) { + block_end = true; + break; + } + if (vpn_processed.count(v)) + error(home_lineno, "Multiple instance of the VPN %d detected", v); + else + vpn_processed.insert(v); + + if (v < min) min = v; + if (v > max) max = v; + } + } + if (block_end) { + BUG_CHECK(min != 1000000 && max != -1); + + bitvec block_range(min, max - min + 1); + if (vpn_range.intersects(block_range)) + error(home_lineno, "Overlapping of VPN range detected"); + else + vpn_range |= block_range; + + adrdist.mau_meter_alu_vpn_range[home_row / 4].meter_vpn_base = min; + adrdist.mau_meter_alu_vpn_range[home_row / 4].meter_vpn_limit = max; + adrdist.mau_meter_alu_vpn_range[home_row / 4].meter_vpn_range_check_enable = 1; + for (MatchTable *m : match_tables) + adrdist.meter_alu_adr_range_check_icxbar_map[home_row / 4] |= 1U + << m->logical_id; + break; + } + } + BUG_CHECK(block_start && block_end); + } + + if (vpn_range != bitvec(minvpn, sparevpn - minvpn)) + 
error(home_lineno, "VPN range not entirely covered"); +} + +template <> +void MeterTable::setup_teop_regs(Target::JBay::mau_regs ®s, int meter_group_index) { + setup_teop_regs_2(regs, meter_group_index); +} + +template <> +void MeterTable::write_alu_vpn_range(Target::JBay::mau_regs ®s) { + write_alu_vpn_range_2(regs); +} + +#endif /* BACKENDS_TOFINO_BF_ASM_JBAY_METER_H_ */ diff --git a/backends/tofino/bf-asm/jbay/parser.cpp b/backends/tofino/bf-asm/jbay/parser.cpp new file mode 100644 index 00000000000..fb5b53caa17 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/parser.cpp @@ -0,0 +1,583 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/parser-tofino-jbay.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/top_level.h" + +template <> +void Parser::Checksum::write_config(Target::JBay::parser_regs ®s, Parser *parser) { + if (unit == 0) + write_row_config(regs.memory[gress].po_csum_ctrl_0_row[addr]); + else if (unit == 1) + write_row_config(regs.memory[gress].po_csum_ctrl_1_row[addr]); + else if (unit == 2) + write_row_config(regs.memory[gress].po_csum_ctrl_2_row[addr]); + else if (unit == 3) + write_row_config(regs.memory[gress].po_csum_ctrl_3_row[addr]); + else if (unit == 4) + write_row_config(regs.memory[gress].po_csum_ctrl_4_row[addr]); + else + error(lineno, "invalid unit for parser checksum"); +} + +template <> +void Parser::Checksum::write_output_config(Target::JBay::parser_regs ®s, Parser *pa, + State::Match * /*ma*/, void *_row, + unsigned &used) const { + if (type != 0 || !dest) return; + + Target::JBay::parser_regs::_memory::_po_action_row *row = + (Target::JBay::parser_regs::_memory::_po_action_row *)_row; + + // checksum verification outputs "steal" extractors, see parser uArch (6.3.6) + + for (int i = 0; i < 20; ++i) { + if (used & (1 << i)) continue; + used |= 1 << i; + row->phv_dst[i] = dest->reg.parser_id(); + row->extract_type[i] = 3; + return; + } + error(lineno, "Ran out of phv output extractor slots"); +} + +template <> +void Parser::CounterInit::write_config(Target::JBay::parser_regs ®s, gress_t gress, int idx) { + auto &ctr_init_ram = regs.memory[gress].ml_ctr_init_ram[idx]; + ctr_init_ram.add = add; + ctr_init_ram.mask_8 = mask; + ctr_init_ram.rotate = rot; + ctr_init_ram.max = max; + ctr_init_ram.src = src; +} + +template <> +void Parser::State::Match::write_lookup_config(Target::JBay::parser_regs ®s, State *state, + int r) const { + auto &row = regs.memory[state->gress].ml_tcam_row[r]; + match_t lookup = {0, 0}; + unsigned dont_care = 0; + for (int i = 0; i < 
4; i++) { + lookup.word0 <<= 8; + lookup.word1 <<= 8; + dont_care <<= 8; + if (state->key.data[i].bit >= 0) { + lookup.word0 |= ((match.word0 >> state->key.data[i].bit) & 0xff); + lookup.word1 |= ((match.word1 >> state->key.data[i].bit) & 0xff); + } else { + dont_care |= 0xff; + } + } + lookup.word0 |= dont_care; + lookup.word1 |= dont_care; + for (int i = 3; i >= 0; i--) { + row.w0_lookup_8[i] = lookup.word0 & 0xff; + row.w1_lookup_8[i] = lookup.word1 & 0xff; + lookup.word0 >>= 8; + lookup.word1 >>= 8; + } + row.w0_curr_state = state->stateno.word0; + row.w1_curr_state = state->stateno.word1; + if (state->key.ctr_zero >= 0) { + row.w0_ctr_zero = (match.word0 >> state->key.ctr_zero) & 1; + row.w1_ctr_zero = (match.word1 >> state->key.ctr_zero) & 1; + } else { + row.w0_ctr_zero = row.w1_ctr_zero = 1; + } + if (state->key.ctr_neg >= 0) { + row.w0_ctr_neg = (match.word0 >> state->key.ctr_neg) & 1; + row.w1_ctr_neg = (match.word1 >> state->key.ctr_neg) & 1; + } else { + row.w0_ctr_neg = row.w1_ctr_neg = 1; + } + row.w0_ver_0 = row.w1_ver_0 = 1; + row.w0_ver_1 = row.w1_ver_1 = 1; +} + +/* FIXME -- combine these next two methods into a single method on MatchKey */ +/* FIXME -- factor Tofino/JBay variation better (most is common) */ +template <> +int Parser::State::write_lookup_config(Target::JBay::parser_regs ®s, Parser *pa, State *state, + int row, const std::vector &prev) { + LOG2("-- checking match from state " << name << " (" << stateno << ')'); + auto &ea_row = regs.memory[gress].ml_ea_row[row]; + int max_off = -1; + for (int i = 0; i < 4; i++) { + if (key.data[i].bit < 0) continue; + bool set = true; + for (State *p : prev) { + if (p->key.data[i].bit >= 0) { + set = false; + if (p->key.data[i].byte != key.data[i].byte) + error(p->lineno, + "Incompatible match fields between states " + "%s and %s, triggered from state %s", + name.c_str(), p->name.c_str(), state->name.c_str()); + } + } + if (set && key.data[i].byte != MatchKey::USE_SAVED) { + int off = 
key.data[i].byte + ea_row.shift_amt; + // Valid offset ranges: + // 0..31 : Input packet + // 60..63 : Scratch registers + if ((off < 0) || ((off > 31) && (off < 60)) || (off > 63)) { + error(key.lineno, + "Match offset of %d in state %s out of range " + "for previous state %s", + key.data[i].byte, name.c_str(), state->name.c_str()); + } + ea_row.lookup_offset_8[i] = off; + ea_row.ld_lookup_8[i] = 1; + max_off = std::max(max_off, off); + } + } + return max_off; +} + +template <> +int Parser::State::Match::write_load_config(Target::JBay::parser_regs ®s, Parser *pa, + State *state, int row) const { + auto &ea_row = regs.memory[state->gress].ml_ea_row[row]; + int max_off = -1; + for (int i = 0; i < 4; i++) { + if (load.data[i].bit < 0) continue; + if (load.data[i].byte != MatchKey::USE_SAVED) { + int off = load.data[i].byte; + // Valid offset ranges: + // 0..31 : Input packet + // 60..63 : Scratch registers + if ((off < 0) || ((off > 31) && (off < 60)) || (off > 63)) { + error(load.lineno, "Load offset of %d in state %s out of range", load.data[i].byte, + state->name.c_str()); + } + ea_row.lookup_offset_8[i] = off; + ea_row.ld_lookup_8[i] = 1; + max_off = std::max(max_off, off); + } + ea_row.sv_lookup_8[i] = (load.save >> i) & 1; + } + + return max_off; +} + +static void write_output_slot(int lineno, Target::JBay::parser_regs::_memory::_po_action_row *row, + unsigned &used, int src, int dest, int bytemask, bool offset) { + BUG_CHECK(bytemask > 0 && bytemask < 4); + for (int i = 0; i < 20; ++i) { + if (used & (1 << i)) continue; + row->phv_dst[i] = dest; + row->phv_src[i] = src; + if (offset) row->phv_offset_add_dst[i] = 1; + row->extract_type[i] = bytemask; + used |= 1 << i; + return; + } + error(lineno, "Ran out of phv output slots"); +} + +template <> +void Parser::State::Match::write_row_config(Target::JBay::parser_regs ®s, Parser *pa, + State *state, int row, Match *def, + json::map &ctxt_json) { + write_common_row_config(regs, pa, state, row, def, ctxt_json); + 
auto &action_row = regs.memory[state->gress].po_action_row[row]; + + if (disable_partial_hdr_err > 0) action_row.disable_partial_hdr_err = 1; +} + +template <> +int Parser::State::Match::Save::write_output_config(Target::JBay::parser_regs ®s, void *_row, + unsigned &used, int, int) const { + Target::JBay::parser_regs::_memory::_po_action_row *row = + (Target::JBay::parser_regs::_memory::_po_action_row *)_row; + int dest = where->reg.parser_id(); + int mask = (1 << (1 + where->hi / 8U)) - (1 << (where->lo / 8U)); + int lo = this->lo; + // 8b containers are paired in 16b chunks in the parser + // If we're extracting to the upper half of a chunk (the odd 8b register) then + // adjust the extract type to be to the upper half + if (where->reg.size == 8 && mask == 1) { + if (where->reg.index & 1) { + mask <<= 1; + } + } + if (flags & ROTATE) error(where.lineno, "no rotate support in Tofino2"); + + // All containers are 16b in the parser. 32b container extracts are implemented as + // a pair of 16b extracts. + int bytemask = (mask >> 2) & 3; + if (bytemask) { + write_output_slot(where.lineno, row, used, lo, dest + 1, bytemask, flags & OFFSET); + lo += bitcount(mask & 0xc); + } + + bytemask = mask & 3; + if (bytemask) write_output_slot(where.lineno, row, used, lo, dest, bytemask, flags & OFFSET); + return hi; +} + +#define SAVE_ONLY_USED_SLOTS 0xffc00 +static void write_output_const_slot(int lineno, + Target::JBay::parser_regs::_memory::_po_action_row *row, + unsigned &used, unsigned src, int dest, int bytemask, + int flags) { + // use bits 24..27 of 'used' to track the two constant slots + BUG_CHECK(bytemask > 0 && bytemask < 4); + BUG_CHECK((src & ~((0xffff00ff >> (8 * (bytemask - 1))) & 0xffff)) == 0); + // FIXME -- should be able to treat this as 4x8-bit rather than 2x16-bit slots, as long + // as the ROTATE flag is consistent for each half. 
+ int cslot = -1; + // see if const already allocated and reuse + for (cslot = 0; cslot < 2; cslot++) + if (row->val_const[cslot] == src && (used & (bytemask << (2 * cslot + 24)))) break; + if (cslot >= 2) { + for (cslot = 0; cslot < 2; cslot++) + if (0 == (used & (bytemask << (2 * cslot + 24)))) break; + } + if (cslot >= 2) { + error(lineno, "Ran out of constant output slots"); + return; + } + row->val_const[cslot] |= src; + if (flags & 2 /*ROTATE*/) row->val_const_rot[cslot] = 1; + used |= bytemask << (2 * cslot + 24); + unsigned tmpused = used | SAVE_ONLY_USED_SLOTS; + write_output_slot(lineno, row, tmpused, 62 - 2 * cslot + (bytemask == 1), dest, bytemask, + flags & 1 /*OFFSET*/); + used |= tmpused & ~SAVE_ONLY_USED_SLOTS; +} + +template <> +void Parser::State::Match::Set::write_output_config(Target::JBay::parser_regs ®s, void *_row, + unsigned &used, int, int) const { + Target::JBay::parser_regs::_memory::_po_action_row *row = + (Target::JBay::parser_regs::_memory::_po_action_row *)_row; + int dest = where->reg.parser_id(); + int mask = (1 << (1 + where->hi / 8U)) - (1 << (where->lo / 8U)); + unsigned what = this->what << where->lo; + // Trim the bytes to be written, unless the value is being rotated + if (what && !(flags & ROTATE)) { + for (unsigned i = 0; i < 4; ++i) + if (((what >> (8 * i)) & 0xff) == 0) mask &= ~(1 << i); + } + if (where->reg.size == 8) { + BUG_CHECK((mask & ~1) == 0); + if (where->reg.index & 1) { + mask <<= 1; + what <<= 8; + } + } + if (mask & 3) + write_output_const_slot(where.lineno, row, used, what & 0xffff, dest, mask & 3, flags); + if (mask & 0xc) { + write_output_const_slot(where.lineno, row, used, (what >> 16) & 0xffff, dest + 1, + (mask >> 2) & 3, flags); + if ((mask & 3) && (flags & ROTATE)) row->val_const_32b_bond = 1; + } +} + +/* Tofino2 has a simple uniform array of 20 extractors, so doesn't really need an output + * map to track them. 
Constants 'sets' are handled by having 4 bytes of data that is set + * per row and extrated from the input buffer like a 'save', except only the first 10 + * extractors can access them. So `output_map` ends up being just a pointer to the + * register object for the row, and `used` is a 24-bit bitmap, tracking the 20 extractors + * and the 4 constant bytes. + */ +template <> +void *Parser::setup_phv_output_map(Target::JBay::parser_regs ®s, gress_t gress, int row) { + return ®s.memory[gress].po_action_row[row]; +} +template <> +void Parser::mark_unused_output_map(Target::JBay::parser_regs & /*regs*/, void * /*map*/, + unsigned /*used*/) { + // unneeded on jbay +} + +template <> +void Parser::State::Match::HdrLenIncStop::write_config( + JBay::memories_parser_::_po_action_row &po_row) const { + po_row.hdr_len_inc_stop = 1; + po_row.hdr_len_inc_final_amt = final_amt; +} + +template <> +void Parser::State::Match::Clot::write_config(JBay::memories_parser_::_po_action_row &po_row, + int idx, bool offset_add) const { + po_row.clot_tag[idx] = tag; + po_row.clot_offset[idx] = start; + if (load_length) { + po_row.clot_type[idx] = 1; + po_row.clot_len_src[idx] = length; + po_row.clot_en_len_shr[idx] = length_shift; + // po_row.clot_len_mask[idx] = length_mask; -- FIXME -- CSR reg commented out + } else { + po_row.clot_len_src[idx] = length - 1; + po_row.clot_type[idx] = 0; + po_row.clot_en_len_shr[idx] = 1; + } + po_row.clot_has_csum[idx] = csum_unit > 0; + po_row.clot_tag_offset_add[idx] = offset_add; +} + +template <> +void Parser::State::Match::write_counter_config( + Target::JBay::parser_regs::_memory::_ml_ea_row &ea_row) const { + if (ctr_load) { + switch (ctr_ld_src) { + case 0: + ea_row.ctr_op = 2; + break; + case 1: + ea_row.ctr_op = 3; + break; + default: + error(lineno, "Unsupported parser counter load instruction (JBay)"); + } + } else if (ctr_stack_pop) { + ea_row.ctr_op = 1; + } else { // add + ea_row.ctr_op = 0; + } + + ea_row.ctr_amt_idx = ctr_instr ? 
ctr_instr->addr : ctr_imm_amt; + ea_row.ctr_stack_push = ctr_stack_push; + ea_row.ctr_stack_upd_w_top = ctr_stack_upd_w_top; +} + +// Workaround for JBAY-2717: parser counter adds RAM index or immediate value +// to the pushed value when doing push + update_w_top. To cancel this offset, +// we subtract the amount on pop. +void jbay2717_workaround(Parser *parser, Target::JBay::parser_regs ®s) { + for (auto &kv : parser->match_to_row) { + if (kv.first->ctr_stack_pop) { + for (auto p : kv.first->get_all_preds()) { + if (p->ctr_stack_push && p->ctr_stack_upd_w_top) { + auto &ea_row = regs.memory[parser->gress].ml_ea_row[kv.second]; + auto adjust = p->ctr_instr ? p->ctr_instr->addr : p->ctr_imm_amt; + ea_row.ctr_amt_idx = ea_row.ctr_amt_idx.value - adjust; + break; + } + } + } + } +} + +template <> +void Parser::write_config(Target::JBay::parser_regs ®s, json::map &ctxt_json, + bool single_parser) { + if (single_parser) { + for (auto st : all) + st->write_config(regs, this, ctxt_json[st->gress == EGRESS ? 
"egress" : "ingress"]); + } else { + ctxt_json["states"] = json::vector(); + for (auto st : all) st->write_config(regs, this, ctxt_json["states"]); + } + if (error_count > 0) return; + + jbay2717_workaround(this, regs); + + int i = 0; + for (auto ctr : counter_init) { + if (ctr) ctr->write_config(regs, gress, i); + ++i; + } + + for (i = 0; i < checksum_use.size(); i++) { + for (auto csum : checksum_use[i]) { + if (csum) { + csum->write_config(regs, this); + if (csum->dest) phv_use[csum->gress].setbit(csum->dest->reg.uid); + } + } + } + + if (gress == EGRESS) { + regs.egress.epbreg.chan0_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan1_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan2_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan3_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan4_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan5_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan6_group.chnl_ctrl.meta_opt = meta_opt; + regs.egress.epbreg.chan7_group.chnl_ctrl.meta_opt = meta_opt; + } + + // All phvs used globaly by egress and not by ingress parser should be owned by + // egress parser so they get zeroed properly in the parser + phv_use[EGRESS] |= remove_nonparser(Phv::use(EGRESS)) - expand_parser_groups(phv_use[INGRESS]); + + setup_jbay_ownership(phv_use, regs.merge.ul.phv_owner_127_0.owner, + regs.merge.ur.phv_owner_255_128.owner, regs.main[INGRESS].phv_owner.owner, + regs.main[EGRESS].phv_owner.owner); + + setup_jbay_clear_on_write(phv_allow_clear_on_write, regs.merge.ul.phv_clr_on_wr_127_0.clr, + regs.merge.ur.phv_clr_on_wr_255_128.clr, + regs.main[INGRESS].phv_clr_on_wr.clr, + regs.main[EGRESS].phv_clr_on_wr.clr); + + setup_jbay_no_multi_write(phv_allow_bitwise_or, phv_allow_clear_on_write, + regs.main[INGRESS].no_multi_wr.nmw, + regs.main[EGRESS].no_multi_wr.nmw); + + regs.main[gress].hdr_len_adj.amt = hdr_len_adj; + + if (parser_error.lineno >= 0) { + for (auto i : {0, 1}) { + 
regs.main[gress].err_phv_cfg[i].en = 1; + regs.main[gress].err_phv_cfg[i].dst = parser_error->reg.parser_id(); + regs.main[gress].err_phv_cfg[i].no_tcam_match_err_en = 1; + regs.main[gress].err_phv_cfg[i].partial_hdr_err_en = 1; + regs.main[gress].err_phv_cfg[i].ctr_range_err_en = 1; + regs.main[gress].err_phv_cfg[i].timeout_iter_err_en = 1; + regs.main[gress].err_phv_cfg[i].timeout_cycle_err_en = 1; + regs.main[gress].err_phv_cfg[i].src_ext_err_en = 1; + regs.main[gress].err_phv_cfg[i].phv_owner_err_en = 1; + regs.main[gress].err_phv_cfg[i].multi_wr_err_en = 1; + regs.main[gress].err_phv_cfg[i].aram_mbe_en = 1; + regs.main[gress].err_phv_cfg[i].fcs_err_en = 1; + regs.main[gress].err_phv_cfg[i].csum_mbe_en = 1; + } + } else { + // en has a reset value of 1 and that is why we have to explicitly disable it + // otherwise dst will assume default value of 0 + for (auto i : {0, 1}) regs.main[gress].err_phv_cfg[i].en = 0; + } + + int i_start = Stage::first_table(INGRESS) & 0x1ff; + for (auto ® : regs.merge.ll1.i_start_table) reg.table = i_start; + + int e_start = Stage::first_table(EGRESS) & 0x1ff; + for (auto ® : regs.merge.lr1.e_start_table) reg.table = e_start; + + regs.merge.lr1.g_start_table.table = Stage::first_table(GHOST) & 0x1ff; + if (ghost_parser.size()) { + // tm_status_phv sets the location for ghost intrinsic metadata in the + // parser + // The parser carves up the 4k of PHV it sees into 256 x 16b containers. + // (32b MAU containers map to a pair of parser 16b containers, and 2 x + // 8b MAU containers map to a single parser 16b container.) PHV + // specified here will take the two containers at address + // { PHV & 0xfe, PHV & 0xfe + 1 }. + // Hence, ghost intrinsic metadata is assumed to be allocated in a + // contiguous 32b location. 
+ regs.merge.lr1.tm_status_phv.en = 1; + regs.merge.lr1.tm_status_phv.phv = ghost_parser[0]->reg.parser_id(); + if (ghost_pipe_mask != 0xf) // if not default set given value + regs.merge.lr1.tm_status_phv.pipe_mask = ghost_pipe_mask; + } + + if (gress == INGRESS) { + for (auto &ref : regs.ingress.prsr) + ref.set("regs.parser.main.ingress", ®s.main[INGRESS]); + } + if (gress == EGRESS) { + for (auto &ref : regs.egress.prsr) ref.set("regs.parser.main.egress", ®s.main[EGRESS]); + } + if (error_count == 0) { + if (options.condense_json) { + // FIXME -- removing the uninitialized memory causes problems? + // FIXME -- walle gets the addresses wrong. Might also require explicit + // FIXME -- zeroing in the driver on real hardware + // regs.memory[INGRESS].disable_if_reset_value(); + // regs.memory[EGRESS].disable_if_reset_value(); + // regs.ingress.disable_if_reset_value(); + // regs.egress.disable_if_reset_value(); + regs.main[INGRESS].disable_if_reset_value(); + regs.main[EGRESS].disable_if_reset_value(); + regs.merge.disable_if_reset_value(); + } + if (options.gen_json) { + if (single_parser) { + regs.memory[INGRESS].emit_json(*open_output("memories.parser.ingress.cfg.json"), + "ingress"); + regs.memory[EGRESS].emit_json(*open_output("memories.parser.egress.cfg.json"), + "egress"); + regs.ingress.emit_json(*open_output("regs.parser.ingress.cfg.json")); + regs.egress.emit_json(*open_output("regs.parser.egress.cfg.json")); + regs.main[INGRESS].emit_json(*open_output("regs.parser.main.ingress.cfg.json"), + "ingress"); + regs.main[EGRESS].emit_json(*open_output("regs.parser.main.egress.cfg.json"), + "egress"); + regs.merge.emit_json(*open_output("regs.parse_merge.cfg.json")); + } else { + regs.memory[INGRESS].emit_json( + *open_output("memories.parser.ingress.%02x.cfg.json", parser_no), "ingress"); + regs.memory[EGRESS].emit_json( + *open_output("memories.parser.egress.%02x.cfg.json", parser_no), "egress"); + regs.ingress.emit_json( + 
*open_output("regs.parser.ingress.%02x.cfg.json", parser_no)); + regs.egress.emit_json(*open_output("regs.parser.egress.%02x.cfg.json", parser_no)); + regs.main[INGRESS].emit_json(*open_output("regs.parser.main.ingress.cfg.json"), + "ingress"); + regs.main[EGRESS].emit_json(*open_output("regs.parser.main.egress.cfg.json"), + "egress"); + regs.merge.emit_json(*open_output("regs.parse_merge.cfg.json")); + } + } + } + + /* multiple JBay parser mem blocks can respond to same address range to allow programming + * the device with a single write operation. See: pardereg.pgstnreg.ibprsr4reg.prsr.mem_ctrl */ + if (gress == INGRESS) { + for (unsigned i = 0; i < TopLevel::regs()->mem_pipe.parde.i_prsr_mem.size(); + options.singlewrite ? i += 4 : i += 1) { + TopLevel::regs()->mem_pipe.parde.i_prsr_mem[i].set( + "memories.parser.ingress", ®s.memory[INGRESS]); + } + } + + if (gress == EGRESS) { + for (unsigned i = 0; i < TopLevel::regs()->mem_pipe.parde.e_prsr_mem.size(); + options.singlewrite ? i += 4 : i += 1) { + TopLevel::regs()->mem_pipe.parde.e_prsr_mem[i].set( + "memories.parser.egress", ®s.memory[EGRESS]); + } + } + + if (gress == INGRESS) { + for (auto &ref : TopLevel::regs()->reg_pipe.pardereg.pgstnreg.ipbprsr4reg) + ref.set("regs.parser.ingress", ®s.ingress); + } + + if (gress == EGRESS) { + for (auto &ref : TopLevel::regs()->reg_pipe.pardereg.pgstnreg.epbprsr4reg) + ref.set("regs.parser.egress", ®s.egress); + } + TopLevel::regs()->reg_pipe.pardereg.pgstnreg.pmergereg.set("regs.parse_merge", + ®s.merge); +} + +template <> +void Parser::gen_configuration_cache(Target::JBay::parser_regs ®s, json::vector &cfg_cache) { + std::string reg_fqname; + std::string reg_name; + std::string reg_value_str; + unsigned reg_width = 13; + + /* Publishing meta_opt field for chnl_ctrl register */ + /* Are ovr_pipeid, chnl_clean, init_dprsr_credit, init_ebuf_credit always handled by the + * driver? 
+ */ + for (int i = 0; i < 9; i++) { + reg_fqname = "pardereg.pgstnreg.epbprsr4reg[" + std::to_string(i) + + "].epbreg.chan0_group.chnl_ctrl.meta_opt"; + reg_name = "epb" + std::to_string(i) + "parser0_chnl_ctrl_0"; + reg_value_str = int_to_hex_string(meta_opt, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } +} diff --git a/backends/tofino/bf-asm/jbay/phv.cpp b/backends/tofino/bf-asm/jbay/phv.cpp new file mode 100644 index 00000000000..ba01b84ae00 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/phv.cpp @@ -0,0 +1,66 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/jbay/phv.h" + +void Target::JBay::Phv::init_regs(::Phv &phv) { + // Allocating JBay regs so the uids map to mau register encodings + static const struct { + char code[2]; + unsigned size, count; + } groups[] = {{"W", 32, 4}, {"B", 8, 4}, {"H", 16, 6}}; + static const struct { + char code[2]; + Register::type_t type; + unsigned count; + } types[] = {{"", Register::NORMAL, 12}, {"M", Register::MOCHA, 4}, {"D", Register::DARK, 4}}; + unsigned uid = 0; + unsigned byte = 0; + unsigned deparser_id = 0; + phv.regs.resize(280); + for (unsigned i = 0; i < sizeof groups / sizeof *groups; i++) { + unsigned idx[sizeof types / sizeof *types] = {0}; + for (unsigned j = 0; j < groups[i].count; j++) { + for (unsigned k = 0; k < sizeof types / sizeof *types; k++) { + for (unsigned l = 0; l < types[k].count; l++, idx[k]++, uid++) { + auto reg = new Register; + phv.regs[uid] = reg; + memset(reg->name, 0, sizeof(reg->name)); + snprintf(reg->name, sizeof(reg->name), "%.2s%.2s%d", types[k].code, + groups[i].code, idx[k]); + reg->type = types[k].type; + reg->index = idx[k]; + reg->uid = uid; + reg->size = groups[i].size; + if (reg->type == Register::DARK) { + reg->parser_id_ = reg->deparser_id_ = -1; + } else { + reg->parser_id_ = byte / 2U; + reg->deparser_id_ = deparser_id++; + byte += reg->size / 8U; + } + phv.names[INGRESS][reg->name][0].slice = ::Phv::Slice(*reg, 0, reg->size - 1); + phv.names[EGRESS][reg->name][0].slice = ::Phv::Slice(*reg, 0, reg->size - 1); + phv.names[GHOST][reg->name][0].slice = ::Phv::Slice(*reg, 0, reg->size - 1); + } + } + } + } + BUG_CHECK(uid == phv.regs.size()); + BUG_CHECK(deparser_id == 224); + BUG_CHECK(byte == 512); +} diff --git a/backends/tofino/bf-asm/jbay/phv.h b/backends/tofino/bf-asm/jbay/phv.h new file mode 100644 index 00000000000..92b7af5532a --- /dev/null +++ b/backends/tofino/bf-asm/jbay/phv.h @@ -0,0 +1,56 @@ +/** + * Copyright (C) 2024 Intel 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JBAY_PHV_H_ +#define BACKENDS_TOFINO_BF_ASM_JBAY_PHV_H_ + +#include "backends/tofino/bf-asm/phv.h" + +class Target::JBay::Phv : public Target::Phv { + friend class ::Phv; + struct Register : public ::Phv::Register { + short parser_id_, deparser_id_; + int parser_id() const override { return parser_id_; } + int mau_id() const override { return uid < 280 ? 
uid : -1; } + int ixbar_id() const override { + static const int ixbar_permute[16] = {0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, -6, -6, 0, 0}; + return deparser_id_ + ixbar_permute[deparser_id_ & 0xf]; + } + int deparser_id() const override { return deparser_id_; } + }; + void init_regs(::Phv &phv) override; + target_t type() const override { return JBAY; } + unsigned mau_groupsize() const override { return 20; } +}; + +class Target::Tofino2H::Phv : public Target::JBay::Phv { + target_t type() const override { return TOFINO2H; } +}; + +class Target::Tofino2M::Phv : public Target::JBay::Phv { + target_t type() const override { return TOFINO2M; } +}; + +class Target::Tofino2U::Phv : public Target::JBay::Phv { + target_t type() const override { return TOFINO2U; } +}; + +class Target::Tofino2A0::Phv : public Target::JBay::Phv { + target_t type() const override { return TOFINO2A0; } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_JBAY_PHV_H_ */ diff --git a/backends/tofino/bf-asm/jbay/salu_inst.cpp b/backends/tofino/bf-asm/jbay/salu_inst.cpp new file mode 100644 index 00000000000..f6d0ce90425 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/salu_inst.cpp @@ -0,0 +1,493 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* JBay template specializations for instructions #included in salu_inst.cpp + * WARNING -- this is included in an anonymous namespace, as these SaluInstruction + * subclasses are all defined in that anonymous namespace */ + +struct DivMod : public AluOP { + struct Decode : public AluOP::Decode { + Decode(const char *name, target_t targ, int opc) : AluOP::Decode(name, targ, opc) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override { + auto *rv = new DivMod(this, op[0].lineno); + if (op.size != 3) error(op[0].lineno, "divmod must have exactly 2 operands"); + if (op.size > 1) rv->srca = operand(tbl, act, op[1]); + if (op.size > 2) rv->srcb = operand(tbl, act, op[2]); + rv->dest = AluOP::HI; + rv->slot = ALU2HI; + return rv; + } + }; + DivMod(const Decode *op, int l) : AluOP(op, l) {} + + Instruction *pass1(Table *tbl, Table::Actions::Action *act) override { + tbl->stage->table_use[timing_thread(tbl->gress)] |= Stage::USE_STATEFUL_DIVIDE; + BUG_CHECK(tbl->to(), "stateful instruction on non-stateful table?"); + tbl->to()->divmod_used = true; + return AluOP::pass1(tbl, act); + } + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +// setz op, so can OR with alu1hi to get that result +DivMod::Decode opDIVMOD("divmod", JBAY, 0x00); + +void DivMod::write_regs(Target::Tofino::mau_regs &, Table *, Table::Actions::Action *) { BUG(); } +void DivMod::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + AluOP::write_regs(regs, tbl, act); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu_instr_common = meter_group.stateful.salu_instr_common[act->code]; + salu_instr_common.salu_divide_enable |= 1; +} + +struct MinMax : public SaluInstruction { + const struct Decode : public Instruction::Decode { + std::string name; + unsigned 
opcode; + Decode(const char *name, target_t targ, int op) + : Instruction::Decode(name, targ, STATEFUL_ALU), name(name), opcode(op) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + bool phv = false; // source is mem or phv + operand mask, postmod; + // constants for mask and postmod packed together + boost::optional constval = boost::none; + MinMax(const Decode *op, int l) : SaluInstruction(l), opc(op) {} + std::string name() override { return opc->name; }; + Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *tbl, Table::Actions::Action *) override; + bool salu_alu() const override { return true; } + bool equiv(Instruction *a_) override { + if (auto *a = dynamic_cast(a_)) + return opc == a->opc && phv == a->phv && mask == a->mask && postmod == a->postmod; + return false; + } + bool phvRead(std::function) override { return phv; } + void dbprint(std::ostream &out) const override { + out << "INSTR: " << opc->name << (phv ? 
"phv, " : "mem, ") << mask; + if (postmod) out << ", " << postmod; + } + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +MinMax::Decode opMIN8("min8", JBAY, 0), opMAX8("max8", JBAY, 1), opMIN16("min16", JBAY, 2), + opMAX16("max16", JBAY, 3); + +Instruction *MinMax::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + auto *rv = new MinMax(this, op[0].lineno); + if (op.size > 2) { + if (op[1] == "phv") + rv->phv = true; + else if (op[1] != "mem") + error(op[1].lineno, "%s source must be 'mem' or 'phv'", op[0].s); + rv->mask = operand(tbl, act, op[2]); + if (!rv->mask.to() && !rv->mask.to()) + error(op[1].lineno, "%s mask must be constant or from phv or hash_dist", op[0].s); + } else { + error(op[0].lineno, "%s must have a single mask operand", op[0].s); + } + if (op.size == 4) { + rv->postmod = operand(tbl, act, op[3]); + } else if (op.size > 4) { + error(op[0].lineno, "too many operands for %s", op[0].s); + } + rv->slot = MINMAX; + return rv; +} +Instruction *MinMax::pass1(Table *tbl_, Table::Actions::Action *act) { + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int mask_size = (opc->opcode & 2) ? 
8 : 16; + constval = boost::none; + mask->pass1(tbl); + act->minmax_use = true; + if (auto k = mask.to()) { + if (k->value < 0 || k->value >= (1U << mask_size) || mask.neg) + error(k->lineno, "%s mask value out of range", name().c_str()); + constval = k->value & ((1U << mask_size) - 1); + } else if (auto p = mask.to()) { + if (p->phv_index(tbl)) + error(lineno, "%s phv mask must come from the lower half input", name().c_str()); + } else { + error(mask->lineno, "%s invalid mask", name().c_str()); + } + if (postmod) { + if (auto k = postmod.to()) { + if (k->value < 0) { + k->value = -k->value; + postmod.neg = !postmod.neg; + } + if (k->value > 255) error(lineno, "%s post mod too large", name().c_str()); + constval = constval.get_value_or(0) | (k->value & 0xff) << mask_size; + } else if (auto p = postmod.to()) { + if (!p->phv_index(tbl)) + error(lineno, "%s phv post mod must come from the upper half input", + name().c_str()); + } else { + error(postmod->lineno, "%s invalid post mod", name().c_str()); + } + } + // We allocate the value here in order to report an error in pass1 if the capacity + // of the register file is exceeded. The next call in write_regs with the same value + // will return already allocated register file row index. 
+ if (constval) tbl->get_const(lineno, *constval); + return this; +} +void MinMax::pass2(Table *tbl, Table::Actions::Action *act) { + if (act->slot_use.intersects(bitvec(ALU2LO, 4))) + error(lineno, "min/max requires all 4 stateful alu slots be unused"); +} +void MinMax::write_regs(Target::JBay::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu_instr_common = meter_group.stateful.salu_instr_common[act->code]; + if (auto k = mask.to()) { + salu_instr_common.salu_minmax_mask_ctl = 1; + } else { + salu_instr_common.salu_minmax_mask_ctl = 0; + } + salu_instr_common.salu_minmax_ctl = opc->opcode; + salu_instr_common.salu_minmax_enable = 1; + if (postmod) { + if (auto k = postmod.to()) { + salu_instr_common.salu_minmax_postmod_value_ctl = 0; + } else { + salu_instr_common.salu_minmax_postmod_value_ctl = 1; + } + if (postmod.neg) + salu_instr_common.salu_minmax_postdec_enable = 1; + else + salu_instr_common.salu_minmax_postinc_enable = 1; + } + if (constval) { + auto &salu_instr_cmp = meter_group.stateful.salu_instr_cmp_alu[act->code][3]; + salu_instr_cmp.salu_cmp_regfile_adr = tbl->get_const(lineno, *constval); + } + // salu_instr_common.salu_minmax_src_sel = phv; -- FIXME -- specify PHV source? 
+ for (auto &salu : meter_group.stateful.salu_instr_state_alu[act->code]) { + salu.salu_op = 0xd; + salu.salu_arith = 1; + salu.salu_pred = 0xffff; + } +} +void MinMax::write_regs(Target::Tofino::mau_regs &, Table *, Table::Actions::Action *) { BUG(); } + +template <> +void AluOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_state_alu[act->code][slot - ALU2LO]; + auto &salu_ext = meter_group.stateful.salu_instr2_state_alu[act->code][slot - ALU2LO]; + auto &salu_instr_common = meter_group.stateful.salu_instr_common[act->code]; + auto &salu_instr_output_alu = meter_group.stateful.salu_instr_output_alu[act->code]; + salu.salu_op = opc->opcode & 0xf; + salu.salu_arith = opc->opcode >> 4; + salu.salu_pred = predication_encode; + bool need_flyover = (tbl->format->size >> tbl->is_dual_mode()) > 32; + const int alu_const_min = Target::STATEFUL_ALU_CONST_MIN(); + const int alu_const_max = Target::STATEFUL_ALU_CONST_MAX(); + if (srca) { + if (auto m = srca.to()) { + salu.salu_asrc_input = m->field->bit(0) > 0 ? 1 : 0; + if (need_flyover) { + salu_ext.salu_flyover_src_sel = 1; + need_flyover = false; + } + } else if (auto f = srca.to()) { + salu.salu_asrc_input = f->phv_index(tbl) ? 
3 : 2; + if (need_flyover) { + salu_ext.salu_flyover_src_sel = 1; + need_flyover = false; + } + } else if (auto k = srca.to()) { + salu.salu_asrc_input = 4; + if (k->value >= alu_const_min && k->value <= alu_const_max) { + salu.salu_const_src = k->value & Target::STATEFUL_ALU_CONST_MASK(); + salu.salu_regfile_const = 0; + } else { + salu.salu_const_src = tbl->get_const(k->lineno, k->value); + salu.salu_regfile_const = 1; + } + } else if (auto r = srca.to()) { + salu.salu_asrc_input = 4; + salu.salu_const_src = r->index; + salu.salu_regfile_const = 1; + } else { + BUG(); + } + } + if (srcb) { + if (auto m = srcb.to()) { + salu.salu_bsrc_input = m->field->bit(0) > 0 ? 3 : 2; + if (need_flyover) { + salu_ext.salu_flyover_src_sel = 0; + need_flyover = false; + } + } else if (auto f = srcb.to()) { + salu.salu_bsrc_input = f->phv_index(tbl) ? 1 : 0; + if (need_flyover) { + salu_ext.salu_flyover_src_sel = 0; + need_flyover = false; + } + } else if (auto m = srcb.to()) { + salu_instr_common.salu_alu2_lo_bsrc_math = 1; + if (auto b = m->of.to()) { + salu_instr_common.salu_alu2_lo_math_src = b->phv_index(tbl); + } else if (auto b = m->of.to()) { + salu_instr_common.salu_alu2_lo_math_src = b->field->bit(0) > 0 ? 
3 : 2; + } else { + BUG(); + } + } else if (auto k = srcb.to()) { + salu.salu_bsrc_input = 4; + if (k->value >= alu_const_min && k->value <= alu_const_max) { + salu.salu_const_src = k->value & Target::STATEFUL_ALU_CONST_MASK(); + salu.salu_regfile_const = 0; + } else { + salu.salu_const_src = tbl->get_const(k->lineno, k->value); + salu.salu_regfile_const = 1; + } + } else if (auto r = srcb.to()) { + salu.salu_bsrc_input = 4; + salu.salu_const_src = r->index; + salu.salu_regfile_const = 1; + } else { + BUG(); + } + } +} +void AluOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +template <> +void BitOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + LOG2(this); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_state_alu[act->code][slot - ALU2LO]; + salu.salu_op = opc->opcode & 0xf; + salu.salu_pred = predication_encode; + // 1b instructions are from mem-lo to alu1-lo + salu.salu_asrc_input = 0; +} +void BitOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +static int sbus_mask(int alu, const std::vector &tbls) { + int rv = 0; + for (auto &tbl : tbls) { + int bit = tbl->layout[0].row / 4U; + if (bit > alu) --bit; + rv |= 1 << bit; + } + return rv; +} + +template <> +void CmpOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_cmp_alu[act->code][slot]; + auto &salu_instr_common = meter_group.stateful.salu_instr_common[act->code]; + if (srca) { + salu.salu_cmp_asrc_input = srca->field->bit(0) > 0; + 
salu.salu_cmp_asrc_sign = srca_neg; + salu.salu_cmp_asrc_enable = 1; + if (maska != uint32_t(-1)) { + salu.salu_cmp_asrc_mask_enable = 1; + auto cval = 0; + if (auto k = dynamic_cast(srcc)) + cval = k->value; + else if (auto r = dynamic_cast(srcc)) + cval = tbl->get_const_val(r->index); + int64_t min = Target::STATEFUL_CMP_CONST_MIN(); + int64_t max = Target::STATEFUL_CMP_CONST_MAX(); + bool maska_outside = (maska < uint32_t(min) && maska > max); + bool maska_equal_inside = (uint32_t(cval) != maska && cval >= min && cval <= max); + if (!maska_outside && !maska_equal_inside) { + salu.salu_cmp_const_src = maska & Target::STATEFUL_CMP_CONST_MASK(); + salu.salu_cmp_mask_input = 0; + } else { + salu.salu_cmp_regfile_adr = tbl->get_const(srca->lineno, maska); + salu.salu_cmp_mask_input = 1; + } + } + } + if (srcb) { + salu.salu_cmp_bsrc_input = srcb->phv_index(tbl); + salu.salu_cmp_bsrc_sign = srcb_neg; + salu.salu_cmp_bsrc_enable = 1; + if (maskb != uint32_t(-1)) { + // uarch 6.2.12.6.1, masks for operand a/b are sourced from the + // same regfile slot. + if (salu.salu_cmp_asrc_mask_enable && salu.salu_cmp_mask_input && maskb != maska) + error(lineno, "inconsistent operand mask %d and %d in salu compare operation", + maska, maskb); + salu.salu_cmp_bsrc_mask_enable = 1; + salu.salu_cmp_regfile_adr = tbl->get_const(srcb->lineno, maskb); + } + } + if (srcc) { + if (auto k = dynamic_cast(srcc)) { + const int cmp_const_min = Target::STATEFUL_CMP_CONST_MIN(); + const int cmp_const_max = Target::STATEFUL_CMP_CONST_MAX(); + if (k->value >= cmp_const_min && k->value <= cmp_const_max) { + salu.salu_cmp_const_src = k->value & Target::STATEFUL_CMP_CONST_MASK(); + salu.salu_cmp_regfile_const = 0; + } else { + // uarch 6.2.12.6.1, masks for operand a/b are sourced from the + // same regfile slot as operand c if c is a constant. 
+ if (salu.salu_cmp_asrc_mask_enable && salu.salu_cmp_mask_input && + maska != uint32_t(k->value)) + error(lineno, "inconsistent operand mask %d and %d in salu compare operation", + maska, uint32_t(k->value)); + if (salu.salu_cmp_bsrc_mask_enable && salu.salu_cmp_mask_input && + maskb != uint32_t(k->value)) + error(lineno, "inconsistent operand mask %d and %d in salu compare operation", + maskb, uint32_t(k->value)); + salu.salu_cmp_regfile_adr = tbl->get_const(srcc->lineno, k->value); + salu.salu_cmp_regfile_const = 1; + } + } else if (auto r = dynamic_cast(srcc)) { + salu.salu_cmp_regfile_adr = r->index; + salu.salu_cmp_regfile_const = 1; + } + } else { + salu.salu_cmp_const_src = 0; + salu.salu_cmp_regfile_const = 0; + } + salu.salu_cmp_opcode = opc->opcode | (type << 2); + auto lmask = sbus_mask(logical_home_row / 4U, tbl->sbus_learn); + auto mmask = sbus_mask(logical_home_row / 4U, tbl->sbus_match); + salu_instr_common.salu_lmatch_sbus_listen = lmask; + salu_instr_common.salu_match_sbus_listen = mmask; + salu_instr_common.salu_sbus_in_comb = tbl->sbus_comb; + if (lmask || mmask) { + // if lmask and mmask are both zero, these registers don't matter, but the model + // will assert if they are non-zero) + salu.salu_cmp_sbus_or = 0; + salu.salu_cmp_sbus_and = learn ? 1 : 0; + salu.salu_cmp_sbus_invert = learn_not ? 
1 : 0; + } +} +void CmpOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +template <> +void TMatchOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_cmp_alu[act->code][slot]; + auto &salu_tmatch = meter_group.stateful.salu_instr_tmatch_alu[act->code][slot]; + auto &salu_instr_common = meter_group.stateful.salu_instr_common[act->code]; + salu.salu_cmp_tmatch_enable = 1; + salu.salu_cmp_asrc_enable = 1; + salu.salu_cmp_bsrc_enable = 1; + meter_group.stateful.tmatch_mask[slot][0] = ~mask & 0xffffffffU; + meter_group.stateful.tmatch_mask[slot][1] = ~mask >> 32; + salu.salu_cmp_opcode = 2; + salu.salu_cmp_asrc_input = srca->field->bit(0) > 0; + salu.salu_cmp_bsrc_input = srcb->phv_index(tbl); + if (auto lmask = sbus_mask(logical_home_row / 4U, tbl->sbus_learn)) + salu_instr_common.salu_lmatch_sbus_listen = lmask; + if (auto mmask = sbus_mask(logical_home_row / 4U, tbl->sbus_match)) + salu_instr_common.salu_match_sbus_listen = mmask; + salu.salu_cmp_sbus_or = 0; + salu.salu_cmp_sbus_and = learn ? 1 : 0; + salu.salu_cmp_sbus_invert = learn_not ? 1 : 0; + // we set the learn output unconditionally if there's a tmatch -- should it be controllable? + salu_tmatch.salu_tmatch_vld_ctl = 1; + // salu_tmatch.salu_tmatch_invert = 0; -- when can this be useful? 
+} + +void TMatchOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +void OutOP::decode_output_mux(Target::JBay, Table *tbl, value_t &op) { + static const std::map ops_mux_lookup = { + {"mem_hi", 1}, {"mem_lo", 0}, {"memory_hi", 1}, {"memory_lo", 0}, {"phv_hi", 3}, + {"phv_lo", 2}, {"alu_hi", 5}, {"alu_lo", 4}, {"minmax_index", 5}, {"minmax_post", 4}, + {"predicate", 6}, {"address", 7}, {"div", 8}, {"mod", 9}, {"minmax", 10}}; + if (op.type == tCMD && ops_mux_lookup.count(op[0].s)) + output_mux = ops_mux_lookup.at(op[0].s); + else if (op.type == tSTR && ops_mux_lookup.count(op.s)) + output_mux = ops_mux_lookup.at(op.s); + else + output_mux = -1; + if (src) { + int tmp = output_mux; + if (auto *phv = src.to()) + output_mux = 2 + phv->phv_index(tbl->to()); + else if (auto *mem = src.to()) + output_mux = mem->field->bit(0) > 0 ? 1 : 0; + BUG_CHECK(tmp < 0 || tmp == output_mux, "inconsistent output mux decode"); + } +} +int OutOP::decode_output_option(Target::JBay, value_t &op) { + if (op == "lmatch") { + lmatch = true; + if (op.type == tCMD) + lmatch_pred = decode_predicate(op[1]); + else + lmatch_pred = STATEFUL_PREDICATION_ENCODE_UNCOND; + } else { + return -1; + } + return 0; +} + +template <> +void OutOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_output_alu[act->code][slot - ALUOUT0]; + if (predication_encode) { + salu.salu_output_cmpfn = predication_encode; + } else { + salu.salu_output_cmpfn = STATEFUL_PREDICATION_ENCODE_UNCOND; + } + salu.salu_output_asrc = output_mux; + if ((salu.salu_lmatch_adr_bit_enable = lmatch)) + meter_group.stateful.salu_mathtable[0] = lmatch_pred; + if (output_mux == STATEFUL_PREDICATION_OUTPUT) + 
meter_group.stateful.stateful_ctl.salu_output_pred_sel = slot - ALUOUT0; +} +void OutOP::write_regs(Target::JBay::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} diff --git a/backends/tofino/bf-asm/jbay/stage.cpp b/backends/tofino/bf-asm/jbay/stage.cpp new file mode 100644 index 00000000000..38f37c71b5e --- /dev/null +++ b/backends/tofino/bf-asm/jbay/stage.cpp @@ -0,0 +1,316 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* mau stage template specializations for jbay -- #included directly in top-level stage.cpp */ + +template <> +void Stage::gen_configuration_cache(Target::JBay::mau_regs ®s, json::vector &cfg_cache) { + Stage::gen_configuration_cache_common(regs, cfg_cache); + + static unsigned i_pdddelay; + static unsigned e_pdddelay; + unsigned reg_width = 8; // this means number of hex characters + std::string i_reg_value_str; + std::string e_reg_value_str; + std::string reg_fqname; + std::string reg_name; + unsigned reg_value; + std::string reg_value_str; + + if (stageno != 0) { + if (i_pdddelay > regs.cfg_regs.amod_pre_drain_delay[INGRESS]) + i_pdddelay = regs.cfg_regs.amod_pre_drain_delay[INGRESS]; + if (e_pdddelay > regs.cfg_regs.amod_pre_drain_delay[EGRESS]) + e_pdddelay = regs.cfg_regs.amod_pre_drain_delay[EGRESS]; + + if (stageno == AsmStage::numstages() - 1) { + // 64 is due to number of CSR's + i_pdddelay += (7 + 64); + i_reg_value_str = int_to_hex_string(i_pdddelay, reg_width); + e_pdddelay += (7 + 64); + e_reg_value_str = int_to_hex_string(e_pdddelay, reg_width); + + add_cfg_reg(cfg_cache, "pardereg.pgstnreg.parbreg.left.i_wb_ctrl", "left_i_wb_ctrl", + i_reg_value_str); + add_cfg_reg(cfg_cache, "pardereg.pgstnreg.parbreg.right.e_wb_ctrl", "right_e_wb_ctrl", + e_reg_value_str); + } + } + + // meter_ctl + auto &meter_ctl = regs.rams.map_alu.meter_group; + for (int i = 0; i < 4; i++) { + reg_fqname = "mau[" + std::to_string(stageno) + "].rams.map_alu.meter_group[" + + std::to_string(i) + "]" + ".meter.meter_ctl"; + reg_name = "stage_" + std::to_string(stageno) + "_meter_ctl_" + std::to_string(i); + reg_value = meter_ctl[i].meter.meter_ctl; + if ((reg_value != 0) || (options.match_compiler)) { + reg_value_str = int_to_hex_string(reg_value, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + } +} + +static void addvec(json::vector &vec, ubits_base &val, uint32_t extra = 0) { + 
vec.push_back(val | extra); +} +static void addvec(json::vector &vec, uint32_t val, uint32_t extra = 0) { + vec.push_back(val | extra); +} + +template +static void addvec(json::vector &vec, checked_array_base &array, uint32_t extra = 0) { + for (auto &el : array) addvec(vec, el, extra); +} + +template +static json::map make_reg_vec(REGS ®s, REG ®, const char *name, uint32_t mask0, + uint32_t mask1, uint32_t mask2, uint32_t extra = 0) { + json::map rv; + rv["name"] = name; + rv["offset"] = regs.binary_offset(®); + addvec(rv["value"], reg, extra); + rv["mask"] = json::vector{json::number(mask0), json::number(mask1), json::number(mask2)}; + return rv; +} + +template +void Stage::gen_mau_stage_extension(REGS ®s, json::map &extend) { + extend["last_programmed_stage"] = Target::NUM_MAU_STAGES() - 1; + json::vector ®isters = extend["registers"] = json::vector(); + registers.push_back(make_reg_vec(regs, regs.dp.phv_ingress_thread, "regs.dp.phv_ingress_thread", + 0, 0x3ff, 0x3ff)); + registers.push_back(make_reg_vec(regs, regs.dp.phv_ingress_thread_imem, + "regs.dp.phv_ingress_thread_imem", 0, 0x3ff, 0x3ff)); + registers.push_back(make_reg_vec(regs, regs.dp.phv_egress_thread, "regs.dp.phv_egress_thread", + 0, 0x3ff, 0x3ff)); + registers.push_back(make_reg_vec(regs, regs.dp.phv_egress_thread_imem, + "regs.dp.phv_egress_thread_imem", 0, 0x3ff, 0x3ff)); + registers.push_back(make_reg_vec(regs, regs.rams.match.adrdist.adr_dist_pipe_delay, + "regs.rams.match.adrdist.adr_dist_pipe_delay", 0, 0xf, 0xf)); + typename std::remove_reference< + decltype(regs.rams.match.adrdist.deferred_eop_bus_delay[0])>::type mask0, + mask1; + mask0.eop_delay_fifo_en = mask1.eop_delay_fifo_en = 1; + mask0.eop_internal_delay_fifo = mask1.eop_internal_delay_fifo = 0x1f; + mask0.eop_output_delay_fifo = 0x1; + mask1.eop_output_delay_fifo = 0x1f; + BUG_CHECK(regs.rams.match.adrdist.deferred_eop_bus_delay[0].eop_output_delay_fifo & + regs.rams.match.adrdist.deferred_eop_bus_delay[1].eop_output_delay_fifo & 
1); + registers.push_back(make_reg_vec(regs, regs.rams.match.adrdist.deferred_eop_bus_delay, + "regs.rams.match.adrdist.deferred_eop_bus_delay", mask0, mask0, + mask1)); + registers.push_back(make_reg_vec(regs, regs.dp.cur_stage_dependency_on_prev, + "regs.dp.cur_stage_dependency_on_prev", 0, 0x3, 0x3, 0x1)); + registers.push_back(make_reg_vec(regs, regs.dp.next_stage_dependency_on_cur, + "regs.dp.next_stage_dependency_on_cur", 0x3, 0x3, 0, 0x1)); + registers.push_back(make_reg_vec(regs, regs.rams.match.merge.mpr_bus_dep, + "regs.rams.match.merge.mpr_bus_dep", 0x3, 0x3, 0, 0x3)); + registers.push_back(make_reg_vec(regs, regs.dp.pipelength_added_stages, + "regs.dp.pipelength_added_stages", 0, 0xf, 0xf)); + registers.push_back(make_reg_vec(regs, regs.rams.match.merge.exact_match_delay_thread, + "regs.rams.match.merge.exact_match_delay_thread", 0, 0x3, + 0x3)); + BUG_CHECK((regs.rams.match.merge.mpr_thread_delay[0] & 1) == 0); + BUG_CHECK((regs.rams.match.merge.mpr_thread_delay[1] & 1) == 0); + registers.push_back(make_reg_vec(regs, regs.rams.match.merge.mpr_thread_delay, + "regs.rams.match.merge.mpr_thread_delay", 1, 1, 0x1f)); +} + +/* disable power gating configuration for specific MAU regs to weedout delay programming + * issues. 
We dont expect to call this function in the normal usage of JBay - this is + * only for emulator debug/bringup + */ +template +static void disable_jbay_power_gating(REGS ®s) { + for (gress_t gress : Range(INGRESS, EGRESS)) { + regs.dp.mau_match_input_xbar_exact_match_enable[gress] |= 0x1; + regs.dp.xbar_hash.xbar.mau_match_input_xbar_ternary_match_enable[gress] |= 0x1; + } + + auto &xbar_power_ctl = regs.dp.match_input_xbar_din_power_ctl; + auto &actionmux_power_ctl = regs.dp.actionmux_din_power_ctl; + for (int side = 0; side < 2; side++) { + for (int reg = 0; reg < 16; reg++) { + xbar_power_ctl[side][reg] |= 0x3FF; + actionmux_power_ctl[side][reg] |= 0x3FF; + } + } +} + +template <> +void Stage::write_regs(Target::JBay::mau_regs ®s, bool) { + write_common_regs(regs); + auto &merge = regs.rams.match.merge; + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (stageno == 0) { + merge.predication_ctl[gress].start_table_fifo_delay0 = pred_cycle(gress) - 2; + merge.predication_ctl[gress].start_table_fifo_enable = 1; + } else if (stage_dep[gress] == MATCH_DEP) { + merge.predication_ctl[gress].start_table_fifo_delay0 = + this[-1].pipelength(gress) - this[-1].pred_cycle(gress) + pred_cycle(gress) - 3; + merge.predication_ctl[gress].start_table_fifo_enable = 1; + } else { + BUG_CHECK(stage_dep[gress] == ACTION_DEP); + merge.predication_ctl[gress].start_table_fifo_delay0 = 0; + merge.predication_ctl[gress].start_table_fifo_enable = 0; + } + + if (stageno != 0) + regs.dp.cur_stage_dependency_on_prev[gress] = stage_dep[gress] != MATCH_DEP; + + /* set stage0 dependency if explicitly set by the commandline option */ + if (stageno == 0 && !options.stage_dependency_pattern.empty()) + regs.dp.cur_stage_dependency_on_prev[gress] = stage_dep[gress] != MATCH_DEP; + + if (stageno != AsmStage::numstages() - 1) + regs.dp.next_stage_dependency_on_cur[gress] = this[1].stage_dep[gress] != MATCH_DEP; + else if (AsmStage::numstages() < Target::NUM_MAU_STAGES()) + 
regs.dp.next_stage_dependency_on_cur[gress] = 1; + auto &deferred_eop_bus_delay = regs.rams.match.adrdist.deferred_eop_bus_delay[gress]; + deferred_eop_bus_delay.eop_internal_delay_fifo = pred_cycle(gress) + 2; + /* FIXME -- making this depend on the dependency of the next stage seems wrong */ + if (stageno == AsmStage::numstages() - 1) { + if (AsmStage::numstages() < Target::NUM_MAU_STAGES()) + deferred_eop_bus_delay.eop_output_delay_fifo = 1; + else + deferred_eop_bus_delay.eop_output_delay_fifo = pipelength(gress) - 2; + } else if (this[1].stage_dep[gress] == MATCH_DEP) { + deferred_eop_bus_delay.eop_output_delay_fifo = pipelength(gress) - 2; + } else { + deferred_eop_bus_delay.eop_output_delay_fifo = 1; + } + deferred_eop_bus_delay.eop_delay_fifo_en = 1; + if (stageno != AsmStage::numstages() - 1 && this[1].stage_dep[gress] == MATCH_DEP) { + merge.mpr_thread_delay[gress] = pipelength(gress) - pred_cycle(gress) - 4; + } else { + /* last stage in JBay must be always set as match-dependent on deparser */ + if (stageno == AsmStage::numstages() - 1) { + merge.mpr_thread_delay[gress] = pipelength(gress) - pred_cycle(gress) - 4; + } else { + merge.mpr_thread_delay[gress] = 0; + } + } + } + + for (gress_t gress : Range(INGRESS, EGRESS)) + if (table_use[gress] & USE_TCAM) + regs.tcams.tcam_piped |= options.match_compiler ? 3 : 1 << gress; + + for (gress_t gress : Range(INGRESS, EGRESS)) { + regs.cfg_regs.amod_pre_drain_delay[gress] = pipelength(gress) - 9; + if (this[1].stage_dep[gress] == MATCH_DEP) + regs.cfg_regs.amod_wide_bubble_rsp_delay[gress] = pipelength(gress) - 3; + else + regs.cfg_regs.amod_wide_bubble_rsp_delay[gress] = 0; + } + /* Max re-request limit with a long interval. Parb is going to have a large + * gap configured to minimize traffic hits during configuration this means + * that individual stages may not get their bubbles and will need to retry. 
*/ + regs.cfg_regs.amod_req_interval = 6732; + regs.cfg_regs.amod_req_limit = 15; + + if (stageno == 0) { + /* MFerrera: "After some debug on the emulator, we've found a programming issue due to + * incorrect documentation and CSR description of match_ie_input_mux_sel in JBAY" + * MAU Stage 0 must always be configured to source iPHV from Parser-Arbiter + * Otherwise, MAU stage 0 is configured as match-dependent on Parser-Arbiter */ + regs.dp.match_ie_input_mux_sel |= 3; + } + + merge.pred_stage_id = stageno; + if (long_branch_terminate) merge.pred_long_brch_terminate = long_branch_terminate; + for (gress_t gress : Range(INGRESS, GHOST)) { + if (long_branch_thread[gress]) + merge.pred_long_brch_thread[gress] = long_branch_thread[gress]; + } + + for (gress_t gress : Range(INGRESS, GHOST)) { + merge.mpr_stage_id[gress] = mpr_stage_id[gress]; + for (int id = 0; id < LOGICAL_TABLES_PER_STAGE; ++id) { + merge.mpr_next_table_lut[gress][id] = mpr_next_table_lut[gress][id]; + } + } + for (int id = 0; id < LOGICAL_TABLES_PER_STAGE; ++id) { + merge.mpr_glob_exec_lut[id] = mpr_glob_exec_lut[id]; + } + for (int id = 0; id < MAX_LONGBRANCH_TAGS; ++id) { + merge.mpr_long_brch_lut[id] = mpr_long_brch_lut[id]; + } + merge.mpr_always_run = mpr_always_run; + + if (stageno != AsmStage::numstages() - 1) { + merge.mpr_bus_dep.mpr_bus_dep_ingress = this[1].stage_dep[INGRESS] != MATCH_DEP; + merge.mpr_bus_dep.mpr_bus_dep_egress = this[1].stage_dep[EGRESS] != MATCH_DEP; + } + + merge.mpr_bus_dep.mpr_bus_dep_glob_exec = mpr_bus_dep_glob_exec[INGRESS] | + mpr_bus_dep_glob_exec[EGRESS] | + mpr_bus_dep_glob_exec[GHOST]; + merge.mpr_bus_dep.mpr_bus_dep_long_brch = mpr_bus_dep_long_branch[INGRESS] | + mpr_bus_dep_long_branch[EGRESS] | + mpr_bus_dep_long_branch[GHOST]; + + merge.mpr_long_brch_thread = long_branch_thread[EGRESS]; + if (auto conflict = (long_branch_thread[INGRESS] | long_branch_thread[GHOST]) & + long_branch_thread[EGRESS]) { + // Should probably check this earlier, but there's not 
a good place to do it. + for (auto tag : bitvec(conflict)) { + error(long_branch_use[tag]->lineno, + "Need one-stage turnaround before reusing " + "long_branch tag %d in a different thread", + tag); + } + } + + bitvec in_use = match_use[INGRESS] | action_use[INGRESS] | action_set[INGRESS]; + bitvec eg_use = match_use[EGRESS] | action_use[EGRESS] | action_set[EGRESS]; + /* FIXME -- if the regs are live across a stage (even if not used in that stage) they + * need to be set in the thread registers. For now we just assume if they are used + * anywhere, they need to be marked as live */ + in_use |= Phv::use(INGRESS); + eg_use |= Phv::use(EGRESS); + static const int phv_use_transpose[2][14] = { + {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21}, + {4, 5, 6, 7, 12, 13, 14, 15, 22, 23, 24, 25, 26, 27}}; + // FIXME -- this code depends on the Phv::Register uids matching the + // FIXME -- mau encoding of phv containers. (FIXME-PHV) + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 14; j++) { + regs.dp.phv_ingress_thread[i][j] = regs.dp.phv_ingress_thread_imem[i][j] = + in_use.getrange(10 * phv_use_transpose[i][j], 10); + regs.dp.phv_egress_thread[i][j] = regs.dp.phv_egress_thread_imem[i][j] = + eg_use.getrange(10 * phv_use_transpose[i][j], 10); + } + } + + /* Things following are for debug/bringup only : not for normal flows */ + + if (options.disable_power_gating) { + disable_jbay_power_gating(regs); + } + + write_teop_regs(regs); +} + +void AlwaysRunTable::write_regs(Target::JBay::mau_regs ®s) { + if (gress == EGRESS) + regs.dp.imem_word_read_override.imem_word_read_override_egress = 1; + else + regs.dp.imem_word_read_override.imem_word_read_override_ingress = 1; + actions->write_regs(regs, this); +} diff --git a/backends/tofino/bf-asm/jbay/stateful.cpp b/backends/tofino/bf-asm/jbay/stateful.cpp new file mode 100644 index 00000000000..a429828b723 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/stateful.cpp @@ -0,0 +1,393 @@ +/** + * Copyright (C) 2024 Intel 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/jbay/stateful.h" + +static const char *function_names[] = {"none", "log", "fifo", "stack", "clear"}; + +static int decode_push_pop(const value_t &v) { + static const std::map modes = { + {"hit", PUSH_HIT}, {"miss", PUSH_MISS}, {"gateway", PUSH_GATEWAY}, {"active", PUSH_ALL}}; + if (!CHECKTYPE(v, tSTR)) return 0; + if (!modes.count(v.s)) { + error(v.lineno, "Unknown push/pop mode %s", v.s); + return 0; + } + return modes.at(v.s); +} + +bool StatefulTable::setup_jbay(const pair_t &kv) { + if (kv.key == "sbus") { + // FIXME -- this should be in the stateful action setup as it is per action? 
+ if (!CHECKTYPE(kv.value, tMAP)) return true; + for (auto &el : kv.value.map) { + if (el.key == "match") { + parse_vector(sbus_match, el.value); + } else if (el.key == "learn") { + parse_vector(sbus_learn, el.value); + } else if (el.key == "operation" || el.key == "combine") { + if (el.value == "and") + sbus_comb = SBUS_AND; + else if (el.value == "or") + sbus_comb = SBUS_OR; + else + error(el.value.lineno, "Invalid sbus %s %s, must be 'and' or 'or'", + value_desc(el.key), value_desc(el.value)); + } else { + warning(el.key.lineno, "ignoring unknown item %s in sbus of table %s", + value_desc(el.key), name()); + } + } + } else if (kv.key == "fifo" || kv.key == "stack") { + if (stateful_counter_mode) { + error(kv.key.lineno, "Conflicting log counter functions in %s", name()); + return true; + } + if (kv.key == "fifo") + stateful_counter_mode = FUNCTION_FIFO; + else if (kv.key == "stack") + stateful_counter_mode = FUNCTION_STACK; + if (!CHECKTYPE(kv.value, tMAP)) return true; + for (auto &el : MapIterChecked(kv.value.map)) { + if (el.key == "push") + stateful_counter_mode |= decode_push_pop(el.value); + else if (el.key == "pop") + stateful_counter_mode |= decode_push_pop(el.value) << PUSHPOP_BITS; + else + error(el.key.lineno, "Syntax error, expecting push or pop"); + } + } else if (kv.key == "clear") { + if (stateful_counter_mode) { + error(kv.key.lineno, "Conflicting log counter functions in %s", name()); + return true; + } + stateful_counter_mode = FUNCTION_FAST_CLEAR; + stateful_counter_mode |= decode_push_pop(kv.value); + } else if (kv.key == "watermark") { + if (kv.value == "pop") + watermark_pop_not_push = 1; + else if (kv.value != "push") + error(kv.value.lineno, "Syntax error, expecting push or pop"); + if (kv.value.type == tSTR) + watermark_level = 1; + else if (CHECKTYPE(kv.value[1], tINT)) + watermark_level = kv.value[1].i / 128; + if (kv.value[1].i % 128 != 0) + error(kv.value[1].lineno, "watermark level must be a mulitple of 128"); + } else if (kv.key == 
"underflow") { + if (CHECKTYPE(kv.value, tSTR)) underflow_action = kv.value; + } else if (kv.key == "overflow") { + if (CHECKTYPE(kv.value, tSTR)) overflow_action = kv.value; + } else if (kv.key == "offset_vpn") { + offset_vpn = get_bool(kv.value); + } else if (kv.key == "address_shift") { + if (CHECKTYPE(kv.value, tINT)) meter_adr_shift = kv.value.i; + } else if (kv.key == "phv_hash_shift") { + if (CHECKTYPE(kv.value, tINT)) { + phv_hash_shift = kv.value.i / 8U; + if (kv.value.i % 8U != 0) + error(kv.value.lineno, "phv_hash_shift must be a mulitple of 8"); + else if (phv_hash_shift < 0 || phv_hash_shift > 15) + error(kv.value.lineno, "phv_hash_shift %" PRId64 " out of range", kv.value.i); + } + } else if (kv.key == "phv_hash_mask") { + if (CHECKTYPE2(kv.value, tINT, tBIGINT)) phv_hash_mask = get_bitvec(kv.value); + } else if (kv.key == "stage_alu_id") { + if (CHECKTYPE(kv.value, tINT)) { + if (kv.value.i < 0 || kv.value.i >= 128) + error(kv.value.lineno, "invalid stage_alu_id %" PRIi64, kv.value.i); + stage_alu_id = kv.value.i; + } + } else { + return false; + } + return true; +} + +int parse_jbay_counter_mode(const value_t &v) { + int rv = 0; + if (v == "counter") + rv = FUNCTION_LOG; + else if (v == "fifo") + rv = FUNCTION_FIFO; + else if (v == "stack") + rv = FUNCTION_STACK; + else if (v == "clear") + rv = FUNCTION_FAST_CLEAR; + else + return -1; + if (v.type == tSTR) return rv | PUSH_ALL; + if (v.type != tCMD) return -1; + int flag = 0; + for (int i = 1; i < v.vec.size; ++i) { + if (v[i] == "hit") { + flag |= PUSH_HIT; + } else if (v[i] == "miss") { + flag |= PUSH_MISS; + } else if (v[i] == "gateway") { + flag |= PUSH_GATEWAY; + } else if (v[i] == "gw0") { + flag |= PUSH_GW_ENTRY; + } else if (v[i] == "gw1") { + flag |= (PUSH_GW_ENTRY << 1); + } else if (v[i] == "gw2") { + flag |= (PUSH_GW_ENTRY << 2); + } else if (v[i] == "gw3") { + flag |= (PUSH_GW_ENTRY << 3); + } else if (v[i] == "push" && (rv & FUNCTION_MASK) != FUNCTION_LOG) { + rv |= flag ? 
flag : PUSH_ALL; + flag = 0; + } else if (v[i] == "pop" && (rv & FUNCTION_MASK) != FUNCTION_LOG) { + rv |= (flag ? flag : PUSH_ALL) << PUSHPOP_BITS; + flag = 0; + } else { + return -1; + } + } + return rv | flag; +} +int StatefulTable::parse_counter_mode(Target::JBay target, const value_t &v) { + return parse_jbay_counter_mode(v); +} + +void StatefulTable::set_counter_mode(Target::JBay target, int mode) { + int fnmode = mode & FUNCTION_MASK; + BUG_CHECK(fnmode > 0 && (fnmode >> FUNCTION_SHIFT) <= FUNCTION_FAST_CLEAR); + if (stateful_counter_mode && (stateful_counter_mode & FUNCTION_MASK) != fnmode) + error(lineno, "Incompatible uses (%s and %s) of stateful alu counters", + function_names[stateful_counter_mode >> FUNCTION_SHIFT], + function_names[mode >> FUNCTION_SHIFT]); + else + stateful_counter_mode |= fnmode; + if (mode & PUSH_MASK) stateful_counter_mode |= PUSH_ANY; + if (mode & POP_MASK) stateful_counter_mode |= POP_ANY; +} + +// DANGER -- nasty hack to set the raw bits of an SALU state alu instruction +// really need to make the csr2cpp codegen handle this automatically +template +void set_raw_instr_bits(checked_array<4, T> ®, bitvec v) { + for (int i = 0; i < 4; ++i) { + reg[i].salu_const_src = v.getrange(i * 32, 4); + reg[i].salu_regfile_const = v.getrange(i * 32 + 4, 1); + reg[i].salu_bsrc_input = v.getrange(i * 32 + 5, 3); + reg[i].salu_asrc_input = v.getrange(i * 32 + 8, 3); + reg[i].salu_op = v.getrange(i * 32 + 11, 4); + reg[i].salu_arith = v.getrange(i * 32 + 15, 1); + reg[i].salu_pred = v.getrange(i * 32 + 16, 16); + } +} + +static int counter_to_use(MatchTable *m) { + for (auto st : m->get_attached()->statefuls) return st->to()->meter_group(); + BUG("no attached stateful table?"); + return 0; +} + +template +void StatefulTable::write_tofino2_common_regs(REGS ®s) { + auto &adrdist = regs.rams.match.adrdist; + auto &merge = regs.rams.match.merge; + auto &vpn_range = adrdist.mau_meter_alu_vpn_range[meter_group()]; + auto &salu = 
regs.rams.map_alu.meter_group[meter_group()].stateful; + int minvpn, maxvpn; + layout_vpn_bounds(minvpn, maxvpn, true); + vpn_range.meter_vpn_base = minvpn; + vpn_range.meter_vpn_limit = maxvpn; + vpn_range.meter_vpn_range_check_enable = 1; + int counter_idx = -1; + Actions::Action *sweep_action = nullptr; + for (MatchTable *m : match_tables) { + int mode = 0; + if (auto *call = m->get_call(this)) { + if (call->args.at(0).type == Call::Arg::Counter) { + mode = call->args.at(0).count_mode(); + if (counter_idx < 0) + counter_idx = counter_to_use(m); + else + BUG_CHECK(counter_idx == counter_to_use(m), "conflicting counter use in %s", + name()); + } + if ((mode & FUNCTION_MASK) == FUNCTION_FAST_CLEAR) { + for (auto &a : *m->get_actions()) { + if (auto *sw = action_for_table_action(m, &a)) { + BUG_CHECK(!sweep_action || sw == sweep_action, + "Inconsistent sweep action for %s", name()); + sweep_action = sw; + } + } + } + } + if (address_used) { + auto &slog_map = adrdist.mau_stateful_log_counter_logical_map[m->logical_id]; + slog_map.stateful_log_counter_logical_map_ctl = meter_group(); + slog_map.stateful_log_counter_logical_map_enable = 1; + } + if (mode) { + merge.mau_stateful_log_counter_ctl[m->logical_id / 8U][0].set_subfield( + mode & PUSHPOP_MASK, 4 * (m->logical_id % 8U), 4); + merge.mau_stateful_log_counter_ctl[m->logical_id / 8U][1].set_subfield( + (mode >> PUSHPOP_BITS) & PUSHPOP_MASK, 4 * (m->logical_id % 8U), 4); + for (auto &rep : merge.mau_stateful_log_ctl_ixbar_map[m->logical_id / 8U]) { + if (mode & PUSHPOP_MASK) + rep[0].set_subfield(counter_idx | 0x4, 3 * (m->logical_id % 8U), 3); + if ((mode >> PUSHPOP_BITS) & PUSHPOP_MASK) + rep[1].set_subfield(counter_idx | 0x4, 3 * (m->logical_id % 8U), 3); + } + } + if (address_used) + adrdist.meter_alu_adr_range_check_icxbar_map[meter_group()] |= 1U << m->logical_id; + if (offset_vpn) { + if (!address_used) + warning(lineno, + "Adjusting output address of %s for next stage, but noone is " + "reading it", + 
name()); + adrdist.mau_stateful_log_stage_vpn_offset[m->logical_id].stateful_log_stage_vpn_offset = + maxvpn - minvpn + 1; + // state_instr_width_logical and stateful_log_stage_vpn_offset + // should be set or unset together as they are both used for the + // stateful logging fifo feature. See figure 6-73 in jbay uarch. + adrdist.stateful_instr_width_logical[m->logical_id] = format->log2size - 3; + } + } + switch (meter_group()) { + case 0: + adrdist.meter_adr_shift.meter_adr_shift0 = meter_adr_shift; + break; + case 1: + adrdist.meter_adr_shift.meter_adr_shift1 = meter_adr_shift; + break; + case 2: + adrdist.meter_adr_shift.meter_adr_shift2 = meter_adr_shift; + break; + case 3: + adrdist.meter_adr_shift.meter_adr_shift3 = meter_adr_shift; + break; + } + if (counter_idx >= 0) { + auto &oxbar_map = adrdist.mau_stateful_log_counter_oxbar_map[meter_group()]; + oxbar_map.stateful_log_counter_oxbar_ctl = counter_idx; + oxbar_map.stateful_log_counter_oxbar_enable = 1; + } + auto &ctl2 = merge.mau_stateful_log_counter_ctl2[meter_group()]; + auto &ctl3 = merge.mau_stateful_log_counter_ctl3[meter_group()]; + if (stateful_counter_mode && (stateful_counter_mode & FUNCTION_MASK) != FUNCTION_FAST_CLEAR) { + ctl2.slog_counter_function = stateful_counter_mode >> FUNCTION_SHIFT; + ctl2.slog_instruction_width = format->log2size - 3; + if ((stateful_counter_mode & PUSH_ANY) == 0) ctl2.slog_push_event_ctl = 1; + if ((stateful_counter_mode & POP_ANY) == 0) ctl2.slog_pop_event_ctl = 1; + ctl2.slog_vpn_base = logvpn_min; + ctl2.slog_vpn_limit = logvpn_max; + if (watermark_level) { + ctl2.slog_watermark_ctl = watermark_pop_not_push; + ctl2.slog_watermark_enable = 1; + merge.mau_stateful_log_watermark_threshold[meter_group()] = watermark_level; + } + if (underflow_action.set()) { + auto act = actions->action(underflow_action.name); + BUG_CHECK(act); + // 4-bit stateful addr MSB encoding for instruction, as given by table 6-67 (6.4.4.11) + ctl3.slog_underflow_instruction = act->code * 2 + 
1; + } + if (overflow_action.set()) { + auto act = actions->action(overflow_action.name); + BUG_CHECK(act); + ctl3.slog_overflow_instruction = act->code * 2 + 1; + } + } else { + // we set up for fast clear from the control plane if the counter mode is unused + ctl2.slog_counter_function = FUNCTION_FAST_CLEAR >> FUNCTION_SHIFT; + ctl2.slog_instruction_width = 4; // 128 bits + ctl2.slog_vpn_base = minvpn; + ctl2.slog_vpn_limit = maxvpn; + if (busy_value) salu.stateful_clear_action_output = busy_value; + if (clear_value) { + set_raw_instr_bits(salu.salu_instr_state_alu[3], clear_value); + salu.stateful_ctl.salu_clear_value_ctl = 1; + } + if (sweep_action) { + ctl3.slog_overflow_instruction = sweep_action->code * 2 + 1; + } else { + ctl3.slog_overflow_instruction = 0x6; + } + } + regs.rams.map_alu.meter_alu_group_phv_hash_shift[meter_group()] = phv_hash_shift; + unsigned idx = 0; + for (auto &slice : regs.rams.map_alu.meter_alu_group_phv_hash_mask[meter_group()]) + slice = phv_hash_mask.getrange(32 * idx++, 32); + + for (size_t i = 0; i < const_vals.size(); ++i) { + if (const_vals[i].value > (INT64_C(1) << 33) || const_vals[i].value <= -(INT64_C(1) << 33)) + error(const_vals[i].lineno, "constant value %" PRId64 " too large for stateful alu", + const_vals[i].value); + salu.salu_const_regfile[i] = const_vals[i].value & 0xffffffffU; + salu.salu_const_regfile_msbs[i] = (const_vals[i].value >> 32) & 0x3; + } + if (stage_alu_id >= 0) { + salu.stateful_ctl.salu_stage_id = stage_alu_id; + salu.stateful_ctl.salu_stage_id_enable = 1; + } +} + +// This is called write_logging_regs, but it handles all tofino2+ target specific +// registers, as write_regs is not specialized and this is. Should rename? 
+template <> +void StatefulTable::write_logging_regs(Target::JBay::mau_regs ®s) { + write_tofino2_common_regs(regs); +} + +/// Compute the proper value for the register +/// map_alu.meter_alu_group_data_delay_ctl[].meter_alu_right_group_delay +/// which controls the two halves of the ixbar->meter_alu fifo, based on a bytemask of which +/// bytes are needed in the meter_alu. On JBay, the fifo is 128 bits wide, so each enable +/// bit controls 64 bits +int AttachedTable::meter_alu_fifo_enable_from_mask(Target::JBay::mau_regs &, unsigned bytemask) { + int rv = 0; + if (bytemask & 0xff) rv |= 1; + if (bytemask & 0xff00) rv |= 2; + return rv; +} + +void StatefulTable::gen_tbl_cfg(Target::JBay, json::map &tbl, json::map &stage_tbl) const { + static const char *table_type[] = {"normal", "log", "fifo", "stack", "bloom_clear"}; + if (tbl["stateful_table_type"]) { + // overall table info already set in an earlier stage; don't override it + return; + } + tbl["stateful_table_type"] = table_type[stateful_counter_mode >> FUNCTION_SHIFT]; + bool has_push = (stateful_counter_mode & PUSHPOP_MASK) != 0; + bool has_pop = (stateful_counter_mode & (PUSHPOP_MASK << PUSHPOP_BITS)) != 0; + for (MatchTable *m : match_tables) { + if (auto *call = m->get_call(this)) { + if (call->args.at(0).type == Call::Arg::Counter) { + unsigned mode = call->args.at(0).count_mode(); + has_push |= (mode & PUSHPOP_MASK) != 0; + has_pop |= (mode & (PUSHPOP_MASK << PUSHPOP_BITS)) != 0; + } + } + } + if (has_push) { + if (has_pop) + tbl["stateful_direction"] = "inout"; + else + tbl["stateful_direction"] = "in"; + } else if (has_pop) { + tbl["stateful_direction"] = "out"; + } + tbl["stateful_counter_index"] = meter_group(); +} diff --git a/backends/tofino/bf-asm/jbay/stateful.h b/backends/tofino/bf-asm/jbay/stateful.h new file mode 100644 index 00000000000..91d9a8cb8a4 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/stateful.h @@ -0,0 +1,57 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JBAY_STATEFUL_H_ +#define BACKENDS_TOFINO_BF_ASM_JBAY_STATEFUL_H_ + +#include "backends/tofino/bf-asm/tables.h" +#include "backends/tofino/bf-asm/target.h" + +// FIXME -- should be a namespace somwhere? Or in class StatefulTable +/* for jbay counter mode, we may need both a push and a pop mode, as well as counter_function, + * so we pack them all into an int with some shifts and masks */ +enum { + PUSHPOP_BITS = 5, + PUSHPOP_MASK = 0xf, + PUSHPOP_ANY = 0x10, + PUSH_MASK = PUSHPOP_MASK, + PUSH_ANY = PUSHPOP_ANY, + POP_MASK = PUSHPOP_MASK << PUSHPOP_BITS, + POP_ANY = PUSHPOP_ANY << PUSHPOP_BITS, + PUSH_MISS = 1, + PUSH_HIT = 2, + PUSH_GATEWAY = 3, + PUSH_ALL = 4, + PUSH_GW_ENTRY = 5, + POP_MISS = PUSH_MISS << PUSHPOP_BITS, + POP_HIT = PUSH_HIT << PUSHPOP_BITS, + POP_GATEWAY = PUSH_GATEWAY << PUSHPOP_BITS, + POP_ALL = PUSH_ALL << PUSHPOP_BITS, + POP_GW_ENTRY = PUSH_GW_ENTRY << PUSHPOP_BITS, + FUNCTION_SHIFT = 2 * PUSHPOP_BITS, + FUNCTION_LOG = 1 << FUNCTION_SHIFT, + FUNCTION_FIFO = 2 << FUNCTION_SHIFT, + FUNCTION_STACK = 3 << FUNCTION_SHIFT, + FUNCTION_FAST_CLEAR = 4 << FUNCTION_SHIFT, + FUNCTION_MASK = 0xf << FUNCTION_SHIFT, +}; + +int parse_jbay_counter_mode(const value_t &v); +template <> +void StatefulTable::write_logging_regs(Target::JBay::mau_regs ®s); + +#endif /* BACKENDS_TOFINO_BF_ASM_JBAY_STATEFUL_H_ */ diff --git 
a/backends/tofino/bf-asm/jbay/template_objects.yaml b/backends/tofino/bf-asm/jbay/template_objects.yaml new file mode 100644 index 00000000000..9dc2f8ae478 --- /dev/null +++ b/backends/tofino/bf-asm/jbay/template_objects.yaml @@ -0,0 +1,121 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 + +global: + - namespace=JBay + - binary_offset + - emit_binary + - emit_fieldname + - emit_json + - enable_disable + - input_binary + - write_dma=mapram_config + - write_dma=imem_dark_subword8 + - write_dma=imem_dark_subword16 + - write_dma=imem_dark_subword32 + - write_dma=imem_mocha_subword8 + - write_dma=imem_mocha_subword16 + - write_dma=imem_mocha_subword32 + - write_dma=imem_subword8 + - write_dma=imem_subword16 + - write_dma=imem_subword32 + - write_dma=galois_field_matrix +generate: + memories: + jbay_mem: + memories.jbay_mem.h: [ decl, name=memories.top ] + memories.jbay_mem.cpp: [ defn, name=memories.top, + -Imemories.jbay_mem.h, -Imemories.pipe_addrmap.h ] + pipe_addrmap: # pipes + memories.pipe_addrmap.h: [ decl, name=memories.pipe, widereg ] + memories.pipe_addrmap.cpp: [ defn, name=memories.pipe, widereg, + -Imemories.pipe_addrmap.h, -Imemories.prsr_mem_main_rspec.h ] + # parde_mem -- parde + prsr_mem_main_rspec: # i_prsr_mem e_prsr_mem + memories.prsr_mem_main_rspec.h: [ decl, name=memories.parser.%s ] + memories.prsr_mem_main_rspec.cpp: [ defn, name=memories.parser.%s, + 
-Imemories.prsr_mem_main_rspec.h ] + + regs: + jbay_reg: + regs.jbay_reg.h: [ decl, name=regs.top ] + regs.jbay_reg.cpp: [ defn, name=regs.top, + -Iregs.jbay_reg.h, -Iregs.pipe_addrmap.h ] + pipe_addrmap: # pipea + regs.pipe_addrmap.h: [ decl, name=regs.pipe, widereg ] + regs.pipe_addrmap.cpp: [ defn, name=regs.pipe, widereg, + -Iregs.pipe_addrmap.h, -Iregs.ipb_prsr4_reg.h, -Iregs.epb_prsr4_reg.h, + -Iregs.pmerge_reg.h, -Iregs.mau_addrmap.h, -Iregs.dprsr_reg.h ] + mau_addrmap: # mau + regs.mau_addrmap.h: [ decl, name=regs.match_action_stage.%02x ] + regs.mau_addrmap.cpp: [ defn, name=regs.match_action_stage.%02x, + -Iregs.mau_addrmap.h ] + # parde_glue_stn_reg + ipb_prsr4_reg: # ipbprsr4reg + regs.ipb_prsr4_reg.h: [ decl, name=regs.parser.ingress ] + regs.ipb_prsr4_reg.cpp: [ defn, name=regs.parser.ingress, + -Iregs.ipb_prsr4_reg.h, -Iregs.prsr_reg_main_rspec.h ] + prsr_reg_main_rspec: # prsr + regs.prsr_reg_main_rspec.h: [ decl, name=regs.parser.main.%s ] + regs.prsr_reg_main_rspec.cpp: [ defn, name=regs.parser.main.%s, + -Iregs.prsr_reg_main_rspec.h ] + pmerge_reg: # pmergereg + regs.pmerge_reg.h: [ decl, name=regs.parse_merge ] + regs.pmerge_reg.cpp: [ defn, name=regs.parse_merge, + -Iregs.pmerge_reg.h ] + epb_prsr4_reg: # epbprsr4reg + regs.epb_prsr4_reg.h: [ decl, name=regs.parser.egress ] + regs.epb_prsr4_reg.cpp: [ defn, name=regs.parser.egress, + -Iregs.epb_prsr4_reg.h, -Iregs.prsr_reg_main_rspec.h ] + # prsr_reg_main_rspec # prsr + # mirr_ebuf_reg + dprsr_reg: + regs.dprsr_reg.h: [ decl, name=regs.deparser ] + regs.dprsr_reg.cpp: [ defn, name=regs.deparser, + -Iregs.dprsr_reg.h ] + +ignore: + memories: + # jbay_mem + - tm_top_mem_rspec # tm + # pipes + - mau_addrmap # mau -- just a dummy reg + # parde + - pgr_mem_rspec + + regs: + # jbay_reg + - dvsl_addrmap + - eth100g_addrmap + - eth400g_addrmap + - gpio_regs + - serdes_addrmap + # pipes + # mau + # pardereg + # parde_glue_stn_reg + # parb_reg + - ebuf900_reg + - pbus_station_regs_rspec + - pgr_reg_rspec 
# pgrreg + - s2p_reg + - p2s_reg + - parde_glue_reg_rspec #pgluereg + # mirr_ebuf_reg # mirefreg + # dprsr_reg + # dprsr_reg_rspec + - parde_dprsr_reg_rspec diff --git a/backends/tofino/bf-asm/json.cpp b/backends/tofino/bf-asm/json.cpp new file mode 100644 index 00000000000..4f1d1572024 --- /dev/null +++ b/backends/tofino/bf-asm/json.cpp @@ -0,0 +1,253 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/json.h" + +#include +#include + +#include "lib/hex.h" + +namespace json { + +static int digit_value(char ch) { + if (ch >= 'a') return ch - 'a' + 10; + if (ch >= 'A') return ch - 'A' + 10; + if (ch >= '0' && ch <= '9') return ch - '0'; + return 999; +} + +// true iff the string ends in an odd number of '\' characters +static bool odd_backslash(const std::string &s) { + int cnt = 0; + for (int i = s.size() - 1; i >= 0; --i) { + if (s[i] != '\\') break; + cnt++; + } + return (cnt & 1) == 1; +} + +std::istream &operator>>(std::istream &in, std::unique_ptr &json) { + while (in) { + bool neg = false; + char ch; + int base = 10, digit; + in >> ch; + switch (ch) { + case '-': + neg = true; + in >> ch; + if (ch != '0') goto digit; + /* fall through */ + case '0': + base = 8; + in >> ch; + if (ch == 'x' || ch == 'X') { + base = 16; + in >> ch; + } else if (ch == 'b') { + base = 2; + in >> ch; + } + /* fall through */ + digit: + case '1': + 
case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + int64_t l = 0; + while (in && (digit = digit_value(ch)) < base) { + if ((INT64_MAX - digit) / base < l) { + std::cerr << "overflow detected" << std::endl; + } + l = l * base + digit; + in >> ch; + } + if (in) in.unget(); + if (neg) l = -l; + json.reset(new number(l)); + return in; + } + case '"': { + std::string s; + getline(in, s, '"'); + while (odd_backslash(s)) { + std::string tmp; + getline(in, tmp, '"'); + s += '\"'; + s += tmp; + } + json.reset(new string(std::move(s))); + return in; + } + case '[': { + std::unique_ptr rv(new vector()); + in >> ch; + if (ch != ']') { + in.unget(); + do { + std::unique_ptr o; + in >> o >> ch; + rv->push_back(std::move(o)); + if (ch != ',' && ch != ']') { + std::cerr << "missing ',' in vector (saw '" << ch << "')" << std::endl; + in.unget(); + } + } while (in && ch != ']'); + } + json = std::move(rv); + return in; + } + case '{': { + std::unique_ptr rv(new map()); + in >> ch; + if (ch != '}') { + in.unget(); + do { + std::unique_ptr key, val; + in >> key >> ch; + if (ch == '}') { + std::cerr << "missing value in map" << std::endl; + } else { + if (ch != ':') { + std::cerr << "missing ':' in map (saw '" << ch << "')" << std::endl; + in.unget(); + } + in >> val >> ch; + } + if (rv->count(key.get())) + std::cerr << "duplicate key in map" << std::endl; + else + (*rv)[std::move(key)] = std::move(val); + if (ch != ',' && ch != '}') { + std::cerr << "missing ',' in map (saw '" << ch << "')" << std::endl; + in.unget(); + } + } while (in && ch != '}'); + } + json = std::move(rv); + return in; + } + default: + if (isalpha(ch) || ch == '_') { + std::string s; + while (isalnum(ch) || ch == '_') { + s += ch; + if (!(in >> ch)) break; + } + in.unget(); + if (s == "true") + json.reset(new True()); + else if (s == "false") + json.reset(new False()); + else if (s == "null") + json.reset(); + else + json.reset(new string(std::move(s))); + return in; 
+ } else { + std::cerr << "unexpected character '" << ch << "' (0x" << hex(ch) << ")" + << std::endl; + } + } + } + return in; +} + +void vector::print_on(std::ostream &out, int indent, int width, const char *pfx) const { + int twidth = width; + bool first = true; + bool oneline = test_width(twidth); + out << '['; + indent += 2; + for (auto &e : *this) { + if (!first) out << ','; + if (!oneline) out << '\n' << pfx << std::setw(indent); + out << ' ' << std::setw(0); + if (e) + e->print_on(out, indent, width - 2, pfx); + else + out << "null"; + first = false; + } + indent -= 2; + if (!first) out << (oneline ? ' ' : '\n'); + if (!oneline) out << std::setw(indent + 1); + out << ']'; +} + +void map::print_on(std::ostream &out, int indent, int width, const char *pfx) const { + int twidth = width; + bool first = true; + bool oneline = test_width(twidth); + // std::cout << "*** width=" << width << " twdith=" << twidth << std::endl; + out << '{'; + indent += 2; + for (auto &e : *this) { + if (!first) out << ','; + if (!oneline) out << '\n' << pfx << std::setw(indent); + out << ' ' << std::setw(0); + e.first->print_on(out, indent, width - 2, pfx); + out << ": "; + if (e.second) + e.second->print_on(out, indent, width - 2, pfx); + else + out << "null"; + first = false; + } + indent -= 2; + if (!first) out << (oneline ? 
' ' : '\n'); + if (!oneline) out << std::setw(indent + 1); + out << '}'; +} + +std::string obj::toString() const { + std::stringstream buf; + print_on(buf); + return buf.str(); +} + +map &map::merge(const map &a) { + for (auto &el : a) { + if (!el.second) { + erase(el.first); + } else if (count(el.first)) { + auto &exist = at(el.first); + if (exist->is() && el.second->is()) { + exist->to().merge(el.second->to()); + } else if (exist->is() && el.second->is()) { + auto &vec = exist->to(); + for (auto &vel : el.second->to()) vec.push_back(vel->clone()); + } else { + exist = el.second->clone(); + } + } else { + emplace(el.first->clone().release(), el.second->clone()); + } + } + return *this; +} + +} // namespace json + +void dump(const json::obj &o) { std::cout << &o << std::endl; } +void dump(const json::obj *o) { std::cout << o << std::endl; } diff --git a/backends/tofino/bf-asm/json.h b/backends/tofino/bf-asm/json.h new file mode 100644 index 00000000000..27f707e9fa3 --- /dev/null +++ b/backends/tofino/bf-asm/json.h @@ -0,0 +1,641 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_JSON_H_ // NOLINT(build/header_guard) +#define BACKENDS_TOFINO_BF_ASM_JSON_H_ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "backends/tofino/bf-asm/rvalue_reference_wrapper.h" +#include "lib/ordered_map.h" + +using namespace P4; + +namespace json { + +/* this is std::make_unique, except that is missing in some compilers/versions. We give + * it a different name as other compilers complain about ambiguities if we don't... */ +template +std::unique_ptr mkuniq(Args &&...args) { + std::unique_ptr ret(new T(std::forward(args)...)); + return ret; +} + +class number; +class string; +class vector; +class map; + +class obj { + public: + obj() {} + obj(const obj &) = default; + obj(obj &&) = default; + obj &operator=(const obj &) & = default; + obj &operator=(obj &&) & = default; + virtual ~obj() {} + virtual bool operator<(const obj &a) const = 0; + bool operator>=(const obj &a) const { return !(*this < a); } + bool operator>(const obj &a) const { return a < *this; } + bool operator<=(const obj &a) const { return !(a < *this); } + virtual bool operator==(const obj &a) const = 0; + bool operator!=(const obj &a) const { return !(*this == a); } + virtual bool operator==(const char * /*str*/) const { return false; } + virtual bool operator==(const std::string & /*str*/) const { return false; } + virtual bool operator==(const string & /*str*/) const { return false; } + bool operator!=(const char *str) const { return !(*this == str); } + virtual bool operator==(int64_t /*val*/) const { return false; } + bool operator!=(int64_t val) const { return !(*this == val); } + struct ptrless { + bool operator()(const obj *a, const obj *b) const { return b ? a ? *a < *b : true : false; } + bool operator()(const std::unique_ptr &a, const std::unique_ptr &b) const { + return b ? a ? 
*a < *b : true : false; + } + }; + virtual void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const = 0; + virtual bool test_width(int &limit) const = 0; + virtual number *as_number() { return nullptr; } + virtual const number *as_number() const { return nullptr; } + virtual string *as_string() { return nullptr; } + virtual const string *as_string() const { return nullptr; } + virtual vector *as_vector() { return nullptr; } + virtual const vector *as_vector() const { return nullptr; } + virtual map *as_map() { return nullptr; } + virtual const map *as_map() const { return nullptr; } + virtual const char *c_str() const { return nullptr; } + template + bool is() const { + return dynamic_cast(this) != nullptr; + } + template + T &to() { + return dynamic_cast(*this); + } + template + const T &to() const { + return dynamic_cast(*this); + } + virtual std::unique_ptr copy() && = 0; // Creates a shallow copy of unique_ptr + virtual std::unique_ptr clone() const = 0; // Creates a deep copy of obj + static std::unique_ptr clone_ptr(const std::unique_ptr &a) { + return a ? 
a->clone() : std::unique_ptr(); + } + std::string toString() const; +}; + +class True : public obj { + bool operator<(const obj &a) const { + return std::type_index(typeid(*this)) < std::type_index(typeid(a)); + } + bool operator==(const obj &a) const { return dynamic_cast(&a) != 0; } + void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const { + out << "true"; + } + bool test_width(int &limit) const { + limit -= 4; + return limit >= 0; + } + std::unique_ptr copy() && { return mkuniq(std::move(*this)); } + std::unique_ptr clone() const { return mkuniq(); } +}; + +class False : public obj { + bool operator<(const obj &a) const { + return std::type_index(typeid(*this)) < std::type_index(typeid(a)); + } + bool operator==(const obj &a) const { return dynamic_cast(&a) != 0; } + void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const { + out << "false"; + } + bool test_width(int &limit) const { + limit -= 5; + return limit >= 0; + } + std::unique_ptr copy() && { return mkuniq(std::move(*this)); } + std::unique_ptr clone() const { return mkuniq(); } +}; + +class number : public obj { + public: + int64_t val; + explicit number(int64_t l) : val(l) {} + ~number() {} + bool operator<(const obj &a) const override { + if (auto *b = dynamic_cast(&a)) return val < b->val; + return std::type_index(typeid(*this)) < std::type_index(typeid(a)); + } + bool operator==(const obj &a) const override { + if (auto *b = dynamic_cast(&a)) return val == b->val; + return false; + } + bool operator==(int64_t v) const override { return val == v; } + void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const override { + out << val; + } + bool test_width(int &limit) const override { + char buf[32]; + limit -= snprintf(buf, sizeof(buf), "%" PRId64, val); + return limit >= 0; + } + number *as_number() override { return this; } + const number 
*as_number() const override { return this; } + std::unique_ptr copy() && override { return mkuniq(std::move(*this)); } + std::unique_ptr clone() const override { return mkuniq(val); } +}; + +class string : public obj, public std::string { + public: + string() {} + string(const string &) = default; + string(const std::string &a) : std::string(a) {} // NOLINT(runtime/explicit) + string(const char *a) : std::string(a) {} // NOLINT(runtime/explicit) + string(string &&) = default; + string(std::string &&a) : std::string(a) {} // NOLINT + string(int64_t l) : std::string(std::to_string(l)) {} // NOLINT + string &operator=(const string &) & = default; + string &operator=(string &&) & = default; + ~string() {} + bool operator<(const obj &a) const override { + if (const string *b = dynamic_cast(&a)) + return static_cast(*this) < static_cast(*b); + return std::type_index(typeid(*this)) < std::type_index(typeid(a)); + } + bool operator==(const obj &a) const override { + if (const string *b = dynamic_cast(&a)) + return static_cast(*this) == static_cast(*b); + return false; + } + bool operator==(const string &a) const override { + return static_cast(*this) == static_cast(a); + } + bool operator==(const char *str) const override { + return static_cast(*this) == str; + } + bool operator==(const std::string &str) const override { + return static_cast(*this) == str; + } + void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const override { + out << '"' << *this << '"'; + } + bool test_width(int &limit) const override { + limit -= size() + 2; + return limit >= 0; + } + const char *c_str() const override { return std::string::c_str(); } + string *as_string() override { return this; } + const string *as_string() const override { return this; } + std::unique_ptr copy() && override { return mkuniq(std::move(*this)); } + std::unique_ptr clone() const override { return mkuniq(*this); } +}; + +class map; // forward decl + +typedef 
std::vector> vector_base; +class vector : public obj, public vector_base { + public: + vector() {} + vector(const vector &) = delete; + vector(vector &&) = default; + vector(const std::initializer_list> &init) { + for (auto o : init) push_back(o.get().copy()); + } + vector &operator=(const vector &) & = delete; + vector &operator=(vector &&) & = default; + ~vector() {} + bool operator<(const obj &a) const override { + if (const vector *b = dynamic_cast(&a)) { + auto p1 = begin(), p2 = b->begin(); + while (p1 != end() && p2 != b->end()) { + if (**p1 < **p2) return true; + if (**p1 != **p2) return false; + p1++; + p2++; + } + return p2 != b->end(); + } + return std::type_index(typeid(*this)) < std::type_index(typeid(a)); + } + using obj::operator<=; + using obj::operator>=; + using obj::operator>; + bool operator==(const obj &a) const override { + if (const vector *b = dynamic_cast(&a)) { + auto p1 = begin(), p2 = b->begin(); + while (p1 != end() && p2 != b->end()) { + if (**p1 != **p2) return false; + p1++; + p2++; + } + return (p1 == end() && p2 == b->end()); + } + return false; + } + using obj::operator!=; + void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const override; + bool test_width(int &limit) const override { + limit -= 2; + for (auto &e : *this) { + if (e ? 
!e->test_width(limit) : (limit -= 4) < 0) return false; + if ((limit -= 2) < 0) return false; + } + return true; + } + using vector_base::push_back; + void push_back(decltype(nullptr)) { push_back(std::unique_ptr()); } + void push_back(bool t) { + if (t) + push_back(mkuniq(True())); + else + push_back(mkuniq(False())); + } + void push_back(int64_t n) { push_back(mkuniq(number(n))); } + void push_back(int n) { push_back((int64_t)n); } + void push_back(unsigned int n) { push_back((int64_t)n); } + void push_back(uint64_t n) { push_back((int64_t)n); } + void push_back(const char *s) { push_back(mkuniq(string(s))); } + void push_back(std::string s) { push_back(mkuniq(string(s))); } + void push_back(string s) { push_back(mkuniq(s)); } + void push_back(vector &&v) { push_back(mkuniq(std::move(v))); } + void push_back(json::map &&); // NOLINT(whitespace/operators) + vector *as_vector() override { return this; } + const vector *as_vector() const override { return this; } + std::unique_ptr copy() && override { return mkuniq(std::move(*this)); } + std::unique_ptr clone() const override { + vector *v = new vector(); + for (auto &e : *this) v->push_back(clone_ptr(e)); + return std::unique_ptr(v); + } +}; + +typedef ordered_map, obj::ptrless> map_base; +class map : public obj, public map_base { + public: + map() {} + map(const map &) = default; + map(map &&) = default; + map(const std::initializer_list> &init) { + for (auto &pair : init) (*this)[pair.first] = std::move(pair.second).copy(); + } + map &operator=(const map &) & = default; + map &operator=(map &&) & = default; + ~map() { + for (auto &e : *this) delete e.first; + } + bool operator<(const obj &a) const override { + if (const map *b = dynamic_cast(&a)) { + auto p1 = begin(), p2 = b->begin(); + while (p1 != end() && p2 != b->end()) { + if (*p1->first < *p2->first) return true; + if (*p1->first != *p2->first) return false; + if (*p1->second < *p2->second) return true; + if (*p1->second != *p2->second) return false; + 
p1++; + p2++; + } + return p2 != b->end(); + } + return std::type_index(typeid(*this)) < std::type_index(typeid(a)); + } + using obj::operator<=; + using obj::operator>=; + using obj::operator>; + bool operator==(const obj &a) const override { + if (const map *b = dynamic_cast(&a)) { + auto p1 = begin(), p2 = b->begin(); + while (p1 != end() && p2 != b->end()) { + if (*p1->first != *p2->first) return false; + if (*p1->second != *p2->second) return false; + p1++; + p2++; + } + return (p1 == end() && p2 == b->end()); + } + return false; + } + using obj::operator!=; + std::unique_ptr remove(const char *key) { + string tmp(key); + auto itr = find(&tmp); + if (itr != end()) { + std::unique_ptr val = std::move(itr->second); + this->erase(itr); + return val; + } + return std::unique_ptr(); + } + void print_on(std::ostream &out, int /*indent*/ = 0, int /*width*/ = 80, + const char * /*pfx*/ = "") const override; + bool test_width(int &limit) const override { + limit -= 2; + for (auto &e : *this) { + if (!e.first->test_width(limit)) return false; + if (e.second ? 
!e.second->test_width(limit) : (limit -= 4) < 0) return false; + if ((limit -= 4) < 0) return false; + } + return true; + } + using map_base::count; + map_base::size_type count(const char *str) const { + string tmp(str); + return count(&tmp); + } + map_base::size_type count(std::string &str) const { + string tmp(str); + return count(&tmp); + } + map_base::size_type count(int64_t n) const { + number tmp(n); + return count(&tmp); + } + // using map_base::operator[]; + obj *operator[](const std::unique_ptr &i) const { + auto rv = find(i.get()); + if (rv != end()) return rv->second.get(); + return 0; + } + obj *operator[](const char *str) const { + string tmp(str); + auto rv = find(&tmp); + if (rv != end()) return rv->second.get(); + return 0; + } + obj *operator[](const std::string &str) const { + string tmp(str); + auto rv = find(&tmp); + if (rv != end()) return rv->second.get(); + return 0; + } + obj *operator[](int64_t n) const { + number tmp(n); + auto rv = find(&tmp); + if (rv != end()) return rv->second.get(); + return 0; + } + + private: + class element_ref { + map &self; + std::unique_ptr key; + map_base::iterator iter; + + public: + element_ref(map &s, const char *k) : self(s) { + string tmp(k); + iter = self.find(&tmp); + if (iter == self.end()) key.reset(new string(std::move(tmp))); + } + element_ref(map &s, int64_t k) : self(s) { + number tmp(k); + iter = self.find(&tmp); + if (iter == self.end()) key.reset(new number(std::move(tmp))); + } + element_ref(map &s, std::unique_ptr &&k) : self(s) { + iter = self.find(k.get()); + if (iter == self.end()) key = std::move(k); + } + void operator=(decltype(nullptr)) { + if (key) { + iter = self.emplace(key.release(), std::unique_ptr()).first; + } else { + assert(iter != self.end()); + iter->second.reset(); + } + } + bool operator=(bool t) { + if (key) { + iter = self.emplace(key.release(), + std::unique_ptr(t ? 
static_cast(new True()) + : static_cast(new False()))) + .first; + } else { + assert(iter != self.end()); + iter->second.reset(t ? static_cast(new True()) + : static_cast(new False())); + } + return t; + } + bool operator=(void *); // not defined to avoid converting pointers to bool + bool operator==(string &str) { + if (key) return false; + assert(iter != self.end()); + return *iter->second == str; + } + bool operator!=(string &str) { return !(*this == str); } + bool operator==(const std::string &str) { + if (key) return false; + assert(iter != self.end()); + return *iter->second == str; + } + bool operator!=(const std::string &str) { return !(*this == str); } + bool operator==(int64_t v) { + if (key) return false; + assert(iter != self.end()); + return *iter->second == v; + } + bool operator!=(int64_t v) { return !(*this == v); } + const char *operator=(const char *v) { + if (key) { + iter = self.emplace(key.release(), std::unique_ptr(new string(v))).first; + } else { + assert(iter != self.end()); + iter->second.reset(new string(v)); + } + return v; + } + const std::string &operator=(const std::string &v) { + if (key) { + iter = self.emplace(key.release(), std::unique_ptr(new string(v))).first; + } else { + assert(iter != self.end()); + iter->second.reset(new string(v)); + } + return v; + } + int64_t operator=(int64_t v) { + if (key) { + iter = self.emplace(key.release(), std::unique_ptr(new number(v))).first; + } else { + assert(iter != self.end()); + iter->second.reset(new number(v)); + } + return v; + } + int operator=(int v) { return static_cast(*this = static_cast(v)); } + unsigned int operator=(unsigned int v) { return (unsigned int)(*this = (int64_t)v); } +#if defined(__clang__) && defined(__APPLE__) + // Clang ang gcc on Mac OS can't agree whether size_t overloads uint64_t or unsigned long + // or the overload is not defined! 
+ size_t operator=(size_t v) { return (size_t)(*this = (int64_t)v); } +#endif + uint64_t operator=(uint64_t v) { return (uint64_t)(*this = (int64_t)v); } + vector &operator=(vector &&v) { + if (key) { + iter = self.emplace(key.release(), mkuniq(std::move(v))).first; + } else { + assert(iter != self.end()); + iter->second = mkuniq(std::move(v)); + } + return dynamic_cast(*iter->second); + } + map &operator=(map &&v) { + if (key) { + iter = self.emplace(key.release(), mkuniq(std::move(v))).first; + } else { + assert(iter != self.end()); + iter->second = mkuniq(std::move(v)); + } + return dynamic_cast(*iter->second); + } + const std::unique_ptr &operator=(std::unique_ptr &&v) { + if (key) { + iter = self.emplace(key.release(), std::move(v)).first; + } else { + assert(iter != self.end()); + iter->second = std::move(v); + } + return iter->second; + } + obj &operator*() { + assert(!key && iter != self.end()); + return *iter->second; + } + explicit operator bool() const { return !key; } + obj *get() const { return key ? 0 : iter->second.get(); } + obj *operator->() const { return key ? 
0 : iter->second.get(); } + operator vector &() { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + return dynamic_cast(*iter->second); + } + operator map &() { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + return dynamic_cast(*iter->second); + } + element_ref operator[](const char *str) { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + map *m = dynamic_cast(iter->second.get()); + if (!m) throw std::runtime_error("lookup in non-map json object"); + return element_ref(*m, str); + } + element_ref operator[](const std::string &str) { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + map *m = dynamic_cast(iter->second.get()); + if (!m) throw std::runtime_error("lookup in non-map json object"); + return element_ref(*m, str.c_str()); + } + element_ref operator[](int64_t n) { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + map *m = dynamic_cast(iter->second.get()); + if (!m) throw std::runtime_error("lookup in non-map json object"); + return element_ref(*m, n); + } + element_ref operator[](std::unique_ptr &&i) { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + map *m = dynamic_cast(iter->second.get()); + if (!m) throw std::runtime_error("lookup in non-map json object"); + return element_ref(*m, std::move(i)); + } + template + void push_back(T &&v) { + vector &vec = *this; + vec.push_back(std::forward(v)); + } + template + bool is() const { + return !key && dynamic_cast(iter->second.get()) != nullptr; + } + template + T &to() { + if (key) iter = self.emplace(key.release(), mkuniq()).first; + return dynamic_cast(*iter->second); + } + }; + friend std::ostream &operator<<(std::ostream &out, const element_ref &el); + + public: + element_ref operator[](const char *str) { return element_ref(*this, str); } + element_ref operator[](const std::string &str) { return element_ref(*this, str.c_str()); } + element_ref operator[](int64_t n) { return element_ref(*this, n); } + element_ref 
operator[](std::unique_ptr &&i) { return element_ref(*this, std::move(i)); } + using map_base::erase; + map_base::size_type erase(const char *str) { + string tmp(str); + return map_base::erase(&tmp); + } + map_base::size_type erase(int64_t n) { + number tmp(n); + return map_base::erase(&tmp); + } + map *as_map() override { return this; } + const map *as_map() const override { return this; } + std::unique_ptr copy() && override { return mkuniq(std::move(*this)); } + std::unique_ptr clone() const override { + map *m = new map(); + for (auto &e : *this) + m->emplace(e.first ? e.first->clone().release() : nullptr, clone_ptr(e.second)); + return std::unique_ptr(m); + } + + /// Merges the given map into this one and returns this map. For any key collisions, if both + /// have a map, then they are merged recursively; if both have a vector, then the one in the + /// given map is appended to the one in this map; otherwise, the entry in the given map + /// replaces the entry in this one. + map &merge(const map &a); +}; + +inline void vector::push_back(map &&m) { emplace_back(mkuniq(std::move(m))); } + +std::istream &operator>>(std::istream &in, std::unique_ptr &json); +inline std::istream &operator>>(std::istream &in, obj *&json) { + std::unique_ptr p; + in >> p; + if (in) json = p.release(); + return in; +} + +inline std::ostream &operator<<(std::ostream &out, const obj *json) { + json->print_on(out); + return out; +} +inline std::ostream &operator<<(std::ostream &out, const std::unique_ptr &json) { + return out << json.get(); +} +inline std::ostream &operator<<(std::ostream &out, const map::element_ref &el) { + el->print_on(out); + return out; +} + +} // end namespace json + +extern void dump(const json::obj *); +extern void dump(const json::obj &); + +#endif /* BACKENDS_TOFINO_BF_ASM_JSON_H_ */ diff --git a/backends/tofino/bf-asm/json_diff.cpp b/backends/tofino/bf-asm/json_diff.cpp new file mode 100644 index 00000000000..27ff15e6219 --- /dev/null +++ 
b/backends/tofino/bf-asm/json_diff.cpp @@ -0,0 +1,628 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include + +#include "backends/tofino/bf-asm/json.h" +#include "fdstream.h" +#include "lib/ordered_map.h" + +static bool show_deletion = true; +static bool show_addition = true; +static bool sort_map = true; +static std::vector list_map_keys; +static std::set ignore_keys; +static std::map> ignore_key_indexes; +static std::vector> ignore_intkeys; + +bool is_list_map(json::vector *v, const char *key) { + if (!key) return false; + for (auto &e : *v) + if (json::map *m = dynamic_cast(e.get())) { + if (!m->count(key)) return false; + } else + return false; + return true; +} + +void add_ignore(const char *a) { + while (isspace(*a)) a++; + if (*a == '#' || *a == 0) return; + if (*a == '&' || *a == '=' || *a == '|' || isdigit(*a)) { + int64_t mask, val; + int end = 0; + if (sscanf(a, "%" PRIi64 " %n", &val, &end) >= 1) + ignore_intkeys.emplace_back(-1, val); + else if (sscanf(a, "== %" PRIi64 " %n", &val, &end) >= 1) + ignore_intkeys.emplace_back(-1, val); + else if (sscanf(a, "& %" PRIi64 " == %" PRIi64 " %n", &mask, &val, &end) >= 2) + ignore_intkeys.emplace_back(mask, val); + else if (sscanf(a, "| %" PRIi64 " == %" PRIi64 " %n", &mask, &val, &end) >= 2) + ignore_intkeys.emplace_back(~mask, val ^ mask); + else { + std::cerr << "Unknown 
ignore expression " << a << std::endl; + return; + } + if (a[end]) std::cerr << "extra text after ignore " << (a + end) << std::endl; + return; + } + if (auto *idx = strchr(a, '[')) { + int64_t val; + int end = 0; + if (sscanf(idx, "[%" PRIi64 " ] %n", &val, &end) >= 1 && end > 0) { + end += idx - a; + while (idx > a && isspace(idx[-1])) --idx; + std::string key(a, idx - a); + ignore_key_indexes[key].insert(val); + } else { + std::cerr << "Unknown ignore expression " << a << std::endl; + return; + } + if (a[end]) std::cerr << "extra text after ignore " << (a + end) << std::endl; + return; + } + ignore_keys.insert(a); +} +bool ignore(json::obj *o) { + if (json::string *s = dynamic_cast(o)) { + if (ignore_keys.count(*s)) return true; + } else if (json::number *n = dynamic_cast(o)) { + for (auto &k : ignore_intkeys) + if ((n->val & k.first) == k.second) return true; + } + return false; +} +bool ignore(std::unique_ptr &o) { return ignore(o.get()); } + +const std::set &ignore_indexes_for_key(json::obj *key) { + if (key && key->as_string() && ignore_key_indexes.count(*key->as_string())) + return ignore_key_indexes.at(*key->as_string()); + static std::set empty; + return empty; +} + +std::map build_list_map(json::vector *v, + const char *key) { + std::map rv; + assert(key); + for (auto &e : *v) { + json::map *m = dynamic_cast(e.get()); + assert(m); + rv[(*m)[key].get()] = m; + } + return rv; +} + +void do_prefix(int indent, const char *prefix) { + std::cout << '\n' << prefix; + if (indent) std::cout << std::setw(indent) << ' ' << std::setw(0); +} + +void do_output(json::obj *o, int indent, const char *prefix, const char *suffix = "") { + do_prefix(indent, prefix); + if (o) + o->print_on(std::cout, indent, 80 - indent, prefix); + else + std::cout << "null"; + std::cout << suffix; +} + +void do_output(int index, json::vector::iterator p, int indent, const char *prefix) { + do_prefix(indent, prefix); + std::cout << '[' << index << "] "; + if (*p) + (*p)->print_on(std::cout, 
indent, 80 - indent, prefix); + else + std::cout << "null"; +} + +void do_output(json::map::iterator p, int indent, const char *prefix) { + do_prefix(indent, prefix); + p->first->print_on(std::cout, indent, 80 - indent, prefix); + std::cout << ": "; + if (p->second) + p->second->print_on(std::cout, indent, 80 - indent, prefix); + else + std::cout << "null"; +} + +void do_output(std::map::iterator p, int indent, + const char *prefix) { + do_prefix(indent, prefix); + p->first->print_on(std::cout, indent, 80 - indent, prefix); + std::cout << ": "; + if (p->second) + p->second->print_on(std::cout, indent, 80 - indent, prefix); + else + std::cout << "null"; +} + +void do_output(std::map::iterator p, int indent, + const char *prefix) { + do_prefix(indent, prefix); + p->first->print_on(std::cout, indent, 80 - indent, prefix); + std::cout << ": "; + if (p->second) + p->second->print_on(std::cout, indent, 80 - indent, prefix); + else + std::cout << "null"; +} + +bool equiv(json::obj *a, json::obj *b, json::obj *key = nullptr); +bool equiv(std::unique_ptr &a, json::obj *b, json::obj *key = nullptr) { + return equiv(a.get(), b, key); +} +bool equiv(std::unique_ptr &a, std::unique_ptr &b, json::obj *key = nullptr) { + return equiv(a.get(), b.get(), key); +} +void print_diff(json::obj *a, json::obj *b, int indent, json::obj *key = nullptr); +void print_diff(std::unique_ptr &a, std::unique_ptr &b, int indent, + json::obj *key = nullptr) { + print_diff(a.get(), b.get(), indent, key); +} + +json::vector::iterator find(json::vector::iterator p, json::vector::iterator end, json::obj *m) { + while (p < end && !equiv(*p, m)) ++p; + return p; +} + +bool list_map_equiv(json::vector *a, json::vector *b, const char *key) { + auto bmap = build_list_map(b, key); + for (auto &e : *a) { + json::map *m = dynamic_cast(e.get()); + json::obj *ekey = (*m)[key].get(); + if (!bmap.count(ekey)) { + if (show_deletion && !ignore(ekey)) return false; + continue; + } + if (!ignore(ekey) && !equiv(m, 
bmap[ekey], ekey)) return false; + bmap.erase(ekey); + } + if (show_addition) + for (auto &e : bmap) + if (!ignore(e.first)) return false; + return true; +} +void list_map_print_diff(json::vector *a, json::vector *b, int indent, const char *key) { + auto amap = build_list_map(a, key); + auto bmap = build_list_map(b, key); + auto p1 = amap.begin(), p2 = bmap.begin(); + std::cout << " ["; + indent += 2; + while (p1 != amap.end() && p2 != bmap.end()) { + if (*p1->first < *p2->first) { + if (show_deletion && !ignore(p1->first)) do_output(p1, indent, "-"); + p1++; + continue; + } + if (*p2->first < *p1->first) { + if (show_addition && !ignore(p2->first)) do_output(p2, indent, "+"); + p2++; + continue; + } + if (!ignore(p1->first) && !equiv(p1->second, p2->second, p1->first)) { + int width = 80 - indent, copy; + if (p1->first->test_width(width) && (copy = width) && p1->second && + p1->second->test_width(width) && p2->second && p2->second->test_width(copy)) { + do_output(p1->first, indent, "-", ": "); + std::cout << p1->second; + do_output(p2->first, indent, "+", ": "); + std::cout << p2->second; + } else { + do_output(p1->first, indent, " ", ":"); + print_diff(p1->second, p2->second, indent, p1->first); + } + } + p1++; + p2++; + } + if (show_deletion) + while (p1 != amap.end()) { + if (!ignore(p1->first)) do_output(p1, indent, "-"); + p1++; + } + if (show_addition) + while (p2 != bmap.end()) { + if (!ignore(p2->first)) do_output(p2, indent, "+"); + p2++; + } + indent -= 2; + do_prefix(indent, " "); + std::cout << ']'; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpotentially-evaluated-expression" +bool equiv(json::vector *a, json::vector *b, const std::set &ignore_idx) { + for (auto key : list_map_keys) + if (is_list_map(a, key) && is_list_map(b, key)) return list_map_equiv(a, b, key); + auto p1 = a->begin(), p2 = b->begin(); + while (p1 != a->end() && p2 != b->end()) { + if (!ignore_idx.count(p1 - a->begin()) && !equiv(*p1, *p2)) { + auto s1 = 
find(p1, a->end(), p2->get()); + auto s2 = find(p2, b->end(), p1->get()); + if (typeid(**p1) == typeid(**p2) && p1 - a->begin() == p2 - b->begin() && + (s1 - p1 == s2 - p2 || typeid(**p1) == typeid(json::vector) || + typeid(**p1) == typeid(json::map))) + return false; + if (s1 - p1 <= s2 - p2) { + if (show_deletion) return false; + ++p1; + } else { + if (show_addition) return false; + ++p2; + } + } else { + ++p1; + ++p2; + } + } + while (p1 != a->end() && ignore_idx.count(p1 - a->begin())) ++p1; + if (p1 != a->end() && show_deletion) return false; + while (p2 != b->end() && ignore_idx.count(p2 - b->begin())) ++p2; + if (p2 != b->end() && show_addition) return false; + return true; +} +void print_diff(json::vector *a, json::vector *b, const std::set &ignore_idx, int indent) { + for (auto key : list_map_keys) + if (is_list_map(a, key) && is_list_map(b, key)) { + list_map_print_diff(a, b, indent, key); + return; + } + auto p1 = a->begin(), p2 = b->begin(); + std::cout << " ["; + indent += 2; + while (p1 != a->end() && p2 != b->end()) { + if (!ignore_idx.count(p1 - a->begin()) && !equiv(*p1, *p2)) { + auto s1 = find(p1, a->end(), p2->get()); + auto s2 = find(p2, b->end(), p1->get()); + if ((p1 + 1 != a->end() && p2 + 1 != b->end() && equiv(p1[1], p2[1])) || + (typeid(**p1) == typeid(**p2) && p1 - a->begin() == p2 - b->begin() && + (s1 - p1 == s2 - p2 || typeid(**p1) == typeid(json::vector) || + typeid(**p1) == typeid(json::map)))) { + do_prefix(indent, " "); + std::cout << '[' << p1 - a->begin() << "]"; + print_diff(p1->get(), p2->get(), indent); + } else { + if (s1 - p1 <= s2 - p2) { + if (show_deletion) do_output(p1 - a->begin(), p1, indent, "-"); + ++p1; + } else { + if (show_addition) do_output(p2 - b->begin(), p2, indent, "+"); + ++p2; + } + continue; + } + } + + ++p1; + ++p2; + } + if (show_deletion) + while (p1 != a->end()) { + if (!ignore_idx.count(p1 - a->begin())) do_output(p1 - a->begin(), p1, indent, "-"); + ++p1; + } + if (show_addition) + while (p2 != 
b->end()) { + if (!ignore_idx.count(p2 - b->begin())) do_output(p2 - b->begin(), p2, indent, "+"); + ++p2; + } + indent -= 2; + do_prefix(indent, " "); + std::cout << ']'; +} +#pragma clang diagnostic pop + +std::map build_sort_map(json::map *m) { + std::map rv; + for (auto &e : *m) { + rv[e.first] = e.second.get(); + } + return rv; +} +bool sort_map_equiv(json::map *a, json::map *b) { + auto bmap = build_sort_map(b); + for (auto &e : *a) { + json::obj *ekey = e.first; + if (!bmap.count(ekey)) { + if (show_deletion && !ignore(ekey)) return false; + continue; + } + if (!ignore(ekey) && !equiv(e.second.get(), bmap[ekey], ekey)) return false; + bmap.erase(ekey); + } + if (show_addition) + for (auto &e : bmap) + if (!ignore(e.first)) return false; + return true; +} +void sort_map_print_diff(json::map *a, json::map *b, int indent) { + auto amap = build_sort_map(a); + auto bmap = build_sort_map(b); + auto p1 = amap.begin(), p2 = bmap.begin(); + std::cout << " {"; + indent += 2; + while (p1 != amap.end() && p2 != bmap.end()) { + if (*p1->first < *p2->first) { + if (show_deletion && !ignore(p1->first) && p1->second) do_output(p1, indent, "-"); + p1++; + continue; + } + if (*p2->first < *p1->first) { + if (show_addition && !ignore(p2->first) && p2->second) do_output(p2, indent, "+"); + p2++; + continue; + } + if (!ignore(p1->first) && !equiv(p1->second, p2->second, p1->first)) { + int width = 80 - indent, copy; + if (p1->first->test_width(width) && (copy = width) && p1->second && + p1->second->test_width(width) && p2->second && p2->second->test_width(copy)) { + do_output(p1->first, indent, "-", ": "); + std::cout << p1->second; + do_output(p2->first, indent, "+", ": "); + std::cout << p2->second; + } else { + do_output(p1->first, indent, " ", ":"); + print_diff(p1->second, p2->second, indent, p1->first); + } + } + p1++; + p2++; + } + if (show_deletion) + while (p1 != amap.end()) { + if (!ignore(p1->first)) do_output(p1, indent, "-"); + p1++; + } + if (show_addition) + while 
(p2 != bmap.end()) { + if (!ignore(p2->first)) do_output(p2, indent, "+"); + p2++; + } + indent -= 2; + do_prefix(indent, " "); + std::cout << '}'; +} + +bool equiv(json::map *a, json::map *b) { + if (sort_map) return sort_map_equiv(a, b); + auto p1 = a->begin(), p2 = b->begin(); + while (p1 != a->end() && p2 != b->end()) { + if (*p1->first < *p2->first) { + if (show_deletion && !ignore(p1->first)) return false; + ++p1; + } else if (*p2->first < *p1->first) { + if (show_addition && !ignore(p2->first)) return false; + ++p2; + } else if (!ignore(p1->first) && !(equiv(p1->second, p2->second, p1->first))) { + return false; + } else { + ++p1; + ++p2; + } + } + if (show_deletion) + for (; p1 != a->end(); ++p1) + if (!ignore(p1->first)) return false; + if (show_addition) + for (; p2 != b->end(); ++p2) + if (!ignore(p2->first)) return false; + return true; +} +void print_diff(json::map *a, json::map *b, int indent) { + if (sort_map) { + sort_map_print_diff(a, b, indent); + return; + } + auto p1 = a->begin(), p2 = b->begin(); + std::cout << " {"; + indent += 2; + while (p1 != a->end() && p2 != b->end()) { + if (*p1->first < *p2->first) { + if (show_deletion && !ignore(p1->first)) do_output(p1, indent, "-"); + p1++; + continue; + } + if (*p2->first < *p1->first) { + if (show_addition && !ignore(p2->first)) do_output(p2, indent, "+"); + p2++; + continue; + } + if (!ignore(p1->first) && !equiv(p1->second, p2->second, p1->first)) { + int width = 80 - indent, copy; + if (p1->first->test_width(width) && (copy = width) && p1->second && + p1->second->test_width(width) && p2->second && p2->second->test_width(copy)) { + do_output(p1->first, indent, "-", ": "); + std::cout << p1->second; + do_output(p2->first, indent, "+", ": "); + std::cout << p2->second; + } else { + do_output(p1->first, indent, " ", ":"); + print_diff(p1->second, p2->second, indent, p1->first); + } + } + p1++; + p2++; + } + if (show_deletion) + for (; p1 != a->end(); ++p1) + if (!ignore(p1->first)) do_output(p1, 
indent, "-"); + if (show_addition) + for (; p2 != b->end(); ++p2) + if (!ignore(p2->first)) do_output(p2, indent, "+"); + indent -= 2; + do_prefix(indent, " "); + std::cout << '}'; +} + +bool equiv(json::obj *a, json::obj *b, json::obj *key) { + if (a == b) return true; + // Check true for map/vector with nullptr v/s with no elements "{}" + if (!a) { + if (auto m = b->as_map()) { + if (m->empty()) return true; + } + if (auto v = b->as_vector()) { + if (v->empty()) return true; + } + } + if (!b) { + if (auto m = a->as_map()) { + if (m->empty()) return true; + } + if (auto v = a->as_vector()) { + if (v->empty()) return true; + } + } + if (!a || !b) return false; + if (typeid(*a) != typeid(*b)) return false; + if (typeid(*a) == typeid(json::vector)) + return equiv(static_cast(a), static_cast(b), + ignore_indexes_for_key(key)); + if (typeid(*a) == typeid(json::map)) + return equiv(static_cast(a), static_cast(b)); + return *a == *b; +} +void print_diff(json::obj *a, json::obj *b, int indent, json::obj *key) { + if (equiv(a, b)) + return; + else if (!a) { + if (show_deletion) do_output(b, indent, "+"); + return; + } else if (!b) { + if (show_addition) do_output(a, indent, "-"); + return; + } else if (typeid(*a) == typeid(*b)) { + if (typeid(*a) == typeid(json::vector)) { + print_diff(static_cast(a), static_cast(b), + ignore_indexes_for_key(key), indent); + return; + } else if (typeid(*a) == typeid(json::map)) { + print_diff(static_cast(a), static_cast(b), indent); + return; + } + } + do_output(a, indent, "-"); + do_output(b, indent, "+"); +} + +int do_diff(const char *a_name, json::obj *a, const char *b_name, json::obj *b) { + if (equiv(a, b)) return 0; + std::cout << "--- " << a_name << std::endl; + std::cout << "+++ " << b_name << std::endl; + print_diff(a, b, 0); + std::cout << std::endl; + return 1; +} +int do_diff(const char *a_name, std::unique_ptr &a, const char *b_name, + std::unique_ptr &b) { + return do_diff(a_name, a.get(), b_name, b.get()); +} + +int main(int 
ac, char **av) { + int error = 0; + std::unique_ptr file1; + const char *file1_name = 0; + for (int i = 1; i < ac; i++) + if (av[i][0] == '-' && av[i][1] == 0) { + if (file1) { + std::unique_ptr file2; + if (!(std::cin >> file2) || !file2) { + std::cerr << "Failed reading json from stdin" << std::endl; + error = 2; + } else if (!(error & 2)) + error |= do_diff(file1_name, file1, "", file2); + } else if (!(std::cin >> file1) || !file1) { + std::cerr << "Failed reading json from stdin" << std::endl; + error = 2; + } else + file1_name = ""; + } else if (av[i][0] == '-' || av[i][0] == '+') { + bool flag = av[i][0] == '+'; + for (char *arg = av[i] + 1; *arg;) switch (*arg++) { + case 'a': + show_addition = flag; + break; + case 'd': + show_deletion = flag; + break; + case 'i': + if (*av[++i] == '@') { + std::ifstream file(av[i] + 1); + std::string str; + if (!file) + std::cerr << "Can't read " << av[i] + 1 << std::endl; + else + while (getline(file, str)) add_ignore(str.c_str()); + } else + add_ignore(av[i]); + break; + case 'l': + list_map_keys.push_back(av[++i]); + break; + case 's': + sort_map = flag; + break; + default: + std::cerr << "Unknown option " << (flag ? 
'+' : '-') << arg[-1] + << std::endl; + error = 2; + } + } else { + std::istream *in = nullptr; + if (auto ext = strrchr(av[i], '.')) { + std::string cmd; + if (!strcmp(ext, ".gz") || !strcmp(ext, ".Z")) + cmd = "zcat "; + else if (!strcmp(ext, ".bz") || !strcmp(ext, ".bz2")) + cmd = "bzcat "; + if (!cmd.empty()) { + cmd += av[i]; + cmd = "2>/dev/null; " + cmd; // ignore errors (Broken Pipe in particular) + auto *pipe = popen(cmd.c_str(), "r"); + if (pipe) { + auto *pstream = new fdstream(fileno(pipe)); + pstream->setclose([pipe]() { pclose(pipe); }); + in = pstream; + } + } + } + if (!in) in = new std::ifstream(av[i]); + if (!in || !*in) { + std::cerr << "Can't open " << av[i] << " for reading" << std::endl; + error = 2; + } else if (file1) { + std::unique_ptr file2; + if (!(*in >> file2) || !file2) { + std::cerr << "Failed reading json from " << av[i] << std::endl; + error = 2; + } else if (!(error & 2)) + error |= do_diff(file1_name, file1, av[i], file2); + } else if (!(*in >> file1) || !file1) { + std::cerr << "Failed reading json from " << av[i] << std::endl; + error = 2; + } else + file1_name = av[i]; + delete in; + } + if (error & 2) std::cerr << "usage: " << av[0] << " [-adi:l:] file1 file2" << std::endl; + return error; +} diff --git a/backends/tofino/bf-asm/lex-yaml.l b/backends/tofino/bf-asm/lex-yaml.l new file mode 100644 index 00000000000..c28facd913e --- /dev/null +++ b/backends/tofino/bf-asm/lex-yaml.l @@ -0,0 +1,283 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + + +%x LINESTART LINE1 LINE2 LINE3 COMMENT +%s NORMAL + +%{ +#include +#include +#include +static std::stack indent; +static int parens=0; +static int indent_depth(const char *); +static int parse_num(YYSTYPE *, const char *s, int base); +static int parse_match(YYSTYPE *, const char *s, int bits_per_digit); + +#if YYDEBUG +#undef BEGIN +/* DANGER -- the depends on the internals of how flex sets states, but as + * DANGER -- its only for debugging, its not too bad */ +#define BEGIN(S) ((yy_start) = 1 + 2*(S), \ + yydebug ? fprintf(stderr, "Setting lexer state "#S"\n") : 0) +#define DB(...) fprintf(stderr, __VA_ARGS__) +#else +#define DB(...) +#endif + +#pragma clang diagnostic ignored "-Wnull-conversion" + +%} + +ID [A-Za-z_@$]([-.]?*[A-Za-z0-9_@$])* +STR \"(\\.|[^\n"\\])*\" +%option nounput noyywrap + +%% + +.*|\n { yyless(0); BEGIN(LINESTART); indent.push(0); } +[ \t]* { int depth = indent_depth(yytext); + if (depth < indent.top()) { + indent.pop(); + yyless(0); + return UNINDENT; } + BEGIN(NORMAL); + if (depth > indent.top()) { + indent.push(depth); + return INDENT; } } + +"#line" { BEGIN(LINE1); } +"# " { BEGIN(LINE1); } +[0-9]+ { line_file_map[lineno].second = atoi(yytext)-1; + DB("next line is %s\n", yytext); + BEGIN(LINE2); } +\"[^"]* { line_file_map[lineno].first = yytext+1; + DB("file is '%s'\n", yytext+1); + BEGIN(LINE3); } +[ \t] ; +. { BEGIN(LINE3); } +. ; +\n { lineno++; BEGIN(LINESTART); } +<> { BEGIN(LINESTART); } + +[ \t]*"#" { BEGIN(LINE3); } +. 
{ yyless(0); + if (indent.top() > 0) { + indent.pop(); + return UNINDENT; } + BEGIN(NORMAL); } +<> { if (indent.top() > 0) { + indent.pop(); + return UNINDENT; } + BEGIN(NORMAL); } +[ \t\r]*\n { lineno++; } + +[[({] { parens++; return *yytext; } +[])}] { if (--parens < 0) parens = 0; + return *yytext; } +\n { lineno++; + if (parens == 0) { + BEGIN(LINESTART); + return '\n'; } } +[ \t\r]+ ; +".." { return DOTDOT; } +{ID} { yylval.str = strdup(yytext); return ID; } +{STR} { yylval.str = strndup(yytext+1, yyleng-2); return STR; } +[0-9_]+ { return parse_num(&yylval, yytext, 10); } +0[xX][0-9a-fA-F_]+ { return parse_num(&yylval, yytext+2, 16); } +0[oO][0-7_]+ { return parse_num(&yylval, yytext+2, 8); } +0[bB][0-1_]+ { return parse_num(&yylval, yytext+2, 2); } +0[xX][0-9a-fA-F*_]+ { return parse_match(&yylval, yytext+2, 4); } +0[oO][0-7*_]+ { return parse_match(&yylval, yytext+2, 3); } +0[bB][0-1*_]+ { return parse_match(&yylval, yytext+2, 1); } +"*" { return parse_match(&yylval, yytext, 0); } + +"#".* ; +"/*" { BEGIN(COMMENT); } +"*/" { BEGIN(NORMAL); } +. ; +\n { lineno++; } + +. 
{ return *yytext; }  /* action for the catch-all '.' rule on the previous line */


%%

/* flex's #line generation is broken, so we manually resync so we can debug */
#line 104 "lex-yaml.l"

/* Compute the indentation column represented by the whitespace prefix `pfx`;
 * a tab advances to the next multiple of 8 (`rv &= ~7; rv += 8`).  Stops at
 * the first non-whitespace character. */
int indent_depth(const char *pfx) {
    int rv = 0;
    while (*pfx)
        switch(*pfx++) {
        case ' ': rv++; break;
        case '\t': rv &= ~7; rv += 8; break;
        default:
            return rv;}
    return rv;
}

#include "backends/tofino/bf-asm/gen/uptr_sizes.h"

/* Multiply a little-endian bigint (vector of uintptr_t words, least
 * significant word first) in place by `f`, propagating the carry, and append
 * a new word if the final carry is nonzero.  Uses a double-width integer
 * type (uint2ptr_t) when the platform provides one, otherwise does the
 * multiply in half-word (uinthptr_t) pieces. */
void bigint_mul(VECTOR(uintptr_t) &val, unsigned f) {
    unsigned carry = 0;
    for (int i = 0; i < val.size; i++) {
#if defined(uint2ptr_t)
        /* double-width path: one widened multiply per word */
        uint2ptr_t v = val.data[i];
        v = v * f + carry;
        val.data[i] = (uintptr_t)v;
        carry = v >> CHAR_BIT * sizeof(uintptr_t);
#elif defined(uinthptr_t)
        /* half-word path: split the word and multiply the halves separately */
        uinthptr_t lo = val.data[i],
                   hi = val.data[i] >> CHAR_BIT * sizeof(uinthptr_t);
        uintptr_t tmp = (uintptr_t)lo * f + carry;
        lo = tmp;
        tmp >>= CHAR_BIT * sizeof(uinthptr_t);
        tmp += (uintptr_t)hi * f;
        carry = tmp >> CHAR_BIT * sizeof(uinthptr_t);
        val.data[i] = (tmp << (CHAR_BIT * sizeof(uinthptr_t))) + lo;
#else
#error "No appropriately sized type for bigint_mul"
#endif
    }
    if (carry)
        VECTOR_add(val, carry);
}

/* Add `a` to the bigint in place.  `(val.data[i] += a) >= a` detects that no
 * unsigned wraparound occurred (the sum is only smaller than the addend on
 * overflow); on overflow, carry 1 into the next word, growing the vector if
 * the carry falls off the end. */
void bigint_add(VECTOR(uintptr_t) &val, unsigned a) {
    for (int i = 0; i < val.size; i++) {
        if ((val.data[i] += a) >= a)
            return;
        a = 1; }
    VECTOR_add(val, a);
}

/* Initialize a bigint from a 64-bit value, splitting it into uintptr_t-sized
 * words on targets where uintptr_t is narrower than int64_t.  The right
 * shift is done in two halves so we never shift by the full width of the
 * type (which would be undefined behavior). */
void bigint_init(VECTOR(uintptr_t) &val, int64_t v) {
    if (sizeof(int64_t)/sizeof(uintptr_t) > 1) {
        VECTOR_init(val, sizeof(int64_t)/sizeof(uintptr_t));
        do {
            val.data[val.size++] = v;
            v >>= CHAR_BIT * sizeof(uintptr_t) / 2;
            v >>= CHAR_BIT * sizeof(uintptr_t) / 2;
        } while (v > 0);
    } else {
        VECTOR_init1(val, v);
    }
}

/* Parse the digit string `s` in the given base into *val.  '_' separators
 * are skipped.  Returns INT with the value in val->i while it fits in 32
 * bits (unsigned); anything larger is promoted to a bigint in val->bigi and
 * BIGINT is returned. */
int parse_num(YYSTYPE *val, const char *s, int base) {
    int rv = INT;
    val->i = 0;
    s--;
    while (*++s) {
        if (*s == '_') continue;
        /* Promote to a bigint before the multiply below could overflow
         * int64_t.  (A second promotion at the bottom of the loop caps INT
         * tokens at 32 bits.) */
        if (rv == INT && val->i > INT64_MAX/base) {
            bigint_init(val->bigi, val->i);
            rv = BIGINT; }
        if (rv == INT)
            val->i *= base;
        else
            bigint_mul(val->bigi, base);
        switch (*s) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            if (rv == INT)
                val->i += *s - '0';
            else
                bigint_add(val->bigi, *s - '0');
            break;
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
            if (rv == INT)
                val->i += *s - 'a' + 10;
            else
                bigint_add(val->bigi, *s - 'a' + 10);
            break;
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            if (rv == INT)
                val->i += *s - 'A' + 10;
            else
                bigint_add(val->bigi, *s - 'A' + 10);
            break;
        default:
            assert(0); }
        if (rv == INT && val->i > 0xffffffff) {
            // We limit INT tokens to what will fit in 32 bits (unsigned) even though
            // we use a int64_t to hold them, as most parts of the compiler can't deal
            // with larger constants. The few places that can deal with >32bit values
            // handle BIGINTs
            bigint_init(val->bigi, val->i);
            rv = BIGINT; } }
    return rv;
}

/* Parse a ternary match constant whose digits may include '*' ("don't
 * care").  bits_per_digit is 4/3/1 for hex/octal/binary (0 for a bare '*'
 * token, which matches anything).  Encoding: each 0 bit sets the
 * corresponding bit of word0, each 1 bit sets word1, and a '*' digit sets
 * both.  Returns MATCH with the value in val->match, or BIGMATCH with the
 * word list in val->bigm once the constant no longer fits in one 64-bit
 * word pair. */
int parse_match(YYSTYPE *val, const char *s, int bits_per_digit) {
    val->match.word0 = val->match.word1 = 0;
    int rv = MATCH;
    VECTOR(match_t) bigm = EMPTY_VECTOR_INIT;
    if (*s == '*' && bits_per_digit == 0) return rv;
    unsigned digit = 0, digit_mask = (1U << bits_per_digit) - 1;
    /* overflow_mask covers the top digit position of a word: if any of those
     * bits are set, the next shift would lose them */
    decltype(val->match.word0) overflow_mask = digit_mask;
    overflow_mask <<= sizeof(val->match.word0) * 8 - bits_per_digit;
    s--;
    while (*++s) {
        if (*s == '_') continue;
        if (rv == BIGMATCH || ((val->match.word0 | val->match.word1) & overflow_mask)) {
            /* spill into the multiword form once the top digit would shift out */
            rv = BIGMATCH;
            if (bigm.size < 2) {
                VECTOR_resize(bigm, 2);
                bigm.data[0].word0 = bigm.data[0].word1 =
                    bigm.data[1].word0 = bigm.data[1].word1 = 0; }
            if ((bigm.data[bigm.size-1].word0 | bigm.data[bigm.size-1].word1) & overflow_mask) {
                VECTOR_resize(bigm, bigm.size+1);
                bigm.data[bigm.size-1].word0 = bigm.data[bigm.size-1].word1 = 0; }
            for (int i = bigm.size-1; i > 0; --i) {
                /* NOTE(review): this ORs each word's own post-shift bits back
                 * into its low digit; a multiword left shift would normally
                 * carry in the top bits of the next-lower word
                 * (bigm.data[i-1]).  Looks like a bug -- confirm against
                 * upstream before relying on >128-bit match constants. */
                bigm.data[i].word0 <<= bits_per_digit;
                bigm.data[i].word0 |= bigm.data[i].word0 >> (64 - bits_per_digit);
                bigm.data[i].word1 <<= bits_per_digit;
                bigm.data[i].word1 |= bigm.data[i].word1 >> (64 - bits_per_digit); }
            bigm.data[1].word0 |= val->match.word0 >> (64 - bits_per_digit);
            bigm.data[1].word1 |= val->match.word1 >> (64 - bits_per_digit); }
        val->match.word0 <<= bits_per_digit;
        val->match.word1 <<= bits_per_digit;
        switch (*s) {
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            digit = *s - '0';
            break;
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
            digit = *s - 'a' + 10;
            break;
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            digit = *s - 'A' + 10;
            break;
        case '*':
            /* don't care: set all mask bits of word1 here; word0 gets them
             * below via digit_mask & ~digit with digit == 0 */
            val->match.word1 |= digit_mask;
            digit = 0;
            break;
        default:
            assert(0); }
        assert((digit & ~digit_mask) == 0);
        val->match.word1 |= digit;
        val->match.word0 |= digit_mask & ~digit; }
    if (rv == BIGMATCH) {
        bigm.data[0] = val->match;
        val->bigm = bigm; }
    return rv;
}
diff --git a/backends/tofino/bf-asm/map.h b/backends/tofino/bf-asm/map.h
new file mode 100644
index 00000000000..fc6e82ba3b5
--- /dev/null
+++ b/backends/tofino/bf-asm/map.h
@@ -0,0 +1,255 @@
/**
 * Copyright (C) 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_MAP_H_ +#define BACKENDS_TOFINO_BF_ASM_MAP_H_ + +#include + +template +inline V get(const std::map &m, T key, V def = V()) { + auto it = m.find(key); + if (it != m.end()) return it->second; + return def; +} + +template +inline V *getref(std::map &m, T key) { + auto it = m.find(key); + if (it != m.end()) return &it->second; + return 0; +} + +template +inline const V *getref(const std::map &m, T key) { + auto it = m.find(key); + if (it != m.end()) return &it->second; + return 0; +} + +template +inline V get(const std::map *m, T key, V def = V()) { + return m ? get(*m, key, def) : def; +} + +template +inline V *getref(std::map *m, T key) { + return m ? getref(*m, key) : 0; +} + +template +inline const V *getref(const std::map *m, T key) { + return m ? getref(*m, key) : 0; +} + +/* iterate over the keys in a map */ +template +struct IterKeys { + class iterator + : public std::iterator::iterator_category, + typename std::iterator_traits::value_type, + typename std::iterator_traits::difference_type, + typename std::iterator_traits::pointer, + typename std::iterator_traits::reference> { + PairIter it; + + public: + iterator() {} + explicit iterator(PairIter i) : it(i) {} + iterator &operator=(PairIter i) { + it = i; + return *this; + } + iterator &operator++() { + ++it; + return *this; + } + iterator &operator--() { + --it; + return *this; + } + iterator operator++(int) { + auto copy = *this; + ++it; + return copy; + } + iterator operator--(int) { + auto copy = *this; + --it; + return copy; + } + bool operator==(const iterator &i) const { return it == i.it; } + bool operator!=(const iterator &i) const { return it != i.it; } + decltype(*&it->first) operator*() const { return it->first; } + decltype(&it->first) operator->() const { return &it->first; } + } b, e; + + template + IterKeys(U &map) : b(map.begin()), e(map.end()) {} // NOLINT(runtime/explicit) + IterKeys(PairIter b, PairIter e) 
: b(b), e(e) {} + iterator begin() const { return b; } + iterator end() const { return e; } + + protected: + IterKeys() {} +}; + +template +struct IterKeysCopy : IterKeys { + Map m; + explicit IterKeysCopy(Map &&map) : m(std::move(map)) { + // move the map into this object, then setup the iterators + this->b = m.begin(); + this->e = m.end(); + } +}; + +template +IterKeys Keys(Map &m) { + return IterKeys(m); +} + +template +IterKeys Keys(const Map &m) { + return IterKeys(m); +} + +template +IterKeysCopy Keys(Map &&m) { + return IterKeysCopy(std::move(m)); +} + +template +IterKeys Keys(std::pair range) { + return IterKeys(range.first, range.second); +} + +/* iterate over the values in a map */ +template +struct IterValues { + class iterator + : public std::iterator::iterator_category, + typename std::iterator_traits::value_type, + typename std::iterator_traits::difference_type, + typename std::iterator_traits::pointer, + typename std::iterator_traits::reference> { + PairIter it; + + public: + iterator() {} + explicit iterator(PairIter i) : it(i) {} + iterator &operator=(PairIter i) { + it = i; + return *this; + } + iterator &operator++() { + ++it; + return *this; + } + iterator &operator--() { + --it; + return *this; + } + iterator operator++(int) { + auto copy = *this; + ++it; + return copy; + } + iterator operator--(int) { + auto copy = *this; + --it; + return copy; + } + bool operator==(const iterator &i) const { return it == i.it; } + bool operator!=(const iterator &i) const { return it != i.it; } + decltype(*&it->second) operator*() const { return it->second; } + decltype(&it->second) operator->() const { return &it->second; } + } b, e; + + template + IterValues(U &map) : b(map.begin()), e(map.end()) {} // NOLINT(runtime/explicit) + IterValues(PairIter b, PairIter e) : b(b), e(e) {} + iterator begin() const { return b; } + iterator end() const { return e; } + + protected: + IterValues() {} +}; + +template +struct IterValuesCopy : IterValues { + Map m; + explicit 
IterValuesCopy(Map &&map) : m(std::move(map)) { + // move the map into this object, then setup the iterators + this->b = m.begin(); + this->e = m.end(); + } +}; + +template +IterValues Values(Map &m) { + return IterValues(m); +} + +template +IterValues Values(const Map &m) { + return IterValues(m); +} + +template +IterValuesCopy Values(Map &&m) { + return IterValuesCopy(std::move(m)); +} + +template +IterValues Values(std::pair range) { + return IterValues(range.first, range.second); +} + +/* iterate over the values for a single key in a multimap */ +template +class MapForKey { + M ↦ + typename M::key_type key; + class iterator { + const MapForKey &self; + decltype(map.begin()) it; + + public: + iterator(const MapForKey &s, decltype(map.begin()) i) : self(s), it(i) {} + iterator &operator++() { + if (++it != self.map.end() && it->first != self.key) it = self.map.end(); + return *this; + } + bool operator==(const iterator &i) const { return it == i.it; } + bool operator!=(const iterator &i) const { return it != i.it; } + decltype(*&it->second) operator*() const { return it->second; } + decltype(&it->second) operator->() const { return &it->second; } + }; + + public: + MapForKey(M &m, typename M::key_type k) : map(m), key(k) {} + iterator begin() const { return iterator(*this, map.find(key)); } + iterator end() const { return iterator(*this, map.end()); } +}; + +template +MapForKey ValuesForKey(M &m, typename M::key_type k) { + return MapForKey(m, k); +} + +#endif /* BACKENDS_TOFINO_BF_ASM_MAP_H_ */ diff --git a/backends/tofino/bf-asm/mask_counter.h b/backends/tofino/bf-asm/mask_counter.h new file mode 100644 index 00000000000..caa93b1d696 --- /dev/null +++ b/backends/tofino/bf-asm/mask_counter.h @@ -0,0 +1,65 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_MASK_COUNTER_H_ +#define BACKENDS_TOFINO_BF_ASM_MASK_COUNTER_H_ + +#include + +#include "lib/bitvec.h" + +class MaskCounter { + unsigned mask, val; + bool oflo; + + public: + explicit MaskCounter(unsigned m) : mask(m), val(0), oflo(false) {} + explicit operator bool() const { return !oflo; } + operator unsigned() const { return val; } + bool operator==(const MaskCounter &a) const { return val == a.val && oflo == a.oflo; } + MaskCounter &operator++() { + val = ((val | ~mask) + 1) & mask; + if (val == 0) oflo = true; + return *this; + } + MaskCounter operator++(int) { + MaskCounter tmp(*this); + ++*this; + return tmp; + } + MaskCounter &operator--() { + val = (val - 1) & mask; + if (val == mask) oflo = true; + return *this; + } + MaskCounter operator--(int) { + MaskCounter tmp(*this); + --*this; + return tmp; + } + MaskCounter &clear() { + val = 0; + oflo = false; + return *this; + } + MaskCounter &overflow(bool v = true) { + oflo = v; + return *this; + } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_MASK_COUNTER_H_ */ diff --git a/backends/tofino/bf-asm/match_source.h b/backends/tofino/bf-asm/match_source.h new file mode 100644 index 00000000000..0014e0ed38f --- /dev/null +++ b/backends/tofino/bf-asm/match_source.h @@ -0,0 +1,84 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_MATCH_SOURCE_H_ +#define BACKENDS_TOFINO_BF_ASM_MATCH_SOURCE_H_ + +#include +#include + +#include "backends/tofino/bf-asm/asm-types.h" +#include "lib/stringify.h" + +/** + * A source for a match key of a table. The source can either be from the input xbar, or from the + * galois field matrix, as indicated in uArch Section Exact Match Row Vertical/Horizontal (VH) + * Xbars. This class is the parent of HashMatchSource and Phv::Ref. + */ +class MatchSource : public IHasDbPrint { + public: + virtual int fieldlobit() const = 0; + virtual int fieldhibit() const = 0; + virtual unsigned size() const = 0; + virtual int slicelobit() const = 0; + virtual int slicehibit() const = 0; + virtual const char *name() const = 0; + virtual int get_lineno() const = 0; + virtual std::string toString() const = 0; + virtual void dbprint(std::ostream &out) const = 0; +}; + +/** + * The source used by proxy hash tables for their match key. 
+ */ +class HashMatchSource : public MatchSource { + int lo = 0; + int hi = 0; + + public: + int lineno = 0; + HashMatchSource(int line, int l, int h) : lo(l), hi(h), lineno(line) {} + explicit HashMatchSource(value_t value) { + if (CHECKTYPE(value, tCMD)) { + lineno = value.lineno; + if (value != "hash_group") + error(value.lineno, "Hash Match source must come from a hash group"); + if (value.vec.size != 2) error(value.lineno, "Hash Match source requires a range"); + if (CHECKTYPE(value.vec[1], tRANGE)) { + lo = value.vec[1].range.lo; + hi = value.vec[1].range.hi; + } + } + } + + int get_lineno() const override { return lineno; } + int fieldlobit() const override { return lo < 0 ? 0 : lo; } + int fieldhibit() const override { return hi < 0 ? 0 : hi; } + unsigned size() const override { return hi >= lo && lo >= 0 ? hi - lo + 1 : 0; } + int slicelobit() const override { return fieldlobit(); } + int slicehibit() const override { return fieldhibit(); } + const char *name() const override { return "hash_group"; } + std::string toString() const override { + std::stringstream str; + str << *this; + return str.str(); + } + + void dbprint(std::ostream &out) const { out << name() << "(" << lo << ".." << hi << ")"; } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_MATCH_SOURCE_H_ */ diff --git a/backends/tofino/bf-asm/match_table.cpp b/backends/tofino/bf-asm/match_table.cpp new file mode 100644 index 00000000000..7a602c4728d --- /dev/null +++ b/backends/tofino/bf-asm/match_table.cpp @@ -0,0 +1,700 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "action_bus.h" +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" +#include "misc.h" + +Table::Format *MatchTable::get_format() const { + if (!format && gateway) return gateway->get_format(); + return format.get(); +} + +Table::Format::Field *MatchTable::lookup_field(const std::string &n, const std::string &act) const { + auto *rv = format ? format->field(n) : nullptr; + if (!rv && gateway) rv = gateway->lookup_field(n, act); + return rv; +} + +void MatchTable::common_init_setup(const VECTOR(pair_t) & data, bool ternary, + P4Table::type p4type) { + Table::common_init_setup(data, ternary, p4type); + setup_logical_id(); + if (Target::DYNAMIC_CONFIG()) + if (auto *dconfig = get(data, "dynamic_config")) + if (CHECKTYPESIZE(*dconfig, tMAP)) + for (auto &kv : dconfig->map) dynamic_config.emplace_back(this, kv); + for (auto &kv : data) + if (kv.key == "input_xbar" && CHECKTYPESIZE(kv.value, tMAP)) + input_xbar.emplace_back(InputXbar::create(this, ternary, kv.key, kv.value.map)); +} + +bool MatchTable::common_setup(pair_t &kv, const VECTOR(pair_t) & data, P4Table::type p4type) { + if (Table::common_setup(kv, data, p4type)) { + return true; + } + if (kv.key == "input_xbar" || kv.key == "hash_dist") { + /* done in common_init_setup */ + return true; + } + if (kv.key == "dynamic_config" && Target::DYNAMIC_CONFIG()) { + /* done in 
common_init_setup */ + return true; + } + if (kv.key == "always_run") { + if ((always_run = get_bool(kv.value)) && !Target::SUPPORT_ALWAYS_RUN()) + error(kv.key.lineno, "always_run not supported on %s", Target::name()); + return true; + } + if (kv.key == "gateway") { + if (CHECKTYPE(kv.value, tMAP)) { + gateway = GatewayTable::create(kv.key.lineno, name_ + " gateway", gress, stage, -1, + kv.value.map); + gateway->set_match_table(this, false); + } + return true; + } + if (kv.key == "idletime") { + if (CHECKTYPE(kv.value, tMAP)) { + idletime = IdletimeTable::create(kv.key.lineno, name_ + " idletime", gress, stage, -1, + kv.value.map); + idletime->set_match_table(this, false); + } + return true; + } + if (kv.key == "selector") { + attached.selector.setup(kv.value, this); + return true; + } + if (kv.key == "selector_length") { + attached.selector_length.setup(kv.value, this); + return true; + } + if (kv.key == "meter_color") { + attached.meter_color.setup(kv.value, this); + return true; + } + if (kv.key == "stats") { + if (kv.value.type == tVEC) + for (auto &v : kv.value.vec) attached.stats.emplace_back(v, this); + else + attached.stats.emplace_back(kv.value, this); + return true; + } + if (kv.key == "meter") { + if (kv.value.type == tVEC) + for (auto &v : kv.value.vec) attached.meters.emplace_back(v, this); + else + attached.meters.emplace_back(kv.value, this); + return true; + } + if (kv.key == "stateful") { + if (kv.value.type == tVEC) + for (auto &v : kv.value.vec) attached.statefuls.emplace_back(v, this); + else + attached.statefuls.emplace_back(kv.value, this); + return true; + } + if (kv.key == "table_counter") { + if (kv.value == "table_miss") + table_counter = TABLE_MISS; + else if (kv.value == "table_hit") + table_counter = TABLE_HIT; + else if (kv.value == "gateway_miss") + table_counter = GATEWAY_MISS; + else if (kv.value == "gateway_hit") + table_counter = GATEWAY_HIT; + else if (kv.value == "gateway_inhibit") + table_counter = GATEWAY_INHIBIT; + else if 
(kv.value == "disabled") + table_counter = DISABLED; + else + error(kv.value.lineno, "Invalid table counter %s", value_desc(kv.value)); + return true; + } + return false; +} + +bool MatchTable::is_attached(const Table *tbl) const { + return tbl && (tbl == gateway || tbl == idletime || get_attached()->is_attached(tbl)); +} + +bitvec MatchTable::compute_reachable_tables() { + Table::compute_reachable_tables(); + if (gateway) reachable_tables_ |= gateway->reachable_tables(); + if (idletime) reachable_tables_ |= idletime->reachable_tables(); + reachable_tables_ |= attached.compute_reachable_tables(); + return reachable_tables_; +} + +/** + * Return the first default found meter type of a stateful/meter call. If the meter type + * is considered to be default, then all of the meter types would be identical + */ +METER_ACCESS_TYPE MatchTable::default_meter_access_type(bool for_stateful) { + METER_ACCESS_TYPE rv = NOP; + auto actions = get_actions(); + if (actions == nullptr) return rv; + for (auto it = actions->begin(); it != actions->end(); it++) { + if (it->default_only) continue; + for (auto &call : it->attached) { + auto type = call->table_type(); + if (!((type == Table::METER && !for_stateful) || + (type == Table::STATEFUL && for_stateful))) + continue; + // Currently the first argument is the meter type + if (call.args[0].type == Table::Call::Arg::Const) { + return static_cast(call.args[0].value()); + } else if (auto n = call.args[0].name()) { + if (auto *st = call->to()) { + if (auto *act = st->actions->action(call.args[0].name())) { + return static_cast((act->code << 1) | 1); + } + } + } + } + } + return rv; +} + +std::vector MatchTable::get_calls() const { + std::vector rv = Table::get_calls(); + if (attached.selector) rv.emplace_back(attached.selector); + if (attached.selector_length) rv.emplace_back(attached.selector_length); + for (auto &c : attached.stats) + if (c) rv.emplace_back(c); + for (auto &c : attached.meters) + if (c) rv.emplace_back(c); + for (auto 
&c : attached.statefuls) + if (c) rv.emplace_back(c); + if (attached.meter_color) rv.emplace_back(attached.meter_color); + return rv; +} + +void MatchTable::pass0() { + LOG1("### match table " << name() << " pass0 " << loc()); +#if 0 + // redundant with (and supercedes) choose_logical_id in pass2. That function is much + // better, taking dependencies into account, so logical_id should not be allocated here + alloc_id("logical", logical_id, stage->pass1_logical_id, + LOGICAL_TABLES_PER_STAGE, true, stage->logical_id_use); +#endif + if (logical_id >= 0) { + if (stage->logical_id_use[logical_id] && stage->logical_id_use[logical_id] != this) { + error(lineno, "Duplicate logical id %d use", logical_id); + error(stage->logical_id_use[logical_id]->lineno, "previous use here"); + } + stage->logical_id_use[logical_id] = this; + } + for (auto physid : physical_ids) { + if (stage->physical_id_use[physid] && stage->physical_id_use[physid] != this) { + error(lineno, "Duplicate physical id %d use", physid); + error(stage->physical_id_use[physid]->lineno, "previous use here"); + } + stage->physical_id_use[physid] = this; + } + if (action.check() && action->set_match_table(this, !action.is_direct_call()) != ACTION) + error(action.lineno, "%s is not an action table", action->name()); + attached.pass0(this); +} + +void MatchTable::pass1() { + if (gateway) { + // needs to happen before Actions::pass1 so that extra_next_lut is setup + gateway->setup_map_indexing(this); + } + Table::pass1(); + if (!p4_table) + p4_table = P4Table::alloc(P4Table::MatchEntry, this); + else + p4_table->check(this); + // Set up default action. 
This will look up action and/or tind for default + // action if the match_table doesnt have one specified + if (default_action.empty()) default_action = get_default_action(); + if (table_counter >= GATEWAY_MISS && !gateway) + error(lineno, "Can't count gateway events on table %s as it doesn't have a gateway", + name()); + if (!p4_params_list.empty()) { + for (auto &p : p4_params_list) { + // bit_width_full should be generated in assembly as 'full_size' in + // the 'p4_param_order'. This is the full size of the field as used + // in p4 program. + if (!p.bit_width_full) p.bit_width_full = p.bit_width; + + std::size_t found = p.name.find(".$valid"); + if (found != std::string::npos) p.is_valid = true; + } + } + if (idletime) { + idletime->logical_id = logical_id; + idletime->pass1(); + } + for (auto &ixb : input_xbar) ixb->pass1(); + for (auto &hd : hash_dist) hd.pass1(this, HashDistribution::OTHER, false); + if (gateway) { + gateway->logical_id = logical_id; + gateway->pass1(); + } +} + +void Table::allocate_physical_ids(unsigned usable) { + if (physical_ids) { + auto unusable = physical_ids - bitvec(usable); + BUG_CHECK(unusable.empty(), "table %s using physical id %d which appears to be invalid", + name(), *unusable.begin()); + return; + } + if (!Target::MATCH_REQUIRES_PHYSID()) return; + for (int i = 0; i < PHYSICAL_TABLES_PER_STAGE; ++i) { + if (!((usable >> i) & 1)) continue; + if (stage->physical_id_use[i]) continue; + physical_ids[i] = 1; + stage->physical_id_use[i] = this; + return; + } + error(lineno, "No physical id available for table %s", name()); +} + +void MatchTable::pass3() { + if (gateway) { + gateway->pass3(); + } +} + +void MatchTable::gen_idletime_tbl_cfg(json::map &stage_tbl) const { + if (idletime) idletime->gen_stage_tbl_cfg(stage_tbl); +} + +#include "jbay/match_table.cpp" // NOLINT(build/include) +#include "tofino/match_table.cpp" // NOLINT(build/include) + +template +void MatchTable::write_common_regs(typename TARGET::mau_regs ®s, int type, 
Table *result) { + /* this follows the order and behavior in stage_match_entry_table.py + * it can be reorganized to be clearer */ + + /*------------------------ + * data path + *-----------------------*/ + if (gress == EGRESS) regs.dp.imem_table_addr_egress |= 1 << logical_id; + + /*------------------------ + * Match Merge + *-----------------------*/ + auto &merge = regs.rams.match.merge; + auto &adrdist = regs.rams.match.adrdist; + if (gress != GHOST) merge.predication_ctl[gress].table_thread |= 1 << logical_id; + if (gress == INGRESS || gress == GHOST) { + merge.logical_table_thread[0].logical_table_thread_ingress |= 1 << logical_id; + merge.logical_table_thread[1].logical_table_thread_ingress |= 1 << logical_id; + merge.logical_table_thread[2].logical_table_thread_ingress |= 1 << logical_id; + } else if (gress == EGRESS) { + merge.logical_table_thread[0].logical_table_thread_egress |= 1 << logical_id; + merge.logical_table_thread[1].logical_table_thread_egress |= 1 << logical_id; + merge.logical_table_thread[2].logical_table_thread_egress |= 1 << logical_id; + } + adrdist.adr_dist_table_thread[timing_thread(gress)][0] |= 1 << logical_id; + adrdist.adr_dist_table_thread[timing_thread(gress)][1] |= 1 << logical_id; + + Actions *actions = action && action->actions ? action->actions.get() : this->actions.get(); + + std::set result_buses; + if (result) { + actions = result->action && result->action->actions ? 
result->action->actions.get() + : result->actions.get(); + for (auto &row : result->layout) { + int r_bus = row.row * 2; + if (row.bus.count(Layout::RESULT_BUS)) + r_bus += row.bus.at(Layout::RESULT_BUS) & 1; + else if (row.bus.count(Layout::TIND_BUS)) + r_bus += row.bus.at(Layout::TIND_BUS); + else + continue; + result_buses.insert(r_bus); + } + } else { + /* ternary match with no indirection table */ + auto tern_table = this->to(); + BUG_CHECK(tern_table != nullptr); + if (tern_table->indirect_bus >= 0) result_buses.insert(tern_table->indirect_bus); + result = this; + } + + for (auto r_bus : result_buses) { + auto &shift_en = merge.mau_payload_shifter_enable[type][r_bus]; + setup_muxctl(merge.match_to_logical_table_ixbar_outputmap[type][r_bus], logical_id); + setup_muxctl(merge.match_to_logical_table_ixbar_outputmap[type + 2][r_bus], logical_id); + + int default_action = 0; + unsigned adr_mask = 0; + unsigned adr_default = 0; + unsigned adr_per_entry_en = 0; + + /** + * This section of code determines the registers required to determine the + * instruction code to run for this particular table. This uses the information + * provided by the instruction code. + * + * The address is built of two parts, the instruction code and the per flow enable + * bit. These can either come from overhead, or from the default register. + * The keyword $DEFAULT indicates that the value comes from the default + * register + */ + auto instr_call = instruction_call(); + // FIXME: Workaround until a format is provided on the gateway to find the + // action bit section. This will be a quick add on. 
+ if (instr_call.args[0] == "$DEFAULT") { + for (auto it = actions->begin(); it != actions->end(); it++) { + if (it->code != -1) { + adr_default |= it->addr; + break; + } + } + } else if (auto field = instr_call.args[0].field()) { + adr_mask |= (1U << field->size) - 1; + } + + if (instr_call.args[1] == "$DEFAULT") { + adr_default |= ACTION_INSTRUCTION_ADR_ENABLE; + } else if (auto field = instr_call.args[1].field()) { + if (auto addr_field = instr_call.args[0].field()) { + adr_per_entry_en = field->bit(0) - addr_field->bit(0); + } else { + adr_per_entry_en = 0; + } + } + shift_en.action_instruction_adr_payload_shifter_en = 1; + merge.mau_action_instruction_adr_mask[type][r_bus] = adr_mask; + merge.mau_action_instruction_adr_default[type][r_bus] = adr_default; + merge.mau_action_instruction_adr_per_entry_en_mux_ctl[type][r_bus] = adr_per_entry_en; + + if (idletime) idletime->write_merge_regs(regs, type, r_bus); + if (result->action) { + if (auto adt = result->action->to()) { + merge.mau_actiondata_adr_default[type][r_bus] = + adt->determine_default(result->action); + } + shift_en.actiondata_adr_payload_shifter_en = 1; + } + if (!get_attached()->stats.empty()) shift_en.stats_adr_payload_shifter_en = 1; + if (!get_attached()->meters.empty() || !get_attached()->statefuls.empty()) + shift_en.meter_adr_payload_shifter_en = 1; + + result->write_merge_regs(regs, type, r_bus); + } + + /*------------------------ + * Action instruction Address + *-----------------------*/ + int max_code = actions->max_code; + if (options.match_compiler) + if (auto *action_format = lookup_field("action")) + max_code = (1 << (action_format->size - (gateway ? 1 : 0))) - 1; + /** + * The action map can be used if the choices for the instruction are < 8. The map data + * table will be used if the number of choices are between 2 and 8, and references + * the instruction call to determine whether the instruction comes from the map + * data table or the default register. 
+ */ + auto instr_call = instruction_call(); + bool use_action_map = + instr_call.args[0].field() && max_code < ACTION_INSTRUCTION_SUCCESSOR_TABLE_DEPTH; + // FIXME: Workaround until a format is provided on the gateway to find the + // action bit section. This will be a quick add on. + + if (use_action_map) { + merge.mau_action_instruction_adr_map_en[type] |= (1U << logical_id); + for (auto &act : *actions) + if ((act.name != result->default_action) || !result->default_only_action) { + merge.mau_action_instruction_adr_map_data[type][logical_id][act.code / 4] + .set_subfield(act.addr + ACTION_INSTRUCTION_ADR_ENABLE, + (act.code % 4) * TARGET::ACTION_INSTRUCTION_MAP_WIDTH, + TARGET::ACTION_INSTRUCTION_MAP_WIDTH); + } + } + + /** + * This register is now the responsiblity of the driver for all tables, as the driver + * will initialize this value from the initial default action. If we ever want to + * move some of this responsibility back to the compiler, then this code can be used + * for this, but it is currently incorrect for tables that have been split across + * multiple stages for non noop default actions. + if (this->to()) { + merge.mau_action_instruction_adr_miss_value[logical_id] = 0; + } else if (!default_action.empty()) { + auto *act = actions->action(default_action); + merge.mau_action_instruction_adr_miss_value[logical_id] = + ACTION_INSTRUCTION_ADR_ENABLE + act->addr; + } else if (!result->default_action.empty()) { + auto *act = actions->action(result->default_action); + merge.mau_action_instruction_adr_miss_value[logical_id] = + ACTION_INSTRUCTION_ADR_ENABLE + act->addr; } + */ + + /** + * No direct call for a next table, like instruction. The next table can be determined + * from other parameters. If there is a next parameter in the format, then this is the + * field to be used as an extractor. + * + * If there is no next field, but there is more than one possible entry in the hitmap, + * then the action instruction is being used as the index. 
+ * + * If necessary, i.e. something becomes more complex, then perhaps a call needs to be + * added. + * + * Also, a quick note that though the match_next_table_adr_default is not necessary to set, + * the diagram in 6.4.3.3. Next Table Processing, the default register is after the mask. + * However, in hardware, the default register is before the mask. + */ + int next_field_size = result->get_format_field_size("next"); + int action_field_size = result->get_format_field_size("action"); + + if (next_field_size > 0) { + next_table_adr_mask = ((1U << next_field_size) - 1); + } else if (result->get_hit_next().size() > 1) { + next_table_adr_mask = ((1U << action_field_size) - 1); + } + write_next_table_regs(regs, result); + + /*------------------------ + * Immediate data found in overhead + *-----------------------*/ + if (result->format) { + for (auto &row : result->layout) { + int r_bus = row.row * 2; + if (row.bus.count(Layout::RESULT_BUS)) + r_bus += row.bus.at(Layout::RESULT_BUS) & 1; + else if (row.bus.count(Layout::TIND_BUS)) + r_bus += row.bus.at(Layout::TIND_BUS); + else + continue; + merge.mau_immediate_data_mask[type][r_bus] = bitMask(result->format->immed_size); + if (result->format->immed_size > 0) + merge.mau_payload_shifter_enable[type][r_bus].immediate_data_payload_shifter_en = 1; + } + } + if (result->action_bus) { + result->action_bus->write_immed_regs(regs, result); + for (auto &mtab : get_attached()->meters) { + // if the meter table outputs something on the action-bus of the meter + // home row, need to set up the action hv xbar properly + result->action_bus->write_action_regs(regs, result, mtab->home_row(), 0); + } + for (auto &stab : get_attached()->statefuls) { + // if the stateful table outputs something on the action-bus of the meter + // home row, need to set up the action hv xbar properly + result->action_bus->write_action_regs(regs, result, stab->home_row(), 0); + } + } + + // FIXME: + // The action parameters that are stored as immediates 
in the match + // overhead need to be properly packed into this register. We had been + // previously assuming that the compiler would do that for us, specifying + // the bits needed here as the argument to the action call; eg assembly + // code like: + // default_action: actname(0x100) + // for the default action being actname with the value 0x100 for its + // parameters stored as immediates (which might actually be several + // parameters in the P4 source code.) To get this from the + // default_action_parameters map, we need to look up those argument names + // in the match table format and action aliases and figure out which ones + // correspond to match immediates, and pack the values appropriately. + // Doable but non-trivial, probably requiring a small helper function. Need + // to deal with both exact match and ternary indirect. + // + // For now, most miss configuration registers are only written by the driver + // (since the user API says what miss behavior to perform). The compiler + // (glass) relies on the driver to write them but this could change in + // future. This particular register would only be set if the compiler chose + // to allocate action parameters in match overhead. + // + // if (default_action_parameters.size() > 0) + // merge.mau_immediate_data_miss_value[logical_id] = default_action_parameters[0]; + // else if (result->default_action_parameters.size() > 0) + // merge.mau_immediate_data_miss_value[logical_id] = result->default_action_parameters[0]; + + for (auto &ixb : input_xbar) ixb->write_regs(regs); + /* DANGER -- you might think we should call write_regs on other related things here + * (actions, hash_dist, idletime, gateway) rather than just input_xbar, but those are + * all called by the various callers of this method. 
Not clear why input_xbar is + * different */ + + if (gress == EGRESS) regs.cfg_regs.mau_cfg_lt_thread |= 1U << logical_id; + if (options.match_compiler && dynamic_cast(this)) return; // skip the rest + + if (table_counter) { + merge.mau_table_counter_ctl[logical_id / 8U].set_subfield(table_counter, + 3 * (logical_id % 8U), 3); + } else { // Set to TABLE_HIT by default + merge.mau_table_counter_ctl[logical_id / 8U].set_subfield(TABLE_HIT, 3 * (logical_id % 8U), + 3); + } +} + +int MatchTable::get_address_mau_actiondata_adr_default(unsigned log2size, bool per_flow_enable) { + int huffman_ones = log2size > 2 ? log2size - 3 : 0; + BUG_CHECK(huffman_ones < 7); + int rv = (1 << huffman_ones) - 1; + rv = ((rv << 10) & 0xf8000) | (rv & 0x1f); + if (!per_flow_enable) rv |= 1 << 22; + return rv; +} + +/** + * Generates the hash_bits node for a single hash_function node in the JSON. + * + * Will add the impact of a single hash_table (64 bit section of the input xbar) to the hash + * bits. If the table requires multiple hash_tables, then the previous hash table value will + * be looked up and added. FIXME: At some point refactor this function to not keep + * doing this rewrite. + * + * The JSON for each hash bit has the following: + * hash_bit - The hash bit in which this is output on the Galois matrix. (Really whatever + * this bit position is just has to coordinate across the other driver structures, but + * those are also based on the Galois matrix position). + * seed - the bit that is xored in at the end of the calcuation + * bits_to_xor - The field bits from the P4 API that will determine the value of this bit, + * and must be XORed for this bit. This is a vector of fields with 4 values. 
+ * - field_bit - The p4 field bit to be XORed + * - field_name - The p4 field name to be XORed + * The next two parameters are only needed for dynamic_key_masks, as they indicate + * to the driver which bit to turn off + * - hash_match_group - Which 128 bit input xbar group this bit is appearing in (0-7) + * - hash_match_group_bit - The bit offset within the 128 bit input xbar group. + */ +void MatchTable::gen_hash_bits(const std::map &hash_table, + InputXbar::HashTable hash_table_id, json::vector &hash_bits, + unsigned hash_group_no, bitvec hash_bits_used) const { + for (auto &col : hash_table) { + if (!hash_bits_used.getbit(col.first)) continue; + json::map hash_bit; + bool hash_bit_added = false; + json::vector *bits_to_xor = nullptr; + // FIXME: This function has a lot of unnecessary copying and moving around. + for (auto &hb : hash_bits) { + if (hb->to()["hash_bit"]->to() == json::number(col.first)) { + bits_to_xor = &(hb->to()["bits_to_xor"]->to()); + hash_bit_added = true; + } + } + if (!hash_bit_added) bits_to_xor = &(hash_bit["bits_to_xor"] = json::vector()); + hash_bit["hash_bit"] = col.first; + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + hash_bit["seed"] = input_xbar[0]->get_seed_bit(hash_group_no, col.first); + for (const auto &bit : col.second.data) { + if (auto ref = input_xbar[0]->get_hashtable_bit(hash_table_id, bit)) { + std::string field_name, global_name; + field_name = ref.name(); + + auto field_bit = remove_name_tail_range(field_name) + ref.lobit(); + global_name = field_name; + + // Look up this field in the param list to get a custom key + // name, if present. 
+ auto p = find_p4_param(field_name); + if (!p && !p4_params_list.empty()) { + warning(col.second.lineno, + "Cannot find field name %s in p4_param_order " + "for table %s", + field_name.c_str(), name()); + } else if (p && !p->key_name.empty()) { + field_name = p->key_name; + } + auto group = input_xbar[0]->hashtable_input_group(hash_table_id); + int group_bit = bit; + // FIXME -- this adjustment is a hack for tofino1/2. Should have a virtual + // method on InputXbar? or something in Target? + if (group.index != hash_table_id.index && (hash_table_id.index & 1)) + group_bit += 64; + bits_to_xor->push_back( + json::map{{"field_bit", json::number(field_bit)}, + {"field_name", json::string(field_name)}, + {"global_name", json::string(global_name)}, + {"hash_match_group", json::number(group.index)}, + {"hash_match_group_bit", json::number(group_bit)}}); + } + } + if (!hash_bit_added) hash_bits.push_back(std::move(hash_bit)); + } +} + +void MatchTable::add_hash_functions(json::map &stage_tbl) const { + json::vector &hash_functions = stage_tbl["hash_functions"] = json::vector(); + // TODO: Hash functions are not generated for ALPM atcams as the + // partition index bits used in hash which is a compiler generated field and + // should not be in 'match_key_fields'. The tests in p4factory are written + // with match_spec to not include the partition index field. Glass also + // generates an empty 'hash_functions' node + if (is_alpm()) return; + // Emit hash info only if p4_param_order (match_key_fields) are present + // FIXME: This input_xbar is populated if its a part of the hash_action + // table or the hash_distribution which is incorrect. 
This should move + // inside the hash_dist so this condition does not occur in the + // hash_action table + bitvec hash_matrix_req; + hash_matrix_req.setrange(0, EXACT_HASH_GROUP_SIZE); + if (!p4_params_list.empty() && !input_xbar.empty()) { + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + auto ht = input_xbar[0]->get_hash_tables(); + if (ht.size() > 0) { + // Merge all bits to xor across multiple hash ways in single + // json::vector for each hash bit + for (const auto &hash_table : ht) { + json::map hash_function; + json::vector &hash_bits = hash_function["hash_bits"] = json::vector(); + hash_function["hash_function_number"] = hash_table.first.uid(); + gen_hash_bits(hash_table.second, hash_table.first, hash_bits, + hash_table.first.uid(), hash_matrix_req); + hash_functions.push_back(std::move(hash_function)); + } + } + } +} + +void MatchTable::add_all_reference_tables(json::map &tbl, Table *match_table) const { + auto mt = (!match_table) ? this : match_table; + json::vector &action_data_table_refs = tbl["action_data_table_refs"]; + json::vector &selection_table_refs = tbl["selection_table_refs"]; + json::vector &meter_table_refs = tbl["meter_table_refs"]; + json::vector &statistics_table_refs = tbl["statistics_table_refs"]; + json::vector &stateful_table_refs = tbl["stateful_table_refs"]; + add_reference_table(action_data_table_refs, mt->action); + if (auto a = mt->get_attached()) { + if (a->selector) { + unsigned sel_mask = (1U << METER_TYPE_START_BIT) - 1; + sel_mask &= ~((1U << SELECTOR_LOWER_HUFFMAN_BITS) - 1); + add_reference_table(selection_table_refs, a->selector); + } + for (auto &m : a->meters) { + add_reference_table(meter_table_refs, m); + } + for (auto &s : a->stats) { + add_reference_table(statistics_table_refs, s); + } + for (auto &s : a->statefuls) { + add_reference_table(stateful_table_refs, s); + } + } +} diff --git a/backends/tofino/bf-asm/meter.cpp b/backends/tofino/bf-asm/meter.cpp new file mode 100644 index 
00000000000..64cb1419823 --- /dev/null +++ b/backends/tofino/bf-asm/meter.cpp @@ -0,0 +1,1032 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "data_switchbox.h" +#include "input_xbar.h" +#include "misc.h" + +// target specific template specializations +#include "jbay/meter.h" +#include "tofino/meter.h" + +Table::Layout::bus_type_t MeterTable::default_bus_type() const { + // FIXME -- this is a bit of a hack -- if color_mapram_addr has been set, we want the + // bus_type for color maprams, not for the meter proper (which should not actually + // have a bus specified?) 
+ if (color_mapram_addr == IDLE_MAP_ADDR) return Layout::IDLE_BUS; + warning(lineno, "meter table should not have bus:, will be ignored"); + return Layout::SEARCH_BUS; +} + +void MeterTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::Stateful); + for (auto &kv : MapIterChecked(data, true)) { + if (common_setup(kv, data, P4Table::Meter)) { + } else if (kv.key == "input_xbar") { + if (CHECKTYPE(kv.value, tMAP)) + input_xbar.emplace_back(InputXbar::create(this, false, kv.key, kv.value.map)); + } else if (kv.key == "color_aware") { + if (kv.value == "per_flow") + color_aware = color_aware_per_flow_enable = true; + else + color_aware = get_bool(kv.value); + } else if (kv.key == "color_maprams") { + if (CHECKTYPE(kv.value, tMAP)) { + if (auto addr_type = get(kv.value.map, "address")) { + if (CHECKTYPE(*addr_type, tSTR)) { + if (*addr_type == "idletime") + color_mapram_addr = IDLE_MAP_ADDR; + else if (*addr_type == "stats") + color_mapram_addr = STATS_MAP_ADDR; + else + error(addr_type->lineno, "Unrecognized color mapram address type %s", + addr_type->s); + } + } + setup_layout(color_maprams, kv.value.map, " color_maprams"); + if (auto *vpn = get(kv.value.map, "vpns")) + if (CHECKTYPE(*vpn, tVEC)) setup_vpns(color_maprams, &vpn->vec, true); + } + } else if (kv.key == "pre_color") { + if (CHECKTYPE(kv.value, tCMD)) { + if (kv.value != "hash_dist") + error(kv.value.lineno, "Pre color must come from hash distribution"); + if (kv.value.vec.size != 3) + error(kv.value.lineno, + "Pre color hash distribution requires two parameters," + " but has %d", + kv.value.vec.size); + if (CHECKTYPE(kv.value.vec[1], tINT)) pre_color_hash_dist_unit = kv.value.vec[1].i; + if (CHECKTYPE(kv.value.vec[2], tRANGE)) { + auto range = kv.value.vec[2]; + int diff = range.range.hi - range.range.lo + 1; + if (diff != 2 || range.range.lo % 2 != 0) + error(kv.value.lineno, "Invalid hash distribution range for precolor"); + pre_color_bit_lo = range.range.lo; + } + } + } else 
if (kv.key == "type") { + if (kv.value == "standard") + type = STANDARD; + else if (kv.value == "lpf") + type = LPF; + else if (kv.value == "wred") + type = RED; + else + error(kv.value.lineno, "Unknown meter type %s", value_desc(kv.value)); + } else if (kv.key == "red_output") { + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &v : kv.value.map) { + if (CHECKTYPE(v.key, tSTR) && CHECKTYPE(v.value, tINT)) { + if (v.key == "drop") + red_drop_value = v.value.i; + else if (v.key == "nodrop") + red_nodrop_value = v.value.i; + else + error(kv.value.lineno, "Unknown meter red param: %s", v.key.s); + } + } + } + } else if (kv.key == "count") { + if (kv.value == "bytes") + count = BYTES; + else if (kv.value == "packets") + count = PACKETS; + else + error(kv.value.lineno, "Unknown meter count %s", value_desc(kv.value)); + } else if (kv.key == "teop") { + if (gress != EGRESS) error(kv.value.lineno, "tEOP can only be used in EGRESS"); + if (!Target::SUPPORT_TRUE_EOP()) + error(kv.value.lineno, "tEOP is not available on device"); + if (CHECKTYPE(kv.value, tINT)) { + teop = kv.value.i; + if (teop < 0 || teop > 3) + error(kv.value.lineno, "Invalid tEOP bus %d, valid values are 0-3", teop); + } + BUG_CHECK(!stage->teop[teop].first, + "previously used tEOP bus %d used again in stage %d", teop, stage->stageno); + stage->teop[teop] = {true, stage->stageno}; + } else if (kv.key == "green") { + if (CHECKTYPE(kv.value, tINT)) { + green_value = kv.value.i; + } + } else if (kv.key == "yellow") { + if (CHECKTYPE(kv.value, tINT)) { + yellow_value = kv.value.i; + } + } else if (kv.key == "red") { + if (CHECKTYPE(kv.value, tINT)) { + red_value = kv.value.i; + } + } else if (kv.key == "profile") { + if (CHECKTYPE(kv.value, tINT)) { + profile = kv.value.i; + } + } else if (kv.key == "sweep_interval") { + if (CHECKTYPE(kv.value, tINT)) { + // sweep_interval value in assembly if present is from + // meter_sweep_interval pragma in p4 program. 
Allowed values for + // the meter_sweep_interval register are [0:20]. but [5:20] are + // only to be used with shifting meter time scale. We check and + // throw an error if value is present and not in range[0:4] + int intvl = kv.value.i; + if (intvl >= 0 && intvl <= 4) + sweep_interval = intvl; + else + error( + lineno, + "Invalid meter sweep interval of %d. Allowed values are in the range[0:4]", + intvl); + } + } else if (kv.key == "bytecount_adjust") { + if (CHECKTYPE(kv.value, tINT)) { + bytecount_adjust = kv.value.i; + } + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (teop >= 0 && count != BYTES) error(lineno, "tEOP bus can only used when counting bytes"); + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_srams(); + else + alloc_rams(true, stage->sram_use); +} + +void MeterTable::pass1() { + LOG1("### Meter table " << name() << " pass1 " << loc()); + if (!p4_table) + p4_table = P4Table::alloc(P4Table::Meter, this); + else + p4_table->check(this); + alloc_vpns(); + alloc_maprams(); + if (color_maprams.empty() && type != LPF && type != RED) + error(lineno, "Missing color_maprams in meter table %s", name()); + if (uses_colormaprams() && color_mapram_addr == NO_COLOR_MAP) + error(lineno, "Missing color mapram address type in table %s", name()); + for (auto &r : color_maprams) { + for (auto &memunit : r.memunits) { + BUG_CHECK(memunit.row == r.row, "memunit on wrong row"); + if (Table *old = stage->mapram_use[r.row][memunit.col]) + error(r.lineno, + "Table %s trying to use mapram %d,%d for color, which is " + "in use by table %s", + name(), r.row, memunit.col, old->name()); + stage->mapram_use[r.row][memunit.col] = this; + } + } + if (!no_vpns && !color_maprams.empty() && color_maprams[0].vpns.empty()) + setup_vpns(color_maprams, 0); + std::sort(layout.begin(), layout.end(), + [](const Layout &a, const Layout &b) -> bool { return a.row > b.row; }); + stage->table_use[timing_thread(gress)] |= 
Stage::USE_METER; + if (type == LPF || type == RED) + stage->table_use[timing_thread(gress)] |= Stage::USE_METER_LPF_RED; + for (auto &ixb : input_xbar) ixb->pass1(); + for (auto &hd : hash_dist) hd.pass1(this, HashDistribution::OTHER, false); + int prev_row = -1; + for (auto &row : layout) { + if (home_rows.count(row.row)) prev_row = -1; + + if (prev_row >= 0) + need_bus(lineno, stage->overflow_bus_use, row.row, "Overflow"); + else + need_bus(lineno, stage->meter_bus_use, row.row, "Meter data"); + for (int r = (row.row + 1) | 1; r < prev_row; r += 2) + need_bus(lineno, stage->overflow_bus_use, r, "Overflow"); + prev_row = row.row; + } + Synth2Port::pass1(); +} + +void MeterTable::pass2() { + LOG1("### Meter table " << name() << " pass2 " << loc()); + for (auto &ixb : input_xbar) ixb->pass2(); + + for (auto match_table : get_match_tables()) { + for (auto &hd : match_table->hash_dist) { + if (hd.id == pre_color_hash_dist_unit) { + hd.meter_pre_color = true; + hd.meter_mask_index = pre_color_bit_lo / 2; + } + } + } + if (get_match_tables().size() > 1 && color_mapram_addr == IDLE_MAP_ADDR) + error(lineno, "Shared meter cannot use idletime addressing for color maprams"); + for (auto &hd : hash_dist) hd.pass2(this); +} + +void MeterTable::pass3() { LOG1("### Meter table " << name() << " pass3 " << loc()); } + +int MeterTable::direct_shiftcount() const { + return 64 + METER_ADDRESS_ZERO_PAD - 7; // meters are always 128 bits wide +} + +int MeterTable::indirect_shiftcount() const { + return METER_ADDRESS_ZERO_PAD - 7; // meters are always 128 bits wide +} + +int MeterTable::address_shift() const { + return 7; // meters are always 128 bits wide +} + +int MeterTable::color_shiftcount(Table::Call &call, int group, int tcam_shift) const { + int extra_padding = 0; + int zero_pad = 0; + if (color_mapram_addr == IDLE_MAP_ADDR) { + extra_padding = IDLETIME_ADDRESS_ZERO_PAD - IDLETIME_HUFFMAN_BITS; + zero_pad = IDLETIME_ADDRESS_ZERO_PAD; + } else if (color_mapram_addr == 
STATS_MAP_ADDR) { + extra_padding = STAT_ADDRESS_ZERO_PAD - STAT_METER_COLOR_LOWER_HUFFMAN_BITS; + zero_pad = STAT_ADDRESS_ZERO_PAD; + } + + if (call.args[0].name() && strcmp(call.args[0].name(), "$DIRECT") == 0) { + return 64 + tcam_shift + extra_padding; + } else if (auto f = call.args[0].field()) { + return f->by_group[group]->bit(0) % 128U + extra_padding; + } else if (auto f = call.args[1].field()) { + return f->bit(0) + zero_pad; + } else { + return 0; + } +} + +unsigned MeterTable::determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) const { + return determine_meter_shiftcount(call, group, word, tcam_shift); +} + +template +void MeterTable::write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args) { + auto &merge = regs.rams.match.merge; + unsigned adr_mask = 0U; + unsigned per_entry_en_mux_ctl = 0U; + unsigned adr_default = 0U; + unsigned meter_type_position = 0U; + METER_ACCESS_TYPE default_type = match->default_meter_access_type(false); + AttachedTable::determine_meter_merge_regs(match, type, bus, args, default_type, adr_mask, + per_entry_en_mux_ctl, adr_default, + meter_type_position); + merge.mau_meter_adr_default[type][bus] = adr_default; + merge.mau_meter_adr_mask[type][bus] = adr_mask; + merge.mau_meter_adr_per_entry_en_mux_ctl[type][bus] = per_entry_en_mux_ctl; + merge.mau_meter_adr_type_position[type][bus] = meter_type_position; +} + +template +void MeterTable::write_color_regs(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args) { + BUG_CHECK(uses_colormaprams(), "meter %s does not use color maprams, but uses color?", name()); + auto &merge = regs.rams.match.merge; + unsigned adr_mask = 0U; + unsigned per_entry_en_mux_ctl = 0U; + unsigned adr_default = 0U; + unsigned meter_type_position = 0U; + AttachedTable::determine_meter_merge_regs(match, type, bus, args, METER_COLOR_ACCESS, adr_mask, + per_entry_en_mux_ctl, adr_default, + meter_type_position); + + // Based 
on the uArch section 6.2.8.4.9 Map RAM Addressing, color maprams can be + // addressed by either idletime or stats based addresses. Which address is used + // can be specified in the asm file, and is built according to the specification + + if (color_mapram_addr == IDLE_MAP_ADDR) { + unsigned idle_mask = (1U << IDLETIME_ADDRESS_BITS) - 1; + unsigned full_idle_mask = (1U << IDLETIME_FULL_ADDRESS_BITS) - 1; + unsigned shift_diff = METER_LOWER_HUFFMAN_BITS - IDLETIME_HUFFMAN_BITS; + merge.mau_idletime_adr_mask[type][bus] = (adr_mask >> shift_diff) & idle_mask; + merge.mau_idletime_adr_default[type][bus] = (adr_default >> shift_diff) & full_idle_mask; + if (per_entry_en_mux_ctl > shift_diff) + merge.mau_idletime_adr_per_entry_en_mux_ctl[type][bus] = + per_entry_en_mux_ctl - shift_diff; + else + merge.mau_idletime_adr_per_entry_en_mux_ctl[type][bus] = 0; + } else if (color_mapram_addr == STATS_MAP_ADDR) { + unsigned stats_mask = (1U << STAT_ADDRESS_BITS) - 1; + unsigned full_stats_mask = (1U << STAT_FULL_ADDRESS_BITS) - 1; + unsigned shift_diff = METER_LOWER_HUFFMAN_BITS - STAT_METER_COLOR_LOWER_HUFFMAN_BITS; + merge.mau_stats_adr_mask[type][bus] = (adr_mask >> shift_diff) & stats_mask; + merge.mau_stats_adr_default[type][bus] = (adr_default >> shift_diff) & full_stats_mask; + if (per_entry_en_mux_ctl > shift_diff) + merge.mau_stats_adr_per_entry_en_mux_ctl[type][bus] = per_entry_en_mux_ctl - shift_diff; + else + merge.mau_stats_adr_per_entry_en_mux_ctl[type][bus] = 0; + } else { + BUG(); + } +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void MeterTable::write_color_regs, mau_regs &, + MatchTable *, int, int, const std::vector &); + +template +void MeterTable::setup_exact_shift(REGS ®s, int bus, int group, int word, int word_group, + Call &meter_call, Call &color_call) { + auto &merge = regs.rams.match.merge; + int shiftcount = determine_shiftcount(meter_call, group, word, 0); + merge.mau_meter_adr_exact_shiftcount[bus][word_group] = shiftcount; + if 
(uses_colormaprams()) { + int color_shift = color_shiftcount(color_call, group, 0); + if (color_mapram_addr == IDLE_MAP_ADDR) { + merge.mau_idletime_adr_exact_shiftcount[bus][word_group] = color_shift; + merge.mau_payload_shifter_enable[0][bus].idletime_adr_payload_shifter_en = 1; + } else if (color_mapram_addr == STATS_MAP_ADDR) { + merge.mau_stats_adr_exact_shiftcount[bus][word_group] = color_shift; + merge.mau_payload_shifter_enable[0][bus].stats_adr_payload_shifter_en = 1; + } + } +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void MeterTable::setup_exact_shift, mau_regs &, + int, int, int, int, Call &, Call &); + +template +void MeterTable::setup_tcam_shift(REGS ®s, int bus, int tcam_shift, Call &meter_call, + Call &color_call) { + auto &merge = regs.rams.match.merge; + int shiftcount = determine_shiftcount(meter_call, 0, 0, tcam_shift); + merge.mau_meter_adr_tcam_shiftcount[bus] = shiftcount; + if (uses_colormaprams()) { + int color_shift = color_shiftcount(color_call, 0, tcam_shift); + if (color_mapram_addr == IDLE_MAP_ADDR) { + merge.mau_idletime_adr_tcam_shiftcount[bus] = color_shift; + merge.mau_payload_shifter_enable[1][bus].idletime_adr_payload_shifter_en = 1; + } else if (color_mapram_addr == STATS_MAP_ADDR) { + merge.mau_stats_adr_tcam_shiftcount[bus] = color_shift; + merge.mau_payload_shifter_enable[1][bus].stats_adr_payload_shifter_en = 1; + } + } +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void MeterTable::setup_tcam_shift, mau_regs &, + int, int, Call &, Call &); + +template +void MeterTable::write_regs_home_row(REGS ®s, unsigned row) { + auto &map_alu = regs.rams.map_alu; + auto &map_alu_row = map_alu.row[row]; + auto &adrdist = regs.rams.match.adrdist; + unsigned side = 1; // Meter can only be on right side + int minvpn, maxvpn; + layout_vpn_bounds(minvpn, maxvpn, true); + + if (home_rows.size() > 1) { + int sparevpn; + layout_vpn_bounds(minvpn, sparevpn, false); + bool block_start = false; + bool block_end = false; + minvpn = 
INT_MAX; + maxvpn = INT_MIN; + for (Layout &logical_row : layout) { + // Block Start with the home row and End with the Spare VPN + if (logical_row.row / 2U == row) block_start = true; + + if (block_start) { + for (auto v : logical_row.vpns) { + if (v == sparevpn) { + block_end = true; + break; + } + + if (v < minvpn) minvpn = v; + if (v > maxvpn) maxvpn = v; + } + } + if (block_end) { + BUG_CHECK(minvpn != INT_MAX && maxvpn != INT_MIN); + break; + } + } + BUG_CHECK(block_start && block_end); + } + + int meter_group_index = row / 2U; + auto &meter = map_alu.meter_group[meter_group_index].meter; + auto &meter_ctl = meter.meter_ctl; + auto &red_value_ctl = meter.red_value_ctl; + + int first_home_row = *home_rows.begin(); + if (count == BYTES) { + auto meter_bytecount_adjust_size = meter_ctl.meter_bytecount_adjust.size(); + auto meter_bytecount_adjust_mask = ((1U << meter_bytecount_adjust_size) - 1); + int bytecount_adjust_max = (1U << (meter_bytecount_adjust_size - 1)) - 1; + int bytecount_adjust_min = -1 * (1U << (meter_bytecount_adjust_size - 1)); + if (bytecount_adjust > bytecount_adjust_max || bytecount_adjust < bytecount_adjust_min) { + error(lineno, + "The bytecount adjust value of %d on meter %s " + "does not fit within allowed range for %d bits - { %d, %d }", + bytecount_adjust, name(), meter_bytecount_adjust_size, bytecount_adjust_min, + bytecount_adjust_max); + } + meter_ctl.meter_bytecount_adjust = bytecount_adjust & meter_bytecount_adjust_mask; + } + auto &delay_ctl = map_alu.meter_alu_group_data_delay_ctl[meter_group_index]; + delay_ctl.meter_alu_right_group_delay = + Target::METER_ALU_GROUP_DATA_DELAY() + row / 4 + stage->tcam_delay(gress); + switch (type) { + case LPF: + meter_ctl.lpf_enable = 1; + delay_ctl.meter_alu_right_group_enable = 1; + break; + case RED: + meter_ctl.lpf_enable = 1; + meter_ctl.red_enable = 1; + delay_ctl.meter_alu_right_group_enable = 1; + red_value_ctl.red_nodrop_value = red_nodrop_value; + red_value_ctl.red_drop_value = 
red_drop_value; + break; + default: + meter_ctl.meter_enable = 1; + // RNG: + // Enables random number generator for meter probabilistic charging + // when green/yellow burst size exponent > 14. This should be set + // when any meter entry in the table has a burstsize exponent > 14 + // RNG is also enabled whenever red_enable config bit is set. + + // this should always be turned on + // for color-based meters, to handle an issue with large burst + // sizes. This applies to both packet-based and byte-based meters. + // Mike F said, "The hardware adjusts the rate under the hood to + // match the desired rate. Without enabling the RNG, the hardware + // will always overcharge the buckets thereby reducing the rate." + meter_ctl.meter_rng_enable = 1; + meter_ctl.meter_time_scale = profile; + break; + } + if (count == BYTES) meter_ctl.meter_byte = 1; + if (gress == EGRESS) meter_ctl.meter_alu_egress = 1; + auto &error_ctl = map_alu.meter_alu_group_error_ctl[meter_group_index]; + error_ctl.meter_alu_group_ecc_error_enable = 1; + error_ctl.meter_alu_group_thread = gress; + auto &meter_sweep_ctl = adrdist.meter_sweep_ctl[meter_group_index]; + // The driver will manage turning on the meter sweep enable, + // so the compiler should not configure this value (check glass + // code) + // meter_sweep_ctl.meter_sweep_en = 1; + meter_sweep_ctl.meter_sweep_offset = minvpn; + meter_sweep_ctl.meter_sweep_size = maxvpn; + meter_sweep_ctl.meter_sweep_remove_hole_pos = 0; // FIXME -- see CSR? 
+ meter_sweep_ctl.meter_sweep_remove_hole_en = 0; // FIXME + meter_sweep_ctl.meter_sweep_interval = sweep_interval + profile; + for (auto &ixb : input_xbar) { + auto &vh_adr_xbar = regs.rams.array.row[row].vh_adr_xbar; + auto &data_ctl = regs.rams.array.row[row].vh_xbar[side].stateful_meter_alu_data_ctl; + // FIXME: Currently in the compiler, the data headed to the meter alu/stateful alu + // can only come from hash or the search bus, but not both, thus it is + // currenlty safe for them to be mutually exclusive. If the compiler was to + // allocate fields to both, this would have to interpret the information + // correctly + auto hashdata_bytemask = bitmask2bytemask(ixb->hash_group_bituse()); + if (hashdata_bytemask != 0U) { + vh_adr_xbar.alu_hashdata_bytemask.alu_hashdata_bytemask_right = hashdata_bytemask; + setup_muxctl(vh_adr_xbar.exactmatch_row_hashadr_xbar_ctl[2 + side], ixb->hash_group()); + } else { + // FIXME: Need to be some validation between Tofino and JBay if the input + // xbar is valid for these meters. + bitvec bytemask = ixb->bytemask(); + bytemask >>= bytemask.min().index(); + unsigned u_bytemask = bytemask.getrange(0, bytemask.max().index() + 1); + data_ctl.stateful_meter_alu_data_bytemask = u_bytemask; + data_ctl.stateful_meter_alu_data_xbar_ctl = 8 | ixb->match_group(); + } + } + if (output_used) { + auto &action_ctl = map_alu.meter_alu_group_action_ctl[meter_group_index]; + action_ctl.right_alu_action_enable = 1; + action_ctl.right_alu_action_delay = stage->meter_alu_delay(gress, false); + auto &switch_ctl = regs.rams.array.switchbox.row[row].ctl; + switch_ctl.r_action_o_mux_select.r_action_o_sel_action_rd_r_i = 1; + // disable action data address huffman decoding, on the assumtion we're not + // trying to combine this with an action data table on the same home row. + // Otherwise, the huffman decoding will think this is an 8-bit value and + // replicate it. 
+ regs.rams.array.row[row] + .action_hv_xbar.action_hv_xbar_disable_ram_adr.action_hv_xbar_disable_ram_adr_right = 1; + } + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_base = minvpn; + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_limit = maxvpn; + auto &movereg_meter_ctl = adrdist.movereg_meter_ctl[meter_group_index]; + if (run_at_eop()) movereg_meter_ctl.movereg_meter_ctl_deferred = 1; + movereg_meter_ctl.movereg_ad_meter_shift = 7; + movereg_meter_ctl.movereg_meter_ctl_lt = logical_id; + if (direct) movereg_meter_ctl.movereg_meter_ctl_direct = 1; + movereg_meter_ctl.movereg_meter_ctl_color_en = 1; + for (MatchTable *m : match_tables) { + if (direct) adrdist.movereg_ad_direct[1] |= 1U << m->logical_id; + // The first ALU will drive this xbar register + if (first_home_row / 4U == meter_group_index) { + adrdist.movereg_ad_meter_alu_to_logical_xbar_ctl[m->logical_id / 8U].set_subfield( + 4 | meter_group_index, 3 * (m->logical_id % 8U), 3); + } + } +} + +template +void MeterTable::write_mapram_color_regs(REGS ®s, bool &push_on_overflow) { + auto &map_alu = regs.rams.map_alu; + auto &adrdist = regs.rams.match.adrdist; + auto &merge = regs.rams.match.merge; + int curr_home_row = -1; + + for (Layout &row : color_maprams) { + curr_home_row = get_home_row_for_row(row.row * 2); + // Allocating color maprams above home row is invalid + // as color writes can only be distributed to maprams + // via buses going on the home row or below + BUG_CHECK(curr_home_row / 4U >= row.row / 2U); + + int color_map_color = color_maprams.empty() ? 
0 : (curr_home_row / 4U) & 1; + if (row.row == curr_home_row / 2) { /* on the home row */ + if (color_map_color) + map_alu.mapram_color_switchbox.row[row.row] + .ctl.r_color1_mux_select.r_color1_sel_color_r_i = 1; + else + map_alu.mapram_color_switchbox.row[row.row] + .ctl.r_color0_mux_select.r_color0_sel_color_r_i = 1; + } else if (row.row / 4U == curr_home_row / 8U) { /* same half as home */ + if (color_map_color) + map_alu.mapram_color_switchbox.row[row.row] + .ctl.r_color1_mux_select.r_color1_sel_oflo_color_r_i = 1; + else + map_alu.mapram_color_switchbox.row[row.row] + .ctl.r_color0_mux_select.r_color0_sel_oflo_color_r_i = 1; + } else { /* other half from home */ + map_alu.mapram_color_switchbox.row[row.row].ctl.t_oflo_color_o_mux_select = 1; + merge.mau_match_central_mapram_read_color_oflo_ctl |= 1U << color_map_color; + } + + /* + * Below diagrams show how select bits are set to + * route color data from meter alu located on the home + * row down to the color maprams + * ********************************************* + * - ROUTE FROM RIGHT TO BOTTOM + * Bus coming from Meter ALU on current home row + * .------------ + * | r_color_write_i + * v + * .---. + * | |<---- (select = 1'b1) + * | | b_oflo_color_write_o_sel_r_color_write_i + * .___. + * | + * | b_oflo_color_write_o + * v + * Bus going to color map rams below + * + * ********************************************* + * - ROUTE FROM TOP TO BOTTOM + * Bus coming from home row above + * | + * | t_oflo_color_write_i + * v + * .---. + * | |<---- (select = 1'b1) + * | | b_oflo_color_write_o_sel_t_oflo_color_write_i + * .___. + * | + * | b_oflo_color_write_o + * v + * Bus going to color map rams below + * + * ********************************************* + * - ROUTE FROM TOP TO RIGHT + * Bus coming from home row above + * | + * | t_oflo_color_write_i + * v + * .---. + * | |<---- (select = 1'b1) + * | | r_oflo_color_write_o_mux_select + * .___. 
+ * | + * | r_oflo_color_write_o + * .----------------> + * Bus going to color map rams on right + * + * ********************************************* + * + * A - Meter 1 Map Rams + * a - Meter 1 Color Map Rams + * B - Meter 1 Map Rams + * b - Meter 1 Color Map Rams + * + * Log Phy Columns SW Mtr + * Row Row 0 1 2 3 4 5 Box ALU + * .---..---..---..---..---..---. + * 15 7 | A || A || A || A || A || A | 3 3 + * .___..___..___..___..___..___. + * .---..---..---..---..---..---. + * 13 6 | A || A || A || A || a || a | + * .___..___..___..___..___..___. + * .---..---..---..---..---..---. + * 11 5 | B || B || B || B || B || a | 2 2 + * .___..___..___..___..___..___. + * .---..---..---..---..---..---. + * 9 4 | B || B || B || B || B || b | + * .___..___..___..___..___..___. + * .---..---..---..---..---..---. + * 7 3 | b || b || - || - || - || - | 1 1 + * .___..___..___..___..___..___. + * + * Meter Color Write Switchbox is configured to + * - set b_oflo_color_write_o_sel_r_color_write_i (1'b1) + * This routes meter alu 3 data down to rows 6 & 5 where + * meter 1 color maprams are located [6,4] [6,5] [5,5] + * + * Meter ALU 2 is configured to + * - set b_oflo_color_write_o_sel_t_oflo_color_write_i (1'b1) + * This routes meter alu data from above to the + * meter 1 color mapram located at [5,5] + * - set b_oflo_color_write_o_sel_r_color_write_i (1'b1) + * This routes meter alu 2 data down to rows 4 & 3 where + * meter 2 color maprams are located [4,5] [3,0] [3,1] + */ + if (row.row != curr_home_row / 2) { /* ALU home row */ + map_alu.mapram_color_write_switchbox[curr_home_row / 4U] + .ctl.b_oflo_color_write_o_mux_select.b_oflo_color_write_o_sel_r_color_write_i = 1; + map_alu.mapram_color_write_switchbox[row.row / 2U].ctl.r_oflo_color_write_o_mux_select = + 1; + BUG_CHECK(curr_home_row / 4U >= row.row / 2U); + /* b_oflo_color_write_o_sel_t_oflo_color_write_i must be set for all + * switchboxes below the homerow and above current row + * It should never be set for a switchbox 
above the home row + * It should never be set on the switchbox on the current row + * as that would drive the top overflow down to any color maprams below. + * This is invalid and can cause corruption if there is another meter occupying + * color maprams on the below row. + */ + // Switch box below home row + int switchbox_upper = curr_home_row / 4U - 1; + // Switch box above current row + int switchbox_lower = row.row % 2 ? (int)row.row / 2U + 1 : (int)row.row / 2U; + for (int i = switchbox_upper; i >= switchbox_lower; i--) { + if (i == 3) continue; // Never set on top switchbox + + map_alu.mapram_color_write_switchbox[i] + .ctl.b_oflo_color_write_o_mux_select + .b_oflo_color_write_o_sel_t_oflo_color_write_i = 1; + } + } + auto &map_alu_row = map_alu.row[row.row]; + auto vpn = row.vpns.begin(); + if (color_mapram_addr == STATS_MAP_ADDR) { + BUG_CHECK((row.row % 2) == 0); + for (MatchTable *m : match_tables) + adrdist.mau_ad_stats_virt_lt[row.row / 2] |= (1U << m->logical_id); + } + // Enable the row to be used (even if only color maprams are on this row) + map_alu_row.i2portctl.synth2port_ctl.synth2port_enable = 1; + // If the color mapram is not on the same row as the meter ALU, even if no meter + // RAMs are on the same row, the address still needs to overflow to that row + if (row.row < curr_home_row / 2) { + auto &adr_ctl = map_alu_row.vh_xbars.adr_dist_oflo_adr_xbar_ctl[1]; + // Mapram rows are 0-7, not 0-15 like logical rows + if (curr_home_row >= UPPER_MATCH_CENTRAL_FIRST_LOGICAL_ROW && + row.row < UPPER_MATCH_CENTRAL_FIRST_ROW) { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = 0; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::OVERFLOW; + push_on_overflow = true; + BUG_CHECK(options.target == TOFINO); + } else { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = curr_home_row % 8; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::METER; + } + adr_ctl.adr_dist_oflo_adr_xbar_enable = 1; + } + + for (auto &memunit : row.memunits) { + int col = 
memunit.col; + BUG_CHECK(memunit.stage == INT_MIN && memunit.row == row.row, "bogus %s in row %d", + memunit.desc(), row.row); + auto &mapram_config = map_alu_row.adrmux.mapram_config[col]; + if (row.row == curr_home_row / 2) + mapram_config.mapram_color_bus_select = MapRam::ColorBus::COLOR; + else + mapram_config.mapram_color_bus_select = MapRam::ColorBus::OVERFLOW; + mapram_config.mapram_type = MapRam::COLOR; + mapram_config.mapram_logical_table = logical_id; + BUG_CHECK(vpn != row.vpns.end(), "vpn not found!"); + mapram_config.mapram_vpn = *vpn; + // These two registers must be programmed for meter-color map rams in this way as a + // work-around for hardware issue as described in TOF-1944 + // The basic problem is that software reads of the meter color map ram are only + // returning 6-bits of data instead of the necessary 8-bits. Hardware defaults to + // 6 bits, since the meter color map ram case is not explicitly called out. + // By setting these bits, all 8-bits will be returned. + mapram_config.mapram_parity_generate = 1; + mapram_config.mapram_parity_check = 0; + // glass does not set ecc for color maprams? 
+ // mapram_config.mapram_ecc_check = 1; + // mapram_config.mapram_ecc_generate = 1; + if (gress == INGRESS) + mapram_config.mapram_ingress = 1; + else + mapram_config.mapram_egress = 1; + mapram_config.mapram_enable = 1; + if (row.row != curr_home_row / 2) { /* ALU home row */ + mapram_config.mapram_color_write_bus_select = 1; + } + auto &ram_address_mux_ctl = map_alu_row.adrmux.ram_address_mux_ctl[1][col]; + if (row.row == curr_home_row / 2) { /* ALU home row */ + ram_address_mux_ctl.synth2port_radr_mux_select_home_row = 1; + } else { + ram_address_mux_ctl.synth2port_radr_mux_select_oflo = 1; + } + map_alu_row.i2portctl.synth2port_ctl.synth2port_mapram_color |= 1U << col; + ram_address_mux_ctl.map_ram_wadr_shift = 1; + ram_address_mux_ctl.map_ram_wadr_mux_select = MapRam::Mux::COLOR; + ram_address_mux_ctl.map_ram_wadr_mux_enable = 1; + ram_address_mux_ctl.map_ram_radr_mux_select_color = 1; + ram_address_mux_ctl.ram_ofo_stats_mux_select_statsmeter = 1; + // Indicating what bus to pull from, either stats or idletime for the color mapram + if (color_mapram_addr == IDLE_MAP_ADDR) { + ram_address_mux_ctl.ram_stats_meter_adr_mux_select_idlet = 1; + setup_muxctl(map_alu_row.vh_xbars.adr_dist_idletime_adr_xbar_ctl[col], + row.bus.at(Layout::IDLE_BUS) % 10); + } else if (color_mapram_addr == STATS_MAP_ADDR) { + ram_address_mux_ctl.ram_stats_meter_adr_mux_select_stats = 1; + } + if (gress) + regs.cfg_regs.mau_cfg_mram_thread[col / 3U] |= 1U << (col % 3U * 8U + row.row); + ++vpn; + } + } + + // Additional BUG_CHECK to verify that both these regs are not set on a switchbox + // - map_alu.mapram_color_write_switchbox[x].ctl.b_oflo_color_write_o_sel_r_color_write_i + // - map_alu.mapram_color_write_switchbox[x].ctl.b_oflo_color_write_o_sel_t_oflo_color_write_i + // Both these regs should never be set on a swithbox as it implies routing from both top and + // right map alu to the bottom rows. This leads to corruption of color data. 
+ // Additional BUG_CHECK to verify that top row switchbox does not have + // this regs set + // - map_alu.mapram_color_write_switchbox[x].ctl.b_oflo_color_write_o_sel_t_oflo_color_write_i + for (int i = 0; i <= 3; i++) { + auto t_oflo_write_i = map_alu.mapram_color_write_switchbox[i] + .ctl.b_oflo_color_write_o_mux_select + .b_oflo_color_write_o_sel_t_oflo_color_write_i == 1; + if (i == 3) { + BUG_CHECK(!t_oflo_write_i, + "Color maprams have invalid configuration" + " may cause corruption of color data from meter"); + } + auto r_oflo_write_i = + map_alu.mapram_color_write_switchbox[i] + .ctl.b_oflo_color_write_o_mux_select.b_oflo_color_write_o_sel_r_color_write_i == 1; + LOG5("i: " << i << "t_oflo: " << t_oflo_write_i << ", r_oflo: " << r_oflo_write_i); + BUG_CHECK(!(t_oflo_write_i & r_oflo_write_i), + "Color maprams have invalid configuration" + " may cause corruption of color data from meter"); + } +} + +template +void MeterTable::write_regs_vt(REGS ®s) { + LOG1("### Meter table " << name() << " write_regs " << loc()); + for (auto &ixb : input_xbar) ixb->write_regs(regs); + Layout *home = nullptr; + bool push_on_overflow = false; + auto &map_alu = regs.rams.map_alu; + auto &adrdist = regs.rams.match.adrdist; + DataSwitchboxSetup *swbox = nullptr; + for (Layout &logical_row : layout) { + unsigned row = logical_row.row / 2U; + unsigned side = logical_row.row & 1; /* 0 == left 1 == right */ + BUG_CHECK(side == 1); /* no map rams or alus on left side anymore */ + auto vpn = logical_row.vpns.begin(); + auto mapram = logical_row.maprams.begin(); + auto &map_alu_row = map_alu.row[row]; + auto home_it = home_rows.find(logical_row.row); + if (home_it != home_rows.end()) { + home = &logical_row; + swbox = new DataSwitchboxSetup(regs, this, logical_row.row, + (++home_it == home_rows.end()) ? 
-1 : *home_it); + } + BUG_CHECK(home != nullptr); + LOG2("# DataSwitchbox.setup_row(" << row << ") home=" << home->row / 2U); + swbox->setup_row(row); + for (auto &memunit : logical_row.memunits) { + int logical_col = memunit.col; + BUG_CHECK(memunit.stage == INT_MIN && memunit.row == logical_row.row, + "bogus %s in logical row %d", memunit.desc(), logical_row.row); + unsigned col = logical_col + 6 * side; + LOG2("# DataSwitchbox.setup_row_col(" << row << ", " << col << ", vpn=" << *vpn + << ") home=" << home->row / 2U); + swbox->setup_row_col(row, col, *vpn); + write_mapram_regs(regs, row, *mapram, *vpn, MapRam::METER); + if (gress) regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row); + ++mapram, ++vpn; + } + if (&logical_row == home) { + write_regs_home_row(regs, row); + } else { + auto &adr_ctl = map_alu_row.vh_xbars.adr_dist_oflo_adr_xbar_ctl[side]; + if (home->row >= UPPER_MATCH_CENTRAL_FIRST_LOGICAL_ROW && + logical_row.row < UPPER_MATCH_CENTRAL_FIRST_LOGICAL_ROW) { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = 0; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::OVERFLOW; + push_on_overflow = true; + BUG_CHECK(options.target == TOFINO); + } else { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = home->row % 8; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::METER; + } + adr_ctl.adr_dist_oflo_adr_xbar_enable = 1; + } + } + auto &merge = regs.rams.match.merge; + write_mapram_color_regs(regs, push_on_overflow); + if (home_rows.size() > 1) write_alu_vpn_range(regs); + + for (int home_row : home_rows) { + for (MatchTable *m : match_tables) { + adrdist.adr_dist_meter_adr_icxbar_ctl[m->logical_id] |= 1U << (home_row / 4U); + // auto &icxbar = adrdist.adr_dist_meter_adr_icxbar_ctl[m->logical_id]; + // icxbar.address_distr_to_logical_rows = 1U << home->row; + // icxbar.address_distr_to_overflow = push_on_overflow; + // if (direct) + // regs.cfg_regs.mau_cfg_lt_meter_are_direct |= 1 << m->logical_id; + 
adrdist.meter_color_output_map[m->logical_id].set_subfield(green_value, 0, 8); + adrdist.meter_color_output_map[m->logical_id].set_subfield(yellow_value, 8, 8); + adrdist.meter_color_output_map[m->logical_id].set_subfield(yellow_value, 16, 8); + adrdist.meter_color_output_map[m->logical_id].set_subfield(red_value, 24, 8); + if (type != LPF) adrdist.meter_enable |= 1U << m->logical_id; + /*auto &movereg_ad_ctl = adrdist.movereg_ad_ctl[m->logical_id]; + movereg_ad_ctl.movereg_meter_deferred = 1; + if (!color_maprams.empty()) + movereg_ad_ctl.movereg_ad_idle_as_mc = 1; + else + movereg_ad_ctl.movereg_ad_stats_as_mc = 1; + movereg_ad_ctl.movereg_ad_direct_meter = direct; + movereg_ad_ctl.movereg_ad_meter_shift = 7; */ + meter_color_logical_to_phys(regs, m->logical_id, home_row / 4U); + adrdist.mau_ad_meter_virt_lt[home_row / 4U] |= 1 << m->logical_id; + } + if (run_at_eop()) { + if (teop >= 0) { + setup_teop_regs(regs, home_row / 4U); + } else { + adrdist.deferred_ram_ctl[1][home_row / 4U].deferred_ram_en = 1; + adrdist.deferred_ram_ctl[1][home_row / 4U].deferred_ram_thread = gress; + if (gress) regs.cfg_regs.mau_cfg_dram_thread |= 0x10 << (home_row / 4U); + } + adrdist.meter_bubble_req[timing_thread(gress)].bubble_req_1x_class_en |= + 1 << ((home_row / 4U) + 4); + } else { + adrdist.meter_bubble_req[timing_thread(gress)].bubble_req_1x_class_en |= + 1 << (home_row / 4U); + adrdist.packet_action_at_headertime[1][home_row / 4U] = 1; + } + if (push_on_overflow) { + adrdist.oflo_adr_user[0] = adrdist.oflo_adr_user[1] = AdrDist::METER; + adrdist.deferred_oflo_ctl = 1 << ((home_row - 8) / 2U); + } + if (gress == INGRESS || gress == GHOST) { + merge.meter_alu_thread[0].meter_alu_thread_ingress |= 1U << home_row / 4U; + merge.meter_alu_thread[1].meter_alu_thread_ingress |= 1U << home_row / 4U; + } else if (gress == EGRESS) { + merge.meter_alu_thread[0].meter_alu_thread_egress |= 1U << home_row / 4U; + merge.meter_alu_thread[1].meter_alu_thread_egress |= 1U << home_row / 4U; + 
} + } + + for (auto &hd : hash_dist) hd.write_regs(regs, this); +} + +// FIXME -- refactor these specializations better +template <> +void MeterTable::meter_color_logical_to_phys(Target::Tofino::mau_regs ®s, int logical_id, + int alu) { + auto &merge = regs.rams.match.merge; + auto &adrdist = regs.rams.match.adrdist; + if (!color_maprams.empty()) { + merge.mau_mapram_color_map_to_logical_ctl[logical_id / 8].set_subfield( + 0x4 | alu, 3 * (logical_id % 8U), 3); + + // Determining which buses to send the color mapram address to + if (color_mapram_addr == IDLE_MAP_ADDR) { + adrdist.movereg_idle_ctl[logical_id].movereg_idle_ctl_mc = 1; + for (auto lo : color_maprams) { + int bus_index = lo.bus.at(Layout::IDLE_BUS); + // upper and lower idletime busses appear to be independent with + // no overflow between them + if (lo.row >= UPPER_MATCH_CENTRAL_FIRST_ROW) bus_index += IDLETIME_BUSSES_PER_HALF; + adrdist.adr_dist_idletime_adr_oxbar_ctl[bus_index / 4].set_subfield( + logical_id | 0x10, 5 * (bus_index % 4), 5); + } + + } else if (color_mapram_addr == STATS_MAP_ADDR) { + for (auto lo : color_maprams) { + adrdist.adr_dist_stats_adr_icxbar_ctl[logical_id] |= (1U << (lo.row / 2)); + adrdist.packet_action_at_headertime[0][lo.row / 2] = 1; + } + } else { + BUG(); + } + setup_muxctl(adrdist.meter_color_logical_to_phys_ixbar_ctl[logical_id], alu); + } +} + +template <> +void MeterTable::meter_color_logical_to_phys(Target::JBay::mau_regs ®s, int logical_id, + int alu) { + auto &merge = regs.rams.match.merge; + auto &adrdist = regs.rams.match.adrdist; + if (!color_maprams.empty()) { + merge.mau_mapram_color_map_to_logical_ctl[alu] |= 1 << logical_id; + // Determining which buses to send the color mapram address to + if (color_mapram_addr == IDLE_MAP_ADDR) { + adrdist.movereg_idle_ctl[logical_id].movereg_idle_ctl_mc = 1; + for (auto lo : color_maprams) { + int bus_index = lo.bus.at(Layout::IDLE_BUS); + // No overflow bus exist between upper and lower half so every color mapram have 
+ // to use their respective bus + if (lo.row >= UPPER_MATCH_CENTRAL_FIRST_ROW) bus_index += IDLETIME_BUSSES_PER_HALF; + adrdist.adr_dist_idletime_adr_oxbar_ctl[bus_index / 4].set_subfield( + logical_id | 0x10, 5 * (bus_index % 4), 5); + } + + } else if (color_mapram_addr == STATS_MAP_ADDR) { + for (auto lo : color_maprams) { + adrdist.adr_dist_stats_adr_icxbar_ctl[logical_id] |= (1U << (lo.row / 2)); + adrdist.packet_action_at_headertime[0][lo.row / 2] = 1; + } + } else { + BUG(); + } + } + adrdist.meter_color_logical_to_phys_icxbar_ctl[logical_id] |= 1 << alu; +} + +void MeterTable::gen_tbl_cfg(json::vector &out) const { + // FIXME -- factor common Synth2Port stuff + auto spare_mems = determine_spare_bank_memory_units(); + int size = (layout_size() - spare_mems.size()) * SRAM_DEPTH; + json::map &tbl = *base_tbl_cfg(out, "meter", size); + json::map &stage_tbl = *add_stage_tbl_cfg(tbl, "meter", size); + stage_tbl["color_memory_resource_allocation"] = + gen_memory_resource_allocation_tbl_cfg("map_ram", color_maprams); + switch (type) { + case STANDARD: + tbl["meter_type"] = "standard"; + tbl["meter_profile"] = profile; + break; + case LPF: + tbl["meter_type"] = "lpf"; + break; + case RED: + tbl["meter_type"] = "red"; + break; + default: + tbl["meter_type"] = "standard"; + break; + } + switch (count) { + case PACKETS: + tbl["meter_granularity"] = "packets"; + break; + case BYTES: + tbl["meter_granularity"] = "bytes"; + break; + default: + tbl["meter_granularity"] = "packets"; + break; + } + tbl["enable_color_aware_pfe"] = color_aware_per_flow_enable; + /* this is not needed. 
but the driver asserts on existence of + * this or enable_color_aware which both seem to be redundant */ + tbl["pre_color_field_name"] = ""; + tbl["enable_pfe"] = per_flow_enable; + tbl["pfe_bit_position"] = per_flow_enable_bit(); + tbl["color_aware_pfe_address_type_bit_position"] = 0; // FIXME + tbl["reference_dictionary"] = json::map(); // To be removed in future + stage_tbl["default_lower_huffman_bits_included"] = METER_LOWER_HUFFMAN_BITS; + if (home_rows.size() > 1) + add_alu_indexes(stage_tbl, "meter_alu_index"); + else + add_alu_index(stage_tbl, "meter_alu_index"); + if (context_json) stage_tbl.merge(*context_json); +} + +DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(MeterTable, TARGET_CLASS) +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void MeterTable::write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + { write_merge_regs_vt(regs, match, type, bus, args); }) diff --git a/backends/tofino/bf-asm/misc.cpp b/backends/tofino/bf-asm/misc.cpp new file mode 100644 index 00000000000..28cddb8932f --- /dev/null +++ b/backends/tofino/bf-asm/misc.cpp @@ -0,0 +1,223 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "misc.h" + +#include +#include +#include + +#include "backends/tofino/bf-asm/target.h" +#include "bfas.h" + +int remove_name_tail_range(std::string &name, int *size) { + auto tail = name.rfind('.'); + if (tail == std::string::npos) return 0; + unsigned lo, hi; + int len = -1; + if (sscanf(&name[tail], ".%u-%u%n", &lo, &hi, &len) >= 2 && tail + len == name.size() && + hi >= lo) { + name.erase(tail); + if (size) *size = hi - lo + 1; + return lo; + } + return 0; +} + +std::string int_to_hex_string(unsigned val, unsigned width) { + std::stringstream sval; + sval << std::setfill('0') << std::setw(width) << std::hex << val << std::setfill(' '); + return sval.str(); +} + +void add_cfg_reg(json::vector &cfg_cache, std::string full_name, std::string name, + std::string val) { + json::map cfg_cache_reg; + cfg_cache_reg["fully_qualified_name"] = full_name; + cfg_cache_reg["name"] = name; + cfg_cache_reg["value"] = val; + cfg_cache.push_back(std::move(cfg_cache_reg)); +} + +bool check_zero_string(const std::string &s) { + char zero = '0'; + return s.find_first_not_of(zero) == std::string::npos; +} + +std::string get_filename(const char *s) { + std::string fname = s; + fname = fname.substr(fname.find_last_of("/") + 1); + fname = fname.substr(0, fname.find_last_of(".")); + return fname; +} + +std::string get_directory(const char *s) { + std::string fname = s; + auto tail = fname.find_last_of("/"); + if (tail == std::string::npos) + fname = "."; + else + fname = fname.substr(0, tail); + return fname; +} + +/* Given a p4 name, split into instance and field names if possible + * - else return a copy of the original name */ +void gen_instfield_name(const std::string &fullname, std::string &instname, + std::string &field_name) { + auto dotpos = fullname.rfind('.'); + if (dotpos == std::string::npos) { + instname = fullname; + field_name = std::string(); + } else { + instname = fullname.substr(0, dotpos); + field_name = 
fullname.substr(dotpos + 1, fullname.size()); + } +} + +uint64_t bitMask(unsigned size) { + BUG_CHECK(size <= 64 && "bitMask(size), maximum size is 64"); + if (size == 64) return ~UINT64_C(0); + return (UINT64_C(1) << size) - 1; +} + +uint64_t bitRange(unsigned lo, unsigned hi) { + BUG_CHECK(hi >= lo && hi < 64, "bitRange(%u,%u) invalid", lo, hi); + if (lo == 0 && hi + 1 == 64) return ~UINT64_C(0); + return ((UINT64_C(1) << (hi - lo + 1)) - 1) << lo; +} + +int parity(uint32_t v) { + v ^= v >> 16; + v ^= v >> 8; + v ^= v >> 4; + v ^= v >> 2; + v ^= v >> 1; + return v & 1; +} + +int parity_2b(uint32_t v) { + v ^= v >> 16; + v ^= v >> 8; + v ^= v >> 4; + v ^= v >> 2; + return v & 3; +} + +bool check_bigint_unsigned(value_t value, uint32_t byte_width) { + BUG_CHECK(value.type == tBIGINT); + + /* -- zero is in the range */ + if (value.bigi.size == 0) return true; + + constexpr uint64_t size_bigint_item(sizeof(value.bigi.data[0])); + + bool overflow(false); + + /* -- all items above the max_index must by zero */ + const uint64_t max_index(((byte_width + size_bigint_item - 1) / size_bigint_item) - 1); + for (int i(max_index + 1); i < value.bigi.size; ++i) { + if (value.bigi.data[i] != 0) { + overflow = true; + } + } + /* -- check limit in the boundary bigint part */ + if (value.bigi.size > max_index) { + const uint64_t ext_width(byte_width % size_bigint_item); + if (ext_width > 0 && value.bigi.data[max_index] >= (1 << (ext_width * 8))) { + overflow = true; + } + } + if (overflow) { + error(value.lineno, "the integer constant is wider than the requested width %u bytes", + byte_width); + return false; + } + + return true; +} + +bool input_int_match(const value_t value, match_t &match, int width) { + BUG_CHECK(width <= sizeof(match_t::word0) * 8); + + using MatchType = decltype(match_t::word0); + MatchType mask; + if (width < sizeof(MatchType) * 8) + mask = (1ULL << width) - 1; + else + mask = std::numeric_limits::max(); + if (value.type == tINT) { + if 
(!check_range_strict(value, 0, mask)) return false; + convert_i2m(value.i, match); + } else if (value.type == tBIGINT) { + /* -- As the match type is uint64_t and value_t::i is int64_t, constants + * above 0x7fffffffffffffff are passed as big integers. */ + if (value.bigi.size > 1) { + error(value.lineno, "the match constant is out of the expected range <0, %lu>", mask); + return false; + } + MatchType v(0); + if (value.bigi.size > 0) v = value.bigi.data[0]; + if (v > mask) { + error(value.lineno, "the match constant is out of the expected range <0, %lu>", mask); + return false; + } + convert_i2m(v, match); + } else { + value_t fixed_value = value; + fixed_value.m = value.m; + fix_match_star(fixed_value.m, mask); + if (!check_range_match(fixed_value, mask, width)) return false; + match = fixed_value.m; + } + return true; +} + +unsigned match_t::dirtcam(unsigned width, unsigned bit) { + static unsigned masks[] = {0x5555, 0x3333, 0xf0f0, 0xffff}; + BUG_CHECK(width <= 4, "dirtcam of more than 4 bits?"); + unsigned rv = (1U << (1U << width)) - 1; + for (unsigned i = 0; i < width; ++i, ++bit) { + if (!((word0 >> bit) & 1)) rv &= ~masks[i]; + if (!((word1 >> bit) & 1)) rv &= masks[i]; + } + return rv; +} + +unsigned wmatch_t::dirtcam(unsigned width, unsigned bit) { + static unsigned masks[] = {0x5555, 0x3333, 0xf0f0, 0xffff}; + BUG_CHECK(width <= 4, "dirtcam of more than 4 bits?"); + unsigned rv = (1U << (1U << width)) - 1; + for (unsigned i = 0; i < width; ++i, ++bit) { + // treat both bits 0 as don't care rather than never match + if (!word0[bit] && !word1[bit]) continue; + if (!word0[bit]) rv &= ~masks[i]; + if (!word1[bit]) rv &= masks[i]; + } + return rv; +} + +bool require_keys(const value_t &data, std::set keys) { + for (auto key : keys) { + pair_t *kv = data.map[key]; + if (!kv) { + error(data.lineno, "missing required key '%s'", key); + return false; + } + } + return true; +} diff --git a/backends/tofino/bf-asm/misc.h b/backends/tofino/bf-asm/misc.h new file 
mode 100644 index 00000000000..910e666a632 --- /dev/null +++ b/backends/tofino/bf-asm/misc.h @@ -0,0 +1,218 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_MISC_H_ +#define BACKENDS_TOFINO_BF_ASM_MISC_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "asm-types.h" +#include "backends/tofino/bf-asm/json.h" + +template +auto setup_muxctl(T ®, int val) -> decltype((void)reg.enabled_2bit_muxctl_enable) { + reg.enabled_2bit_muxctl_select = val; + reg.enabled_2bit_muxctl_enable = 1; +} +template +auto setup_muxctl(T ®, int val) -> decltype((void)reg.enabled_3bit_muxctl_enable) { + reg.enabled_3bit_muxctl_select = val; + reg.enabled_3bit_muxctl_enable = 1; +} +template +auto setup_muxctl(T ®, int val) -> decltype((void)reg.enabled_4bit_muxctl_enable) { + reg.enabled_4bit_muxctl_select = val; + reg.enabled_4bit_muxctl_enable = 1; +} +template +auto setup_muxctl(T ®, int val) -> decltype((void)reg.enabled_5bit_muxctl_enable) { + reg.enabled_5bit_muxctl_select = val; + reg.enabled_5bit_muxctl_enable = 1; +} +template +auto setup_muxctl(T ®, int val) -> decltype((void)reg.exactmatch_row_vh_xbar_enable) { + reg.exactmatch_row_vh_xbar_select = val; + reg.exactmatch_row_vh_xbar_enable = 1; +} + +template +void append(std::vector &a, const std::vector &b) { + for (auto &e : b) a.push_back(e); +} + 
+template +T join(const std::vector &vec, U sep) { + T rv; + bool first = true; + for (auto &el : vec) { + if (first) + first = false; + else + rv += sep; + rv += el; + } + return rv; +} + +extern int remove_name_tail_range(std::string &, int *size = nullptr); + +// Convert an integer to hex string of specified width (in bytes) +std::string int_to_hex_string(unsigned val, unsigned width); + +// Add a reg to CJSON Configuration Cache +void add_cfg_reg(json::vector &cfg_cache, std::string full_name, std::string name, std::string val); + +bool check_zero_string(const std::string &s); + +// Get filename +std::string get_filename(const char *s); +std::string get_directory(const char *s); + +/** Given a p4 name, eg. "inst.field", write "inst" to @instname and "field" to + * @fieldname. If @fullname cannot be split, writes @fullname to @instname and + * "" to @fieldname. + */ +void gen_instfield_name(const std::string &fullname, std::string &instname, std::string &fieldname); + +/// Compare pointers based on the pointed at type +/// For use as a Comparator for map/set types +template +struct ptrless { + bool operator()(const T *a, const T *b) const { return b ? a ? *a < *b : true : false; } + bool operator()(const std::unique_ptr &a, const std::unique_ptr &b) const { + return b ? a ? 
*a < *b : true : false; + } +}; + +/* word with size (lowest) bits set */ +uint64_t bitMask(unsigned size); +/* word with range of bits from lo to hi (inclusive) set */ +uint64_t bitRange(unsigned lo, unsigned hi); + +int parity(uint32_t v); +int parity_2b(uint32_t v); // two-bit parity (parity of pairs in the word) + +inline bool check_value(const value_t value, const decltype(value_t::i) expected) { + if (!CHECKTYPE(value, tINT)) return false; + if (value.i != expected) { + error(value.lineno, "unexpected value %ld; expected %ld", value.i, expected); + return false; + } + return true; +} + +/** + * @brief Check range of an input integer value (tINT) + * + * This method is designated mainly for checking input integer constants. The template + * parameter defines target type in which the value is going to be stored. As the + * higher limit is quite often 0xffff... we must handle signed and unsigned integers + * correctly. + * + * @tparam IntType Target type which the value will be stored in. + * @param value The checked value + * @param lo lower inclusive limit + * @param hi higher include limit + * @return False if the value is out of the specified limits + */ +template ::value>::type> +bool check_range_strict(value_t value, IntType lo, IntType hi) { + auto format_error_message([](value_t value, IntType lo, IntType hi) { + /* -- As we don't know actual type of the IntType, we cannot use the printf-like + * formatting. */ + std::ostringstream oss; + oss << "value " << value.i << " is out of allowed range <" << +lo << "; " << +hi << ">"; + error(value.lineno, "%s", oss.str().c_str()); + }); + + if (!CHECKTYPE(value, tINT)) return false; + + /* -- Handle different ranges (signed, unsigned, different size) of the value_t::i + * and IntType. 
*/ + typedef boost::numeric::converter Converter; + if (Converter::out_of_range(value.i)) { + format_error_message(value, lo, hi); + return false; + } + + /* -- Now check requested limits */ + IntType converted(static_cast(value.i)); + if (converted < lo || converted > hi) { + format_error_message(value, lo, hi); + return false; + } + return true; +} + +inline bool check_range(const value_t value, const decltype(value_t::i) lo, + const decltype(value_t::i) hi) { + return check_range_strict(value, lo, hi); +} + +inline bool check_range_match(const value_t &match, const decltype(match_t::word0) mask, + int width) { + if (!CHECKTYPE(match, tMATCH)) return false; + if ((match.m.word0 | match.m.word1) != mask) { + error(match.lineno, "invalid match width; expected %i bits", width); + return false; + } + return true; +} + +template +void convert_i2m(IntType i, match_t &m) { + static_assert(sizeof(IntType) == sizeof(match_t::word0)); + static_assert(std::is_integral::value); + + m.word0 = ~static_cast(i); + m.word1 = static_cast(i); +} + +bool check_bigint_unsigned(value_t value, uint32_t byte_width); + +/// * is parsed as match_t::word0 == 0 && match_t::word1 == 0. +/// The function converts the match according to the specified with @p mask. +inline void fix_match_star(match_t &match, const decltype(match_t::word0) mask) { + if (match.word0 == 0 && match.word1 == 0) match.word0 = match.word1 = mask; +} + +/// The function reads a tINT or tMATCH value, performs range checks, and converts +/// the value to a new tMATCH value. +/// @param value Input value +/// @param match Output value +/// @param width Expected width of the input value used for range checks +/// @pre @p value must be a tINT or tMATCH value. +/// @return True if the value is correctly parsed +bool input_int_match(const value_t value, match_t &match, int width); + +/// Check if a tMAP value contains all the given keys. 
+/// @param value A tMAP value +/// @param keys A set of keys +/// @pre @p value must be a tMAP +/// @return True if the given keys are a subset of the map's keys +bool require_keys(const value_t &data, std::set keys); + +#endif /* BACKENDS_TOFINO_BF_ASM_MISC_H_ */ diff --git a/backends/tofino/bf-asm/mksizes.cpp b/backends/tofino/bf-asm/mksizes.cpp new file mode 100644 index 00000000000..746d1509e66 --- /dev/null +++ b/backends/tofino/bf-asm/mksizes.cpp @@ -0,0 +1,40 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include + +int main() { + if (sizeof(unsigned long long) == 2 * sizeof(uintptr_t)) + std::cout << "#define uint2ptr_t unsigned long long" << std::endl; + else if (sizeof(unsigned long) == 2 * sizeof(uintptr_t)) + std::cout << "#define uint2ptr_t unsigned long" << std::endl; + else if (sizeof(unsigned) == 2 * sizeof(uintptr_t)) + std::cout << "#define uint2ptr_t unsigned" << std::endl; + else if (sizeof(unsigned long) * 2 == sizeof(uintptr_t)) + std::cout << "#define uinthptr_t unsigned long" << std::endl; + else if (sizeof(unsigned) * 2 == sizeof(uintptr_t)) + std::cout << "#define uinthptr_t unsigned" << std::endl; + else if (sizeof(unsigned short) * 2 == sizeof(uintptr_t)) + std::cout << "#define uinthptr_t unsigned short" << std::endl; + else { + std::cerr << "Can't find a type that is 2x or 1/2x a uintptr_t" << std::endl; + return 1; + } + return 0; +} diff --git a/backends/tofino/bf-asm/mktags b/backends/tofino/bf-asm/mktags new file mode 100755 index 00000000000..fb7fd6e555b --- /dev/null +++ b/backends/tofino/bf-asm/mktags @@ -0,0 +1,23 @@ +#!/bin/sh + +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# +# SPDX-License-Identifier: Apache-2.0 + +ctags -R -I VECTOR --exclude=test --exclude=submodules \ + --regex-C++='/^DECLARE_(ABSTRACT_)?TABLE_TYPE\(([a-zA-Z0-9_]+)/\2/c/' + +ctags -a -R $HOME/bf-utils/include/bfutils diff --git a/backends/tofino/bf-asm/p4_table.cpp b/backends/tofino/bf-asm/p4_table.cpp new file mode 100644 index 00000000000..e6461273822 --- /dev/null +++ b/backends/tofino/bf-asm/p4_table.cpp @@ -0,0 +1,254 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "p4_table.h" + +#include "backends/tofino/bf-asm/tables.h" + +static std::map alpms; + +std::map P4Table::by_handle; +std::map> P4Table::by_name; +unsigned P4Table::max_handle[7]; + +// handle[29:24] is used as type field. +const char *P4Table::type_name[] = {0, "match", "action", "selection", "statistics", + "meter", "stateful"}; + +// handle[19:16] is used as handle offset field for multipipe +static unsigned apply_handle_offset(unsigned handle, unsigned offset) { + return handle | (offset & 0xff) << 16; +} + +// clear bit[19:16] which is used to encode pipe_id. 
+static unsigned clear_handle_offset(unsigned handle) { return handle & 0xff00ffff; } + +P4Table *P4Table::get(P4Table::type t, VECTOR(pair_t) & data) { + BUG_CHECK(t < NUM_TABLE_TYPES); + P4Table *rv; + auto *h = ::get(data, "handle"); + auto *n = ::get(data, "name"); + if (h) { + if (!CHECKTYPE(*h, tINT)) return nullptr; + unsigned handle = h->i; + handle = clear_handle_offset(handle); + if (handle >> 24 && handle >> 24 != t) { + error(h->lineno, "Incorrect handle type %d for %s table", handle >> 24, type_name[t]); + return 0; + } + handle &= 0xffffff; + if (!handle) { + error(h->lineno, "zero handle"); + return 0; + } + if (handle > max_handle[t]) max_handle[t] = handle; + handle |= t << 24; + handle = apply_handle_offset(handle, unique_table_offset); + if (!(rv = by_handle[handle])) { + if (!n || !CHECKTYPE(*n, tSTR) || !by_name[t].count(n->s) || + (rv = by_name[t][n->s])->handle != (unsigned)t << 24) + rv = by_handle[handle] = new P4Table; + rv->handle = handle; + } + } else if (n) { + if (!CHECKTYPE(*n, tSTR)) return 0; + if (!(rv = by_name[t][n->s])) { + rv = by_name[t][n->s] = new P4Table; + rv->name = n->s; + rv->handle = apply_handle_offset(++max_handle[t] | (t << 24), unique_table_offset); + } + } else { + error(data.size ? 
data[0].key.lineno : 0, "no handle or name in p4 info"); + return 0; + } + for (auto &kv : MapIterChecked(data)) { + if (rv->lineno <= 0 || rv->lineno > kv.key.lineno) rv->lineno = kv.key.lineno; + if (kv.key == "handle") { + } else if (kv.key == "name") { + if (CHECKTYPE(kv.value, tSTR)) { + if (!rv->name.empty() && rv->name != kv.value.s) { + error(kv.value.lineno, "Inconsistent P4 name for handle 0x%x", rv->handle); + warning(rv->lineno, "Previously set here"); + } else if (rv->name.empty()) { + rv->name = kv.value.s; + if (!by_name[t].count(rv->name)) by_name[t][rv->name] = rv; + } + } + } else if (kv.key == "size") { + if (CHECKTYPE(kv.value, tINT)) { + if (rv->explicit_size && rv->size != (unsigned)kv.value.i) { + error(kv.value.lineno, "Inconsistent size for P4 handle 0x%x", rv->handle); + warning(rv->lineno, "Previously set here"); + } else { + rv->size = kv.value.i; + rv->explicit_size = true; + } + } + } else if (kv.key == "action_profile") { + if (CHECKTYPE(kv.value, tSTR)) rv->action_profile = kv.value.s; + } else if (kv.key == "match_type") { + if (CHECKTYPE(kv.value, tSTR)) rv->match_type = kv.value.s; + } else if (kv.key == "preferred_match_type") { + if (CHECKTYPE(kv.value, tSTR)) rv->preferred_match_type = kv.value.s; + } else if (kv.key == "disable_atomic_modify") { + if (CHECKTYPE(kv.value, tSTR)) + if (strncmp(kv.value.s, "true", 4) == 0) rv->disable_atomic_modify = true; + } else if (kv.key == "stage_table_type") { + if (CHECKTYPE(kv.value, tSTR)) rv->stage_table_type = kv.value.s; + } else if (kv.key == "how_referenced") { + if (CHECKTYPE(kv.value, tSTR)) { + if (strcmp(kv.value.s, "direct") != 0 && strcmp(kv.value.s, "indirect") != 0) + error(kv.value.lineno, "how_referenced must be either direct or indirect"); + else + rv->how_referenced = kv.value.s; + } + } else if (kv.key == "hidden") { + rv->hidden = get_bool(kv.value); + } else { + warning(kv.key.lineno, "ignoring unknown item %s in p4 info", value_desc(kv.key)); + } + } + return rv; +} 
+ +P4Table *P4Table::alloc(P4Table::type t, Table *tbl) { + unsigned handle = apply_handle_offset(++max_handle[t] | (t << 24), unique_table_offset); + P4Table *rv = by_handle[handle] = new P4Table; + rv->handle = handle; + rv->name = tbl->name(); + return rv; +} + +void P4Table::check(Table *tbl) { + if (name.empty()) name = tbl->name(); + if (!(handle & 0xffffff)) { + auto table_type = (handle >> 24) & 0x3f; + handle += ++max_handle[table_type]; + } +} + +json::map *P4Table::base_tbl_cfg(json::vector &out, int size, const Table *table) const { + json::map *tbl_ptr = nullptr; + for (auto &_table_o : out) { + auto &_table = _table_o->to(); + if (_table["name"] == name) { + if (_table["handle"] && _table["handle"] != handle) continue; + tbl_ptr = &_table; + break; + } + } + if (!tbl_ptr) { + tbl_ptr = new json::map(); + out.emplace_back(tbl_ptr); + } + json::map &tbl = *tbl_ptr; + tbl["direction"] = direction_name(table->gress); + if (handle) tbl["handle"] = handle; + auto table_type = (handle >> 24) & 0x3f; + BUG_CHECK(table_type < NUM_TABLE_TYPES); + tbl["name"] = p4_name(); + tbl["table_type"] = type_name[table_type]; + if (!explicit_size && tbl["size"]) + tbl["size"]->as_number()->val += size; + else + tbl["size"] = explicit_size ? this->size : size; + if (hidden) tbl["p4_hidden"] = true; + return &tbl; +} + +void P4Table::base_alpm_tbl_cfg(json::map &out, int size, const Table *table, + P4Table::alpm_type atype) const { + if (is_alpm()) { + json::map **alpm_cfg = nullptr; + unsigned *alpm_table_handle = nullptr; + auto *alpm = &alpms[this]; + if (alpm) { + auto p4Name = p4_name(); + if (!p4Name) { + error(table->lineno, "No p4 table found for alpm table : %s", table->name()); + return; + } + std::string name = p4Name; + if (atype == P4Table::PreClassifier) { + alpm_cfg = &alpm->alpm_pre_classifier_table_cfg; + alpm_table_handle = &alpm->alpm_pre_classifier_table_handle; + // Both alpm pre-classifier and atcam tables share the same + // table name. 
For driver to uniquely distinguish a + // pre-classifier from the atcam table during snapshot, we add a + // suffix to the p4 name + name += "_pre_classifier"; + } else if (atype == P4Table::Atcam) { + alpm_cfg = &alpm->alpm_atcam_table_cfg; + alpm_table_handle = &alpm->alpm_atcam_table_handle; + } + *alpm_cfg = &out; + json::map &tbl = out; + tbl["direction"] = direction_name(table->gress); + auto table_type = (handle >> 24) & 0x3f; + BUG_CHECK(table_type < NUM_TABLE_TYPES); + if (!(*alpm_table_handle & 0xffffff)) + *alpm_table_handle = apply_handle_offset( + (P4Table::MatchEntry << 24) + (++max_handle[table_type]), unique_table_offset); + if (*alpm_table_handle) tbl["handle"] = *alpm_table_handle; + tbl["name"] = name; + tbl["table_type"] = type_name[table_type]; + tbl["size"] = explicit_size ? this->size : size; + } + } +} + +void P4Table::set_partition_action_handle(unsigned handle) { + alpms[this].set_partition_action_handle.insert(handle); +} + +void P4Table::set_partition_field_name(std::string name) { + alpms[this].partition_field_name = name; +} + +std::string P4Table::get_partition_field_name() const { + if (alpms.count(this)) return alpms[this].partition_field_name; + return ""; +} + +std::set P4Table::get_partition_action_handle() const { + if (alpms.count(this)) { + return alpms[this].set_partition_action_handle; + } + return {}; +} + +unsigned P4Table::get_alpm_atcam_table_handle() const { + if (alpms.count(this)) return alpms[this].alpm_atcam_table_handle; + return 0; +} + +std::string P4Table::direction_name(gress_t gress) { + switch (gress) { + case INGRESS: + return "ingress"; + break; + case EGRESS: + return "egress"; + break; + case GHOST: + return "ghost"; + break; + default: + BUG(); + } + return ""; +} diff --git a/backends/tofino/bf-asm/p4_table.h b/backends/tofino/bf-asm/p4_table.h new file mode 100644 index 00000000000..6b937c7539d --- /dev/null +++ b/backends/tofino/bf-asm/p4_table.h @@ -0,0 +1,94 @@ +/** + * Copyright (C) 2024 Intel 
Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_P4_TABLE_H_ +#define BACKENDS_TOFINO_BF_ASM_P4_TABLE_H_ + +#include +#include + +#include "asm-types.h" +#include "backends/tofino/bf-asm/json.h" + +class Table; +class P4Table; + +struct alpm_t { + std::string partition_field_name = ""; + unsigned alpm_atcam_table_handle = 0; + unsigned alpm_pre_classifier_table_handle = 0; + std::set set_partition_action_handle; + json::map *alpm_atcam_table_cfg = 0; // handle to cjson alpm table + json::map *alpm_pre_classifier_table_cfg = 0; // handle to cjson ternary pre classifier table +}; + +class P4Table { + int lineno = -1; + std::string name, preferred_match_type; + std::string stage_table_type; + unsigned handle = 0; + bool explicit_size = false; + bool hidden = false; + json::map *config = 0; + P4Table() {} + + public: + bool disable_atomic_modify = false; + unsigned size = 0; + std::string match_type, action_profile, how_referenced; + enum type { + None = 0, + MatchEntry = 1, + ActionData = 2, + Selection = 3, + Statistics = 4, + Meter = 5, + Stateful = 6, + NUM_TABLE_TYPES = 7 + }; + enum alpm_type { PreClassifier = 1, Atcam = 2 }; + static const char *type_name[]; + + private: + static std::map by_handle; + static std::map> by_name; + static unsigned max_handle[]; + + public: + static P4Table *get(type t, VECTOR(pair_t) & d); + static P4Table *alloc(type t, Table *tbl); + 
void check(Table *tbl); + const char *p4_name() const { return name.empty() ? nullptr : name.c_str(); } + unsigned get_handle() { return handle; } + unsigned p4_size() { return size; } + std::string p4_stage_table_type() { return stage_table_type; } + json::map *base_tbl_cfg(json::vector &out, int size, const Table *table) const; + void base_alpm_tbl_cfg(json::map &out, int size, const Table *table, + P4Table::alpm_type atype) const; + bool is_alpm() const { + if (match_type == "alpm") return true; + return false; + } + void set_partition_action_handle(unsigned handle); + void set_partition_field_name(std::string name); + std::string get_partition_field_name() const; + std::set get_partition_action_handle() const; + unsigned get_alpm_atcam_table_handle() const; + static std::string direction_name(gress_t); +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_P4_TABLE_H_ */ diff --git a/backends/tofino/bf-asm/parser-tofino-jbay.cpp b/backends/tofino/bf-asm/parser-tofino-jbay.cpp new file mode 100644 index 00000000000..fbe83db78a4 --- /dev/null +++ b/backends/tofino/bf-asm/parser-tofino-jbay.cpp @@ -0,0 +1,2026 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "parser-tofino-jbay.h" + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/target.h" +#include "constants.h" +#include "lib/algorithm.h" +#include "lib/ordered_set.h" +#include "lib/range.h" +#include "misc.h" +#include "phv.h" +#include "top_level.h" +#include "vector.h" + +/* Dummy specializations so that all specializations are covered */ + +void AsmParser::init_port_use(bitvec &port_use, const value_t &arg) { + if (arg.type == tVEC) { + for (int i = 0; i < arg.vec.size; i++) { + init_port_use(port_use, arg[i]); + } + } else if (arg.type == tRANGE) { + if (arg.range.hi > arg.range.lo) + error(arg.lineno, "port range hi index %d cannot be smaller than lo index %d", + arg.range.hi, arg.range.lo); + port_use.setrange(arg.range.lo, arg.range.hi - arg.range.lo + 1); + } else if (arg.type == tINT) { + port_use.setbit(arg.i); + } +} + +void AsmParser::start(int lineno, VECTOR(value_t) args) { + if (args.size != 0 && args[0] != "ingress" && args[0] != "egress" && + (args[0] != "ghost" || options.target < JBAY)) + error(lineno, "parser must specify ingress%s or egress", + options.target >= JBAY ? 
", ghost" : ""); +} + +void AsmParser::input(VECTOR(value_t) args, value_t data) { + if (args.size > 0 && args[0] == "ghost") { + // Backward compatibility for old ghost parser syntax + // ghost parser : W0 + if (data.type == tVEC) { + for (int i = 0; i < data.vec.size; i++) { + ghost_parser.push_back(Phv::Ref(GHOST, 0, data[i])); + } + // New ghost parser syntax + // parser ghost: + // ghost_md: W0 + // pipe_mask: 0 + } else if (data.type == tMAP) { + for (auto &kv : MapIterChecked(data.map, true)) { + if (kv.key == "ghost_md") { + if (kv.value.type == tVEC) { + for (int i = 0; i < kv.value.vec.size; i++) { + ghost_parser.push_back(Phv::Ref(GHOST, 0, data[i])); + } + } else { + ghost_parser.push_back(Phv::Ref(GHOST, 0, kv.value)); + } + } else if (kv.key == "pipe_mask") { + if (!CHECKTYPE(kv.value, tINT)) continue; + ghost_pipe_mask = kv.value.i; + } + } + } else { + ghost_parser.push_back(Phv::Ref(GHOST, 0, data)); + } + return; + } + + gress_t gress = (args.size > 0 && args[0] == "egress") ? 
EGRESS : INGRESS; + auto *p = new Parser(phv_use, gress, parser[gress].size()); + parser[gress].push_back(p); + if (args.size == 1) { + p->port_use.setrange(0, Target::NUM_PARSERS()); + } else if (args.size == 2) { + init_port_use(p->port_use, args[1]); + } + p->input(args, data); +} + +void AsmParser::process() { + for (auto gress : Range(INGRESS, EGRESS)) { + for (auto p : parser[gress]) { + p->ghost_parser = ghost_parser; + p->ghost_pipe_mask = ghost_pipe_mask; + p->process(); + } + } + + bitvec phv_allow_bitwise_or; + for (auto p : parser[INGRESS]) { + phv_allow_bitwise_or |= p->phv_allow_bitwise_or; + } + for (auto p : parser[EGRESS]) { + phv_allow_bitwise_or |= p->phv_allow_bitwise_or; + } + for (auto p : parser[INGRESS]) { + p->phv_allow_bitwise_or = phv_allow_bitwise_or; + } + for (auto p : parser[EGRESS]) { + p->phv_allow_bitwise_or = phv_allow_bitwise_or; + } + + bitvec phv_allow_clear_on_write; + for (auto p : parser[INGRESS]) { + phv_allow_clear_on_write |= p->phv_allow_clear_on_write; + } + for (auto p : parser[EGRESS]) { + phv_allow_clear_on_write |= p->phv_allow_clear_on_write; + } + for (auto p : parser[INGRESS]) { + p->phv_allow_clear_on_write = phv_allow_clear_on_write; + } + for (auto p : parser[EGRESS]) { + p->phv_allow_clear_on_write = phv_allow_clear_on_write; + } + + bitvec phv_init_valid; + for (auto p : parser[INGRESS]) { + phv_init_valid |= p->phv_init_valid; + } + for (auto p : parser[EGRESS]) { + phv_init_valid |= p->phv_init_valid; + } + for (auto p : parser[INGRESS]) { + p->phv_init_valid = phv_init_valid; + } + for (auto p : parser[EGRESS]) { + p->phv_init_valid = phv_init_valid; + } +} + +void AsmParser::output(json::map &ctxt_json) { + ctxt_json["parser"]["ingress"] = json::vector(); + ctxt_json["parser"]["egress"] = json::vector(); + + bool use_multiple_parser_impl = false; + + for (auto gress : Range(INGRESS, EGRESS)) { + if (parser[gress].size() > 1) use_multiple_parser_impl = true; + } + /// We use the 'parsers' node in ctxt 
json to implement + /// multiple parser instances support. + /// We use the 'parser' node for all single parser + /// instance support. + for (auto gress : Range(INGRESS, EGRESS)) { + /// remove after multi-parser support is fully-tested. + if (use_multiple_parser_impl) { + for (auto p : parser[gress]) { + p->output(ctxt_json); + } + } else { + if (!parser[gress].empty() && parser[gress][0] != nullptr) + parser[gress][0]->output_legacy(ctxt_json); + } + } +} + +std::vector AsmParser::test_get_parser(gress_t gress) { + if ((gress == INGRESS) || (gress == EGRESS)) return parser[gress]; + return std::vector(); +} + +std::map>> Parser::clots; +std::array, PARSER_MAX_CLOTS> Parser::clot_use; +unsigned Parser::max_handle = 0; + +static void collect_phv_vector(value_t value, gress_t gress, bitvec &bv) { + for (auto &el : value.vec) { + Phv::Ref reg(gress, 0, el); + if (reg.check()) { + int id = reg->reg.uid; + bv[id] = 1; + } + } +} + +void Parser::input(VECTOR(value_t) args, value_t data) { + lineno = data.lineno; + if (!CHECKTYPE(data, tMAP)) return; + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (args.size > 0) { + if (args[0] == "ingress" && gress != INGRESS) continue; + if (args[0] == "egress" && gress != EGRESS) continue; + } else if (error_count > 0) { + break; + } + for (auto &kv : MapIterChecked(data.map, true)) { + if (kv.key == "name" && (kv.value.type == tSTR)) { + name = kv.value.s; + continue; + } + if (kv.key == "start" && (kv.value.type == tVEC || kv.value.type == tSTR)) { + if (kv.value.type == tVEC) { + for (int i = 0; i < 4 && i < kv.value.vec.size; i++) + start_state[i] = kv.value[i]; + } else { + for (int i = 0; i < 4; i++) start_state[i] = kv.value; + } + continue; + } + if (kv.key == "priority" && (kv.value.type == tVEC || kv.value.type == tINT)) { + if (kv.value.type == tVEC) { + for (int i = 0; i < 4 && i < kv.value.vec.size; i++) + if (CHECKTYPE(kv.value[i], tINT)) priority[i] = kv.value[i].i; + } else { + for (int i = 0; i < 4; i++) 
priority[i] = kv.value.i; + } + continue; + } + if (kv.key == "priority_threshold" && + (kv.value.type == tVEC || kv.value.type == tINT)) { + if (kv.value.type == tVEC) { + for (int i = 0; i < 4 && i < kv.value.vec.size; i++) + if (CHECKTYPE(kv.value[i], tINT)) pri_thresh[i] = kv.value[i].i; + } else { + for (int i = 0; i < 4; i++) pri_thresh[i] = kv.value.i; + } + continue; + } + if (kv.key == "parser_error") { + if (parser_error.lineno >= 0) { + error(kv.key.lineno, "Multiple parser_error declarations"); + warning(parser_error.lineno, "Previous was here"); + } else { + parser_error = Phv::Ref(gress, 0, kv.value); + } + continue; + } + if (kv.key == "bitwise_or") { + if (CHECKTYPE(kv.value, tVEC)) + collect_phv_vector(kv.value, gress, phv_allow_bitwise_or); + + continue; + } + if (kv.key == "clear_on_write") { + if (options.target == TOFINO) + error(kv.key.lineno, "Tofino parser does not support clear-on-write semantic"); + + if (CHECKTYPE(kv.value, tVEC)) + collect_phv_vector(kv.value, gress, phv_allow_clear_on_write); + + continue; + } + if (kv.key == "init_zero") { + if (CHECKTYPE(kv.value, tVEC)) { + collect_phv_vector(kv.value, gress, phv_init_valid); + collect_phv_vector(kv.value, gress, phv_use[gress]); + } + + continue; + } + if (kv.key == "hdr_len_adj") { + if (CHECKTYPE(kv.value, tINT)) hdr_len_adj = kv.value.i; + continue; + } + if (kv.key == "states") { + if (CHECKTYPE(kv.value, tMAP)) + for (auto &st : kv.value.map) define_state(gress, st); + continue; + } + if (kv.key == "bubble") { // obfuscated name for reverse engineering + if (CHECKTYPE(kv.value, tMAP)) { + rate_limit.lineno = kv.key.lineno; + rate_limit.parse(kv.value.map); + } + continue; + } + if (gress == EGRESS && kv.key == "meta_opt") { + if (CHECKTYPE(kv.value, tINT)) meta_opt = kv.value.i; + continue; + } + if (kv.key == "parse_depth_checks_disabled") { + if (options.target == TOFINO) + options.tof1_egr_parse_depth_checks_disabled = get_bool(kv.value); + else + warning(kv.key.lineno, + 
"parse_depth_checks_disabled unexpected: supported only by Tofino"); + continue; + } + define_state(gress, kv); + } + + // process the CLOTs immediately rather than in Parser::process() so that it + // happens before Deparser::process() + for (auto &map : Values(clots)) { + for (auto &vec : Values(map)) { + State::Match::Clot *maxlen = 0; + for (auto *cl : vec) { + if (cl->tag >= 0) clot_use[cl->tag].push_back(cl); + if (!maxlen || cl->max_length > maxlen->max_length) maxlen = cl; + } + for (auto *cl : vec) cl->max_length = maxlen->max_length; + } + } + + for (auto &map : Values(clots)) { + std::map clot_alloc; + unsigned free_clot_tag = 0; + while (free_clot_tag < PARSER_MAX_CLOTS && !clot_use[free_clot_tag].empty()) + ++free_clot_tag; + + for (auto &vec : Values(map)) { + for (auto *cl : vec) { + if (cl->tag >= 0) continue; + if (clot_alloc.count(cl->name)) { + cl->tag = clot_alloc.at(cl->name); + clot_use[cl->tag].push_back(cl); + } else if (free_clot_tag >= PARSER_MAX_CLOTS) { + error(cl->lineno, "Too many CLOTs (%d max)", PARSER_MAX_CLOTS); + } else { + clot_alloc[cl->name] = cl->tag = free_clot_tag++; + clot_use[cl->tag].push_back(cl); + while (free_clot_tag < PARSER_MAX_CLOTS && !clot_use[free_clot_tag].empty()) + ++free_clot_tag; + } + } + } + } + } +} + +void Parser::define_state(gress_t gress, pair_t &kv) { + if (!CHECKTYPE2M(kv.key, tSTR, tCMD, "state declaration")) return; + const char *name = kv.key.s; + match_t stateno = {0, 0}; + if (kv.key.type == tCMD) { + name = kv.key[0].s; + if (!CHECKTYPE2(kv.key[1], tINT, tMATCH)) return; + if (kv.key[1].type == tINT) { + if (kv.key[1].i > PARSER_STATE_MASK) + error(kv.key.lineno, "Explicit state out of range"); + stateno.word1 = kv.key[1].i; + stateno.word0 = (~kv.key[1].i) & PARSER_STATE_MASK; + } else { + stateno = kv.key[1].m; + if ((stateno.word0 | stateno.word1) > PARSER_STATE_MASK) + error(kv.key.lineno, "Explicit state out of range"); + stateno.word0 |= ~(stateno.word0 | stateno.word1) & 
PARSER_STATE_MASK; + } + } + if (!CHECKTYPE(kv.value, tMAP)) return; + auto n = states.emplace(name, new State(kv.key.lineno, name, gress, stateno, kv.value.map)); + if (n.second) { + all.push_back(n.first->second); + } else { + error(kv.key.lineno, "State %s already defined in %sgress", name, gress ? "e" : "in"); + warning(n.first->second->lineno, "previously defined here"); + } +} + +void Parser::process() { + if (all.empty()) return; + for (auto st : all) st->pass1(this); + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (states.empty()) continue; + if (start_state[0].lineno < 0) { + State *start = get_start_state(); + if (!start) { + error(lineno, "No %sgress parser start state", gress ? "e" : "in"); + continue; + } else { + for (int i = 0; i < 4; i++) { + start_state[i].name = start->name; + start_state[i].lineno = start->lineno; + start_state[i].ptr.push_back(start); + } + } + } else { + for (int i = 0; i < 4; i++) start_state[i].check(gress, this, 0); + } + for (int i = 0; i < 4 && !start_state[i]; i++) + if (!start_state[i]->can_be_start()) { + std::string name = std::string("'; + LOG1("Creating new " << gress << " " << name << " state"); + auto n = states.emplace(name, new State(lineno, name.c_str(), gress, match_t{0, 0}, + VECTOR(pair_t){0, 0, 0})); + BUG_CHECK(n.second); + State *state = n.first->second; + state->def = new State::Match(lineno, gress, *start_state[i]); + for (int j = 3; j >= i; j--) + if (start_state[j] == start_state[i]) { + start_state[j].name = name; + start_state[j].ptr[0] = state; + } + all.insert(all.begin(), state); + } + if (parser_error.lineno >= 0) + if (parser_error.check() && parser_error.gress() == gress) + phv_use[gress][parser_error->reg.uid] = 1; + } + if (ghost_parser.size()) { + int total_size = 0; + int curr_parser_id = -1; + std::sort(ghost_parser.begin(), ghost_parser.end()); + for (Phv::Ref &r : ghost_parser) { + r.check(); + total_size += r.size(); + if (curr_parser_id >= 0) { + if ((curr_parser_id + 1) != 
r->reg.parser_id()) + error(ghost_parser[0].lineno, "ghost thread input must be 32 consecutive bits"); + } + curr_parser_id = r->reg.parser_id(); + } + if (total_size != 32) error(ghost_parser[0].lineno, "ghost thread input must be 32 bits"); + } + if (error_count > 0) return; + int all_index = 0; + for (auto st : all) st->all_idx = all_index++; + bitvec unreach(0, all_index); + for (int i = 0; i < 4; i++) + if (!states.empty()) start_state[i]->unmark_reachable(this, unreach); + for (auto u : unreach) + warning(all[u]->lineno, "%sgress state %s unreachable", all[u]->gress ? "E" : "In", + all[u]->name.c_str()); + if (phv_use[INGRESS].intersects(phv_use[EGRESS])) { + bitvec tmp = phv_use[INGRESS]; + tmp &= phv_use[EGRESS]; + for (int reg : tmp) + error(lineno, "Phv register %s(R%d) used by both ingress and egress", + Phv::reg(reg)->name, reg); + } + if (options.match_compiler || 1) { /* FIXME -- need proper liveness analysis */ + Phv::setuse(INGRESS, phv_use[INGRESS]); + Phv::setuse(EGRESS, phv_use[EGRESS]); + } +} + +int Parser::get_header_stack_size_from_valid_bits(std::vector sets) { + // Find Set operation that holds the stack valid bits, then + // find the largest value of "$.$valid". + for (const auto *set : sets) { + auto reg = Phv::reg(set->where.name()); + if (reg) { + auto aliases = Phv::aliases(reg, 0); + if (std::find_if(aliases.begin(), aliases.end(), [](const std::string &s) { + return s.find(".$stkvalid") != std::string::npos; + }) != aliases.end()) { + int stack_size = 0; + while (std::find_if( + aliases.begin(), aliases.end(), [&stack_size](const std::string &s) { + return s.find("$" + std::to_string(stack_size) + ".$valid") != + std::string::npos; + }) != aliases.end()) + stack_size++; + return stack_size; + } + } + } + return 0; +} + +/** + * @brief Returns the deepest parser depth, starting from state s. + * Returned value in bits. 
+ */ +int Parser::state_prsr_dph_max(const State *s) { + std::map> visited; // pair: first=curr_dph_bits + // second=recurse count + return state_prsr_dph_max(s, visited, -hdr_len_adj * 8); +} + +/** + * @brief Returns the deepest parser depth for state s, considering the depth + * is already at curr_dph_bits at the time it's being called. + * Returned value in bits. + */ +int Parser::state_prsr_dph_max(const State *s, + std::map> &visited, + int curr_dph_bits) { + int parser_depth_max_bits = parser_depth_max_bytes * 8; + int parser_depth_min_bits = parser_depth_min_bytes * 8; + if (!s) return 0; + if (s->ignore_max_depth && curr_dph_bits >= parser_depth_min_bits) return curr_dph_bits; + // Keep track of states visited along with the parser depth at time of visit + // and the number of times the state was called recursively. Return 0 if current + // curr_dph_bits value is smaller or equal to the largest value seen so far, + // or if the state was called enough times to fill the header stack if one + // is used. + if (visited.count(s) && (visited.at(s).first >= curr_dph_bits)) { + LOG5(" State : " << s->name << " --> largest depth : " << visited[s].first + << " >= current depth : " << curr_dph_bits << " --> Ignore."); + return 0; + } + visited[s].first = curr_dph_bits; + visited[s].second++; + int curr_state_prsr_dph_max = 0; + for (const auto *m : s->match) { + auto local_bits_shifted = curr_dph_bits + (m->shift * 8); + std::string next_name = m->next ? m->next->name : std::string("END"); + LOG5(" State : " << s->name << " --> " << m->match << " --> " << next_name + << " | Bits: " << curr_dph_bits << ", shift : " << m->shift * 8 + << ", intr_md_bits : " << m->intr_md_bits + << ", Total Bits : " << local_bits_shifted); + // Look for non-unrolled loops that save in header stacks. In that case, use + // header stack size to limit parser depth calculation. + if (m->offset_inc) { + // One of the Set operations will set the header stack entries $valid bits. 
+ // Get stack size information from these valid bits. + int stack_size = get_header_stack_size_from_valid_bits(m->set); + LOG5(" State : stack_size = " << stack_size + << ", visited count = " << visited[s].second); + // Do not go beyond header stack size to find parser depth. + if (visited[s].second > stack_size) { + LOG5(" State : reached end of header stack, size = " << stack_size); + continue; + } + } + + if (local_bits_shifted < parser_depth_max_bits) { + if (m->next) { + for (auto n : m->next.ptr) { + int prsr_dph = state_prsr_dph_max(n, visited, local_bits_shifted); + curr_state_prsr_dph_max = std::max(curr_state_prsr_dph_max, prsr_dph); + } + } else { + curr_state_prsr_dph_max = std::max(curr_state_prsr_dph_max, local_bits_shifted); + } + } else { + LOG5(" State : " << s->name << " --> " << m->match << " --> " << next_name + << " | Reached " << parser_depth_max_bits + << " bits, maximum supported by target."); + curr_state_prsr_dph_max = parser_depth_max_bits; + } + + // No point in going any further with the other matches + // if we reached the maximum allowed by the target. + if (curr_state_prsr_dph_max >= parser_depth_max_bits) break; + + // If the current match is a default or catch-all transition, then + // break out of the loop as any following transitions will never + // be taken. + uint64_t mask = bitMask(s->key.width); + if ((m->match.word0 & m->match.word1 & mask) == mask) { + LOG5(" State : catch-all transition, break out of loop."); + break; + } + } + visited[s].second--; + return curr_state_prsr_dph_max; +} + +int Parser::get_prsr_max_dph() { + // Look for the longest parser depth from all configured start states. + // Return the longest one found. + // + // Note: at this point start_state[] contains the start states either + // read from the bfa file, or deduced from the standard/typical + // start state names returned from get_start_state() during + // Parser::process. 
+ // + int prsr_dph_max = 0; + std::set visited; + for (auto &state : start_state) { + if (state) { + BUG_CHECK(states[state.name], "Start state %s not found in states table.", + state.name.c_str()); + if (visited.count(state.name)) continue; + visited.insert(state.name); + int prsr_dph = state_prsr_dph_max(states[state.name]); + LOG4("state " << state.name << " dph=" << prsr_dph); + prsr_dph_max = std::max(prsr_dph_max, prsr_dph); + } + } + prsr_dph_max = (prsr_dph_max + 0x7) & ~0x7; + prsr_dph_max /= 8; + prsr_dph_max = (prsr_dph_max + 0xf) & ~0xf; + prsr_dph_max /= 16; + // P4C-5341/5342: For Tofino EPB, the one additional word is sent beyond prsr_dph_max. + if (options.target == TOFINO && gress == EGRESS) prsr_dph_max -= 1; + return std::max(prsr_dph_max, 4); +} + +void Parser::output_default_ports(json::vector &vec, bitvec port_use) { + while (!port_use.empty()) { + auto idx = port_use.ffs(0); + vec.push_back(idx); + port_use.clrbit(idx); + } +} + +std::map Parser::parser_handles; + +void Parser::write_config(RegisterSetBase ®s, json::map &json, bool legacy) { + if (auto *tofino_regs = dynamic_cast(®s)) + write_config(*tofino_regs, json, legacy); + else if (auto *jbay_regs = dynamic_cast(®s)) + write_config(*jbay_regs, json, legacy); +} + +// output context.json format with multiple parser support +void Parser::output(json::map &ctxt_json) { + json::vector &cjson = ctxt_json["parsers"][gress ? "egress" : "ingress"]; + if (all.empty()) return; + for (auto st : all) st->pass2(this); + if (error_count > 0) return; + tcam_row_use = PARSER_TCAM_DEPTH; + SWITCH_FOREACH_TARGET( + options.target, auto *regs = new TARGET::parser_regs; declare_registers(regs); + json::map parser_ctxt_json; + // Parser Handles are generated in the assembler. Since the assembler + // has no idea about multipipe program (since assembler is separately + // invoked for each pipe bfa) the parser handles generated can be same + // across multiple pipes. 
Here, we rely on the driver to prefix a pipe id + // (profile id) to make the handles unique. The upper 2 bits are + // reserved for this id. + parser_handle = next_handle(); + parser_handles[name] = parser_handle; // store parser handles + parser_ctxt_json["name"] = name; parser_ctxt_json["handle"] = parser_handle; + json::vector default_ports; output_default_ports(default_ports, port_use); + parser_ctxt_json["default_parser_id"] = std::move(default_ports); + write_config(dynamic_cast(*regs), parser_ctxt_json, false); + // FIXME -- rate limit config regs are per-pipe, not per parser, so if more than + // one parser wants to set different rate limits, there will be a problem + if (rate_limit) rate_limit.write_config(TopLevel::regs()->reg_pipe, gress); + cjson.push_back(std::move(parser_ctxt_json)); + gen_configuration_cache(*regs, ctxt_json["configuration_cache"]);) +} + +// output context.json format prior to multiple parser support +// TODO: remove after multi-parser support is fully-tested. +void Parser::output_legacy(json::map &ctxt_json) { + if (all.empty()) return; + for (auto st : all) st->pass2(this); + if (error_count > 0) return; + tcam_row_use = PARSER_TCAM_DEPTH; + SWITCH_FOREACH_TARGET( + options.target, auto *regs = new TARGET::parser_regs; declare_registers(regs); + parser_handle = next_handle(); + write_config(dynamic_cast(*regs), ctxt_json["parser"], true); + if (rate_limit) rate_limit.write_config(TopLevel::regs()->reg_pipe, gress); + gen_configuration_cache(*regs, ctxt_json["configuration_cache"]);) +} + +Parser::Checksum::Checksum(gress_t gress, pair_t data) : lineno(data.key.lineno), gress(gress) { + if (!CHECKTYPE2(data.key, tSTR, tCMD)) return; + if (!CHECKTYPE(data.value, tMAP)) return; + if (data.key.vec.size == 2) { + if ((unit = data.key[1].i) >= Target::PARSER_CHECKSUM_UNITS()) + error(lineno, "Ran out of %sgress parser checksum units (%d available)", + gress ? 
"e" : "in", Target::PARSER_CHECKSUM_UNITS()); + } else { + error(data.key.lineno, "Syntax error"); + } + for (auto &kv : MapIterChecked(data.value.map, true)) { + if (kv.key == "type") { + if (CHECKTYPE(kv.value, tSTR)) { + if (kv.value == "VERIFY") + type = 0; + else if (kv.value == "RESIDUAL") + type = 1; + else if (kv.value == "CLOT") + type = 2; + else + error(kv.value.lineno, "Unknown parser checksum type"); + } + if (kv.value == "clot") { + if (unit < 2 || unit > 4) + error(kv.value.lineno, "CLOT can only use checksum engine 2-4"); + } + } else if (kv.key == "start") { + if (CHECKTYPE(kv.value, tINT)) start = kv.value.i; + } else if (kv.key == "end") { + if (CHECKTYPE(kv.value, tINT)) end = kv.value.i; + } else if (kv.key == "addr") { + if (CHECKTYPE(kv.value, tINT)) addr = kv.value.i; + } else if (kv.key == "add") { + if (CHECKTYPE(kv.value, tINT)) add = kv.value.i; + } else if (kv.key == "dest") { + if (kv.value.type == tCMD && kv.value == "clot" && kv.value.vec.size == 2) + tag = kv.value[1].i; + else + dest = Phv::Ref(gress, 0, kv.value); + } else if (kv.key == "end_pos") { + if (CHECKTYPE(kv.value, tINT)) { + if (kv.value.i > PARSER_INPUT_BUFFER_SIZE) + error(kv.value.lineno, "Header end position is out of input buffer"); + if (kv.value.i < 0) error(kv.value.lineno, "Header end postion cannot be negative"); + dst_bit_hdr_end_pos = kv.value.i; + } + } else if (kv.key == "mask") { + if (CHECKTYPE(kv.value, tVEC)) { + for (int i = 0; i < kv.value.vec.size; i++) { + auto range = kv.value[i]; + unsigned lo = 0, hi = 0; + if (range.type == tRANGE) { + lo = range.range.lo; + hi = range.range.hi; + } else if (range.type == tINT) { + lo = hi = range.i; + } else { + error(kv.value.lineno, "Syntax error, expecting range or int"); + } + + if (lo > hi) error(kv.value.lineno, "Invalid parser checksum input"); + if ((hi + 1) > PARSER_INPUT_BUFFER_SIZE) + error(kv.value.lineno, "Parser checksum out of input buffer?"); + + for (unsigned byte = lo; byte <= hi; ++byte) { + 
if (kv.key == "mask") mask |= (1 << byte); + } + } + } + } else if (kv.key == "swap") { + if (CHECKTYPE(kv.value, tINT)) swap = kv.value.i; + } else if (kv.key == "mul_2") { + if (options.target == TOFINO) { + error(kv.value.lineno, "multiply by 2 feature is available for Tofino2 and higher"); + } + if (CHECKTYPE(kv.value, tINT)) mul_2 = kv.value.i; + } else if (kv.key == "shift") { + shift = get_bool(kv.value); + } else { + warning(kv.key.lineno, "ignoring unknown item %s in checksum", value_desc(kv.key)); + } + } +} + +bool Parser::Checksum::equiv(const Checksum &a) const { + if (unit != a.unit) return false; + if (tag != a.tag) return false; + if (dest && a.dest) { + if (dest != a.dest) return false; + } else if (dest || a.dest) { + return false; + } + return add == a.add && mask == a.mask && swap == a.swap && mul_2 == a.mul_2 && + dst_bit_hdr_end_pos == a.dst_bit_hdr_end_pos && start == a.start && end == a.end && + shift == a.shift && type == a.type; +} + +void Parser::Checksum::pass1(Parser *parser) { + if (parser->checksum_use.empty()) + parser->checksum_use.resize(Target::PARSER_CHECKSUM_UNITS(), {}); + if (addr >= 0) { + if (addr >= PARSER_CHECKSUM_ROWS) { + error(lineno, "invalid %sgress parser checksum address %d", gress ? "e" : "in", addr); + } else if (parser->checksum_use[unit][addr]) { + if (!equiv(*parser->checksum_use[unit][addr])) { + error(lineno, "incompatible %sgress parser checksum use at address %d", + gress ? 
"e" : "in", addr); + warning(parser->checksum_use[unit][addr]->lineno, "previous use"); + } + } else { + parser->checksum_use[unit][addr] = this; + } + } + if (dest.check() && dest->reg.parser_id() < 0) + error(dest.lineno, "%s is not accessable in the parser", dest->reg.name); + if (dest && dest->reg.size == 32) + error(dest.lineno, "checksum unit cannot write to 32-bit container"); + if (type == 0 && dest) { + if (dest->lo != dest->hi) + error(dest.lineno, "checksum verification destination must be single bit"); + else + dst_bit_hdr_end_pos = dest->lo; + if (options.target == JBAY && dest->reg.size == 8 && dest->reg.deparser_id() % 2) + dst_bit_hdr_end_pos += 8; + } else if (type == 1 && dest && dest.size() != dest->reg.size) { + error(dest.lineno, "residual checksum must write whole container"); + } +} + +void Parser::Checksum::pass2(Parser *parser) { + if (addr < 0) { + int avail = -1; + for (int i = 0; i < PARSER_CHECKSUM_ROWS; ++i) { + if (parser->checksum_use[unit][i]) { + if (equiv(*parser->checksum_use[unit][i])) { + addr = i; + break; + } + } else if (avail < 0) { + avail = i; + } + } + if (addr < 0) { + if (avail >= 0) { + parser->checksum_use[unit][addr = avail] = this; + } else { + error(lineno, + "Ran out of room in parser checksum control RAM of" + " %sgress unit %d (%d rows available)", + gress ? 
// (continuation of Parser::Checksum::pass2 — resumes inside its RAM-exhausted
//  error() call, whose opening lies in the preceding chunk)
"e" : "in", unit, PARSER_CHECKSUM_ROWS);
            }
        }
    }
}

// Parse a parser-counter initializer spec from the .bfa map:
//   add/max    -- 8-bit values (0-255)
//   rotate     -- 0-7
//   mask       -- 0-7 on Tofino, 0-255 on JBay (Tofino defaults to 7)
//   src        -- counter load source; the legal names differ per target
//   push / update_with_top -- accepted but handled elsewhere
Parser::CounterInit::CounterInit(gress_t gress, pair_t data)
    : gress(gress), lineno(data.key.lineno) {
    if (!CHECKTYPE2(data.key, tSTR, tCMD)) return;
    if (!CHECKTYPE(data.value, tMAP)) return;

    // Tofino's counter mask is only 3 bits wide, so default it to all-ones.
    if (options.target == TOFINO) mask = 7;

    for (auto &kv : MapIterChecked(data.value.map, true)) {
        if (kv.key == "add" && CHECKTYPE(kv.value, tINT)) {
            add = kv.value.i;
            if (add > 255) error(lineno, "Parser counter add value out of range (0-255)");
        } else if (kv.key == "max" && CHECKTYPE(kv.value, tINT)) {
            max = kv.value.i;
            if (max > 255) error(lineno, "Parser counter max value out of range (0-255)");
        } else if (kv.key == "rotate" && CHECKTYPE(kv.value, tINT)) {
            rot = kv.value.i;
            if (rot > 7) error(lineno, "Parser counter rotate value out of range (0-7)");
        } else if (kv.key == "mask" && CHECKTYPE(kv.value, tINT)) {
            mask = kv.value.i;
            if (options.target == TOFINO && mask > 7) {
                error(lineno, "Parser counter mask value out of range (0-7)");
            } else if (options.target == JBAY && mask > 255) {
                error(lineno, "Parser counter mask value out of range (0-255)");
            }
        } else if (kv.key == "src") {
            if (CHECKTYPE(kv.value, tSTR)) {
                // Source encodings differ between Tofino (half/byte names)
                // and later targets (byte0-byte3).
                if (options.target == TOFINO) {
                    if (kv.value == "half_lo")
                        src = 0;
                    else if (kv.value == "half_hi")
                        src = 1;
                    else if (kv.value == "byte0")
                        src = 2;
                    else if (kv.value == "byte1")
                        src = 3;
                    else
                        error(lineno, "Unexpected counter load source");
                } else if (options.target != TOFINO) {
                    if (kv.value == "byte0")
                        src = 0;
                    else if (kv.value == "byte1")
                        src = 1;
                    else if (kv.value == "byte2")
                        src = 2;
                    else if (kv.value == "byte3")
                        src = 3;
                    else
                        error(lineno, "Unexpected counter load source");
                }
            }
        } else if (kv.key != "push" && kv.key != "update_with_top") {
            error(lineno, "Syntax error in parser counter init expression");
        }
    }
}

// Two counter-init entries may share a RAM row iff every field matches.
bool Parser::CounterInit::equiv(const CounterInit &a) const {
    return add == a.add && mask == a.mask && rot == a.rot && max == a.max && src == a.src;
}

// pass2: allocate a counter-init RAM row — reuse an equivalent row when one
// exists, else take the first free one; error when the RAM is exhausted.
void Parser::CounterInit::pass2(Parser *parser) {
    if (addr < 0) {
        int avail = -1;
        for (int i = 0; i < PARSER_CTRINIT_ROWS; ++i) {
            if (parser->counter_init[i]) {
                if (equiv(*parser->counter_init[i])) {
                    addr = i;
                    break;
                }
            } else if (avail < 0) {
                avail = i;
            }
        }
        if (addr < 0) {
            if (avail >= 0)
                parser->counter_init[addr = avail] = this;
            else
                error(lineno,
                      "Ran out of room in parser counter init RAM of"
                      " %sgress (%d rows available)",
                      gress ? "e" : "in", PARSER_CTRINIT_ROWS);
        }
    }
}

Parser::PriorityUpdate::PriorityUpdate(const value_t &exp) {
    lineno = exp.lineno;
    if (!parse(exp)) error(lineno, "Syntax error in priority expression");
}

// Recursive-descent parse of a priority expression of the form
//   @<offset> [& <mask>] [>> <shift>]
// `what` tracks which sub-position is being parsed (START/MASK/SHIFT/LOAD);
// returns false on any syntax or range violation.
bool Parser::PriorityUpdate::parse(const value_t &exp, int what) {
    enum { START, MASK, SHIFT, LOAD };
    if (exp.type == tCMD) {
        if (exp[0] == ">>") {
            return what < SHIFT && parse(exp[1], LOAD) && parse(exp[2], SHIFT);
        } else if (exp[0] == "&") {
            return what < SHIFT && parse(exp[1], MASK) && parse(exp[2], MASK);
        }
    } else if (exp.type == tINT) {
        switch (what) {
            case START:
            case MASK:
                // mask may only be set once and must fit in 3 bits (0-7).
                if (mask >= 0) return false;
                if ((mask = exp.i) < 0 || mask > 7) {
                    error(exp.lineno, "priority mask %d out of range", mask);
                    return false;
                }
                return true;
            case SHIFT:
                // shift may only be set once and must fit in 4 bits (0-15).
                if (shift >= 0) return false;
                if ((shift = exp.i) < 0 || shift > 15) {
                    error(exp.lineno, "priority shift %d out of range", shift);
                    return false;
                }
                return true;
            default:
                return false;
        }
    } else if (exp.type == tSTR && exp.s[0] == '@' && isdigit(exp.s[1])) {
        // "@N" names the input-buffer offset to load from; settable only once
        // and not valid in shift position.
        char *end;
        if (what == SHIFT || offset >= 0 || (offset = strtol(exp.s + 1, &end, 10)) < 0 || *end)
            return false;
        return true;
    }
    return false;
}

// Parse the parser rate-limit ("bubble") spec: inc/dec default to 1;
// max and interval come from the map; a missing max is an error.
void Parser::RateLimit::parse(const VECTOR(pair_t) & data) {
    inc = dec = 1;
    for (auto &kv : MapIterChecked(data)) {
        if (kv.key == "inc") {
            if (CHECKTYPE(kv.value, tINT)) inc = kv.value.i;
        } else if (kv.key
== "dec") { + if (CHECKTYPE(kv.value, tINT)) dec = kv.value.i; + } else if (kv.key == "max") { + if (CHECKTYPE(kv.value, tINT)) max = kv.value.i; + } else if (kv.key == "interval") { + if (CHECKTYPE(kv.value, tINT)) interval = kv.value.i; + } else { + warning(kv.key.lineno, "ignoring unknown item %s in bubble spec", value_desc(kv.key)); + } + } + if (max < 0) error(lineno, "no max limit in bubble spec"); +} + +Parser::State::Ref &Parser::State::Ref::operator=(const value_t &v) { + lineno = v.lineno; + ptr.clear(); + if (v.type == tSTR) { + name = v.s; + pattern.word0 = pattern.word1 = 0; + } else if (CHECKTYPE2M(v, tINT, tMATCH, "state reference")) { + name.clear(); + if (v.type == tINT) { + pattern.word0 = ~v.i; + pattern.word1 = v.i; + } else { + pattern = v.m; + } + if ((pattern.word0 | pattern.word1) > PARSER_STATE_MASK) { + error(lineno, "Parser state out of range"); + pattern.word0 &= PARSER_STATE_MASK; + pattern.word1 &= PARSER_STATE_MASK; + } else { + pattern.word1 |= ~(pattern.word0 | pattern.word1) & PARSER_STATE_MASK; + } + } + return *this; +} + +void Parser::State::Ref::check(gress_t gress, Parser *pa, State *state) { + if (ptr.empty()) { + if (name.size()) { + auto it = pa->states.find(name); + if (it != pa->states.end()) + ptr.push_back(it->second); + else if (name != "END" && name != "end") + error(lineno, "No state named %s in %sgress parser", name.c_str(), + gress ? 
"e" : "in"); + } else if (pattern) { + match_t tmp = pattern; + unsigned wc = tmp.word0 & tmp.word1; + if (wc && !state->stateno) { + warning(lineno, + "Using next state pattern in state without an explicit " + "state number"); + wc = 0; + } + tmp.word0 &= ~wc | state->stateno.word0; + tmp.word1 &= ~wc | state->stateno.word1; + for (auto *st : pa->all) { + if (st->gress != state->gress) continue; + if (st == state) continue; + if (tmp.matches(st->stateno)) ptr.push_back(st); + } + } + } +} + +const char *Parser::match_key_loc_name(int loc) { + if (options.target == TOFINO) { + if (loc == 0 || loc == 1) return "half"; + if (loc == 2) return "byte0"; + if (loc == 3) return "byte1"; + } else { + if (loc == 0) return "byte0"; + if (loc == 1) return "byte1"; + if (loc == 2) return "byte2"; + if (loc == 3) return "byte3"; + } + + error(-1, "Invalid match key loc"); + return nullptr; +} + +int Parser::match_key_loc(value_t &key, bool errchk) { + if (errchk && !CHECKTYPE(key, tSTR)) return -1; + int loc = Parser::match_key_loc(key.s); + if (loc < 0) error(key.lineno, "Invalid matcher location %s", key.s); + return loc; +} + +int Parser::match_key_loc(const char *key) { + if (options.target == TOFINO) { + if (!strcmp(key, "half") || !strcmp(key, "half0")) return 0; + if (!strcmp(key, "byte0")) return 2; + if (!strcmp(key, "byte1")) return 3; + } else { + if (!strcmp(key, "byte0")) return 0; + if (!strcmp(key, "byte1")) return 1; + if (!strcmp(key, "byte2")) return 2; + if (!strcmp(key, "byte3")) return 3; + } + + error(-1, "Invalid match key %s", key); + return -1; +} + +int Parser::match_key_size(const char *key) { + if (!strncmp(key, "half", 4)) return 16; + if (!strncmp(key, "byte", 4)) return 8; + + error(-1, "Invalid match key %s", key); + return -1; +} + +int Parser::State::MatchKey::move_down(int loc) { + int to = loc; + while (to >= 0 && ((specified >> to) & 1)) to--; + if (to < 0) return -1; + if (data[to].bit >= 0 && move_down(to) < 0) return -1; + data[to] = 
data[loc]; + data[loc].bit = -1; + return 0; +} + +int Parser::State::MatchKey::add_byte(int loc, int byte, bool use_saved) { + // FIXME: Parameter "byte" is an offset in the input packet buffer. + // It seems strange to specify a negative value when checking + // for the lower range (i.e. -64): when bytes are shifted + // out of the input buffer, they can't be read anymore. + // Should the lower range value be 0 instead? + if (options.target == TOFINO) { + if (byte <= -64 || byte >= 32) { + error(lineno, "Match key index out of range"); + return -1; + } + } else { + // Valid offset ranges: + // -63..31 : Input packet + // 60..63 : Scratch registers + if ((byte <= -64) || ((byte > 31) && (byte < 60)) || (byte > 63)) { + error(lineno, "Match key index out of range"); + return -1; + } + } + + if (loc >= 0) { + if ((specified >> loc) & 1) + error(lineno, "Multiple matches in %s matcher", Parser::match_key_loc_name(loc)); + specified |= (1 << loc); + if (data[loc].bit >= 0 && move_down(loc) < 0) return -1; + } else { + for (int i = 3; i >= 0; i--) + if (data[i].bit < 0) { + loc = i; + break; + } + if (loc < 0) { + error(lineno, "Too much data for parse matcher"); + return -1; + } + } + data[loc].bit = width; + data[loc].byte = use_saved ? 
USE_SAVED : byte; + width += 8; + return 0; +} + +int Parser::State::MatchKey::setup_match_el(int at, value_t &spec) { + switch (spec.type) { + case tINT: + return add_byte(at, spec.i); + case tRANGE: + if (spec.range.lo >= spec.range.hi) { + error(spec.lineno, "Invalid match range"); + return -1; + } + if (at >= 0) at += spec.range.hi - spec.range.lo; + for (int i = spec.range.hi; i >= spec.range.lo; i--) { + if (add_byte(at, i) < 0) return -1; + if (at >= 0) at--; + } + return 0; + case tMAP: + if (at >= 0) goto error; + for (int i = spec.map.size - 1; i >= 0; i--) + if (setup_match_el(Parser::match_key_loc(spec.map[i].key), spec.map[i].value) < 0) + return -1; + return 0; + case tSTR: + if (spec == "ctr_zero") { + if (ctr_zero >= 0) { + error(spec.lineno, "'ctr_zero' specified twice"); + return -1; + } + ctr_zero = width++; + return 0; + } else if (spec == "ctr_neg") { + if (ctr_neg >= 0) { + error(spec.lineno, "'ctr_neg' specified twice"); + return -1; + } + ctr_neg = width++; + return 0; + } else if (!strncmp(spec.s, "save_byte", 9)) { + if (options.target == TOFINO) + error(spec.lineno, "Tofino does not have scratch registers in the parser"); + + int i = spec.s[9] - '0'; + if (i < 0 || i > 4) error(spec.lineno, "Invalid parser save source %s", spec.s); + save = 1 << i; + width += 8; + return 0; + } else if (at < 0 && (at = Parser::match_key_loc(spec, false)) >= 0) { + if (options.target == TOFINO && at == 0 && add_byte(1, 0, true) < 0) return -1; + return add_byte(at, 0, true); + } + /* fall through */ + default: + error: + error(spec.lineno, "Syntax error in match spec"); + return -1; + } +} + +void Parser::State::MatchKey::setup(value_t &spec) { + lineno = spec.lineno; + if (spec.type == tVEC) { + /* allocate the keys bits for the least significant match bits first... 
*/ + for (int i = spec.vec.size - 1; i >= 0; i--) + if (setup_match_el(-1, spec[i]) < 0) return; + } else { + setup_match_el(-1, spec); + } + + // For TOFINO, the first match byte pair must be an adjacent 16 bit pair. We + // check and re-arrange the bytes for a 16 bit extractor. In JBAY this check + // is not necessary as we can have independent byte extractors + if (Target::MATCH_BYTE_16BIT_PAIRS() && (data[0].byte & data[1].byte) != USE_SAVED) { + if (data[0].bit >= 0 && data[1].bit >= 0 && data[0].byte + 1 != data[1].byte) { + BUG_CHECK((data[0].byte | data[1].byte) != USE_SAVED); + int unused = -1; // unused slot + for (int i = 0; i < 4; i++) { + if (data[i].bit < 0) { + if (unused < 0) unused = i; + continue; + } + for (int j = 0; j < 4; j++) { + if (data[j].bit >= 0 && data[i].byte + 1 == data[j].byte) { + if (i == 1 && j == 0) { + std::swap(data[i], data[j]); + } else { + std::swap(data[0], data[i]); + std::swap(data[1], data[j]); + } + return; + } + } + } + if (unused >= 0) { + BUG_CHECK(unused > 1); + std::swap(data[1], data[unused]); + } else { + error(spec.lineno, "Must have a 16-bit pair in match bytes"); + } + } + if (data[0].bit < 0 && data[1].bit >= 0) { + /* if we're using half of the 16-bit match, use the upper (first) half */ + std::swap(data[0], data[1]); + } + } +} + +Parser::State::Match::Match(int l, gress_t gress, State *s, match_t m, VECTOR(pair_t) & data) + : lineno(l), state(s), match(m) { + for (auto &kv : data) { + if (kv.key == "counter") { + if (kv.value.type == tMAP) { + ctr_load = 1; + + bool from_ctr_init_ram = false; + + for (auto &kkv : MapIterChecked(kv.value.map, true)) { + if (kkv.key == "src") { + from_ctr_init_ram = true; + } else if (kkv.key == "push" && CHECKTYPE(kkv.value, tINT)) { + if (options.target == TOFINO) + error(kkv.key.lineno, "Tofino does not have counter stack"); + ctr_stack_push = kkv.value.i; + } else if (kkv.key == "update_with_top" && CHECKTYPE(kkv.value, tINT)) { + if (options.target == TOFINO) + 
error(kkv.key.lineno, "Tofino does not have counter stack"); + ctr_stack_upd_w_top = kkv.value.i; + } + } + + if (from_ctr_init_ram) { + ctr_ld_src = 1; + if (ctr_instr) { + error(kv.key.lineno, "Tofino does not allow multiple counters on a match"); + continue; + } + ctr_instr = new CounterInit(gress, kv); + } else { // load from immediate + for (auto &kkv : MapIterChecked(kv.value.map, true)) { + if (kkv.key == "imm" && CHECKTYPE(kkv.value, tINT)) + ctr_imm_amt = kkv.value.i; + else if (kkv.key != "push" && kkv.key != "update_with_top") + error(kkv.value.lineno, "Unknown parser counter init command"); + } + } + } else if (kv.value.type == tCMD) { + if (kv.value[0] == "inc" || kv.value[0] == "increment") { + if (CHECKTYPE(kv.value[1], tINT)) ctr_imm_amt = kv.value[1].i; + } else if (kv.value[0] == "dec" || kv.value[0] == "decrement") { + if (CHECKTYPE(kv.value[1], tINT)) ctr_imm_amt = ~kv.value[1].i + 1; + } else { + error(kv.value.lineno, "Unknown parser counter command"); + } + } else if (kv.value.type == tSTR) { + if (kv.value == "pop") { + if (options.target == TOFINO) + error(kv.key.lineno, "Tofino does not have counter stack"); + ctr_stack_pop = true; + } else { + error(kv.value.lineno, "Unknown parser counter command"); + } + } else { + error(kv.value.lineno, "Syntax error for parser counter"); + } + } else if (kv.key == "hdr_len_inc_stop") { + if (options.target == TOFINO) + error(kv.key.lineno, "Tofino does not support hdr_len_inc_stop"); + else if (hdr_len_inc_stop) + error(kv.key.lineno, "Mulitple hdr_len_inc_stop in match"); + hdr_len_inc_stop = HdrLenIncStop(kv.value); + } else if (kv.key == "priority") { + if (priority) + error(kv.key.lineno, "Mulitple priority updates in match"); + else + priority = PriorityUpdate(kv.value); + } else if (kv.key == "shift") { + if (shift) error(kv.key.lineno, "Multiple shift settings in match"); + if (!CHECKTYPE(kv.value, tINT)) continue; + if ((shift = kv.value.i) < 0 || shift > PARSER_INPUT_BUFFER_SIZE) + 
error(kv.value.lineno, "shift value %d out of range", shift); + } else if (kv.key == "intr_md") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if ((intr_md_bits = kv.value.i) < 0) + error(kv.value.lineno, "intr_md value %d is -ve", intr_md_bits); + } else if (kv.key == "offset_inc") { + if (offset_inc) error(kv.key.lineno, "Multiple offset_inc settings in match"); + if (!CHECKTYPE(kv.value, tINT)) continue; + offset_inc = kv.value.i; + } else if (kv.key == "buf_req") { + if (buf_req >= 0) error(kv.key.lineno, "Multiple buf_req settings in match"); + if (!CHECKTYPE(kv.value, tINT)) continue; + if ((buf_req = kv.value.i) < 0 || shift > PARSER_INPUT_BUFFER_SIZE) + error(kv.value.lineno, "buf_req value %d out of range", shift); + } else if (kv.key == "next") { + if (next.lineno >= 0) { + error(kv.key.lineno, "Multiple next settings in match"); + error(next.lineno, "previously set here"); + } + next = kv.value; + } else if (kv.key == "load") { + if (load.lineno) { + error(kv.value.lineno, "Multiple load entries in match"); + error(load.lineno, "previous specified here"); + } else { + load.setup(kv.value); + } + } else if (kv.key == "save") { + if (options.target == TOFINO) + error(kv.key.lineno, "Tofino does not have scratch registers in the parser"); + + if (load.save) error(kv.value.lineno, "Multiple save entries in match"); + + if (CHECKTYPE(kv.value, tVEC)) { + for (int i = 0; i < kv.value.vec.size; i++) { + if (CHECKTYPE(kv.value[i], tSTR)) { + if (kv.value[i] == "byte0") + load.save |= 1 << 0; + else if (kv.value[i] == "byte1") + load.save |= 1 << 1; + else if (kv.value[i] == "byte2") + load.save |= 1 << 2; + else if (kv.value[i] == "byte3") + load.save |= 1 << 3; + else + error(lineno, "Unexpected parser save source"); + } + } + } + } else if (kv.key == "checksum") { + csum.emplace_back(gress, kv); + } else if (kv.key == "field_mapping") { + if (CHECKTYPE(kv.value, tMAP)) { + for (auto map : kv.value.map) { + auto ref = Phv::Ref(gress, 0, map.key); + auto fm = 
FieldMapping(ref, map.value); + field_mapping.emplace_back(fm); + } + } + } else if (kv.key == "handle") { + if (CHECKTYPE(kv.value, tINT)) value_set_handle = kv.value.i; + } else if (kv.key == "disable_partial_hdr_err") { + if (!CHECKTYPE(kv.value, tINT)) continue; + if (options.target != TOFINO2) + error(kv.key.lineno, "disable_partial_hdr_err only available for Tofino2"); + + if (disable_partial_hdr_err != -1) + error(kv.key.lineno, "Multiple disable_partial_hdr_err settings in match"); + if (kv.value.i < 0 || kv.value.i > 1) + error(kv.value.lineno, "disable_partial_hdr_err value %ld out of range", + kv.value.i); + disable_partial_hdr_err = kv.value.i; + } else if (kv.key == "partial_hdr_err_proc") { + if (!CHECKTYPE(kv.value, tINT)) continue; + error(kv.key.lineno, "partial_hdr_err_proc is unsupported"); + if (partial_hdr_err_proc != -1) + error(kv.key.lineno, "Multiple partial_hdr_err_proc settings in match"); + if (kv.value.i < 0 || kv.value.i > 1) + error(kv.value.lineno, "partial_hdr_err_proc value %ld out of range", kv.value.i); + partial_hdr_err_proc = kv.value.i; + } else if (kv.key.type == tCMD && kv.key == "clot" && kv.key.vec.size == 2) { + clots.push_back(new Clot(gress, kv.key.vec[1], kv.value)); + } else if (kv.key.type == tINT) { + save.push_back(new Save(gress, this, kv.key.i, kv.key.i, kv.value)); + } else if (kv.key.type == tRANGE) { + save.push_back(new Save(gress, this, kv.key.range.lo, kv.key.range.hi, kv.value)); + } else if (kv.value.type == tINT) { + set.push_back(new Set(gress, this, kv.key, kv.value.i)); + } else if (kv.value.type == tCMD && kv.value[0] == "rotate") { + if (CHECKTYPE(kv.value[1], tINT)) + set.push_back(new Set(gress, this, kv.key, kv.value[1].i, ROTATE)); + } else { + error(kv.key.lineno, "Syntax error"); + } + } + + for (auto c : csum) { + if (c.type == 1 && c.end) { + if (c.dst_bit_hdr_end_pos >= shift) // see MODEL-542 + error(c.lineno, "Residual checksum end_pos must be less than state shift amount"); + } + } +} + 
+Parser::State::Match::Match(int l, gress_t gress, State *n) : lineno(l) { + /* build a default match for a synthetic start state */ + offset_inc = shift = 0; + offset_rst = true; + next.name = n->name; + next.ptr.push_back(n); +} + +static value_t &extract_save_phv(value_t &data) { + if (data.type == tVEC) return data[0]; + if (data.type == tCMD && (data[0] == "offset" || data[0] == "rotate")) return data[1]; + return data; +} + +Parser::State::Match::Save::Save(gress_t gress, Match *m, int l, int h, value_t &data, int flgs) + : match(m), lo(l), hi(h), where(gress, 0, extract_save_phv(data)), flags(flgs) { + if (hi < lo || hi - lo > 3 || (hi - lo == 2 && !Target::PARSER_EXTRACT_BYTES())) + error(data.lineno, "Invalid parser extraction size"); + if (data.type == tVEC) { + if (data.vec.size > 2 || data.vec.size < 1) + error(data.lineno, "Can only extract into single or pair"); + if (data.vec.size == 2) second = Phv::Ref(gress, 0, data[1]); + } + if (data.type == tCMD) { + if (data[0] == "offset") + flags |= OFFSET; + else if (data[0] == "rotate") + flags |= ROTATE; + } +} + +Parser::State::Match::Set::Set(gress_t gress, Match *m, value_t &data, int v, int flgs) + : match(m), where(gress, 0, extract_save_phv(data)), what(v), flags(flgs) { + if (data.type == tCMD) { + if (data[0] == "offset") + flags |= OFFSET; + else if (data[0] == "rotate") + flags |= ROTATE; + } +} + +bool Parser::State::Match::Clot::parse_length(const value_t &exp, int what) { + enum { START, MASK, SHIFT, LOAD }; + if (exp.type == tCMD) { + if (exp[0] == ">>") { + return what < SHIFT && parse_length(exp[1], LOAD) && parse_length(exp[2], SHIFT); + } else if (exp[0] == "&") { + return what < SHIFT && parse_length(exp[1], MASK) && parse_length(exp[2], MASK); + } + } else if (exp.type == tINT) { + switch (what) { + case START: + case MASK: + if (length_mask >= 0) return false; + if ((length_mask = exp.i) < 0 || length_mask > 0x3f) { + error(exp.lineno, "length mask %d out of range", length_mask); + 
return false; + } + return true; + case SHIFT: + if (length_shift >= 0) return false; + if ((length_shift = exp.i) < 0 || length_shift > 15) { + error(exp.lineno, "length shift %d out of range", length_shift); + return false; + } + return true; + default: + return false; + } + } else if (exp.type == tSTR && exp.s[0] == '@' && isdigit(exp.s[1])) { + char *end; + if (what == SHIFT || length >= 0 || (length = strtol(exp.s + 1, &end, 10)) < 0 || *end) + return false; + load_length = true; + return true; + } + return false; +} + +Parser::State::Match::Clot::Clot(gress_t gress, const value_t &tag, const value_t &data) + : lineno(tag.lineno) { + if (CHECKTYPE2(tag, tINT, tSTR)) { + if (tag.type == tINT) { + this->tag = tag.i; + name = std::to_string(tag.i); + } else { + this->tag = -1; + name = tag.s; + } + } + Parser::clots[gress][name].push_back(this); + if (!CHECKTYPE3(data, tINT, tRANGE, tMAP)) return; + if (data.type == tINT) { + start = data.i; + length = 1; + } else if (data.type == tRANGE) { + start = data.range.lo; + length = data.range.hi - data.range.lo + 1; + } else { + for (auto &kv : data.map) { + if (kv.key == "start") { + if (CHECKTYPE(kv.value, tINT)) start = kv.value.i; + } else if (kv.key == "length") { + if (kv.value.type == tINT) { + length = kv.value.i; + } else if (!parse_length(kv.value) || !load_length) { + error(kv.value.lineno, "Syntax error"); + } + if (length_mask < 0) length_mask = 0x3f; + if (length_shift < 0) length_shift = 0; + } else if (kv.key == "max_length") { + if (CHECKTYPE(kv.value, tINT)) max_length = kv.value.i; + } else if (kv.key == "checksum") { + if (CHECKTYPE(kv.value, tINT)) csum_unit = kv.value.i; + } else if (kv.key == "stack_depth") { + if (CHECKTYPE(kv.value, tINT)) stack_depth = kv.value.i; + } else if (kv.key == "stack_inc") { + if (CHECKTYPE(kv.value, tINT)) stack_inc = kv.value.i; + } else { + error(kv.key.lineno, "Unknown CLOT key %s", value_desc(kv.key)); + } + } + } + if (start < 0) error(data.lineno, "No start in 
clot %s", name.c_str()); + if (length < 0) error(data.lineno, "No length in clot %s", name.c_str()); + if (max_length < 0) { + if (load_length) + max_length = 64; + else + max_length = length; + } else if (!load_length && max_length != length) { + error(data.lineno, "Inconsistent constant length and max_length in clot"); + } + // Create objects for each element in the stack. Only the first element + // creates the additional stack elements, and this should only be done + // for clot instances in parser loops. + for (int i = stack_inc; i < stack_depth; i += stack_inc) new Clot(gress, *this, i); +} + +/// Clone a clot to create a new stack instance. Should only be used +/// for clot extrcts in non-unrolled parser loops. +Parser::State::Match::Clot::Clot(gress_t gress, const Clot &src, int instance) { + if (src.tag >= 0) { + this->tag = src.tag + instance; + name = std::to_string(this->tag); + } else { + this->tag = -1; + name = src.name + "." + std::to_string(instance); + } + Parser::clots[gress][name].push_back(this); + lineno = src.lineno; + load_length = src.load_length; + start = src.start; + length = src.length; + length_shift = src.length_shift; + length_mask = src.length_mask; + max_length = src.max_length; + csum_unit = src.csum_unit; + stack_depth = src.stack_depth; +} + +Parser::State::Match::FieldMapping::FieldMapping(Phv::Ref &ref, const value_t &a) { + if (CHECKTYPE(a, tCMD)) { + where = ref; + container_id = a.vec[0].s; + lo = a.vec[1].range.lo; + hi = a.vec[1].range.hi; + } else { + error(a.lineno, "Syntax error"); + } +} + +Parser::State::Match::HdrLenIncStop::HdrLenIncStop(const value_t &data) { + if (CHECKTYPE(data, tINT)) { + if (data.i < 0 || data.i > PARSER_INPUT_BUFFER_SIZE) + error(data.lineno, "hdr_len_inc_stop %" PRId64 " out of range", data.i); + lineno = data.lineno; + final_amt = data.i; + } +} + +Parser::State::State(int l, const char *n, gress_t gr, match_t sno, const VECTOR(pair_t) & data) + : name(n), gress(gr), stateno(sno), def(0), 
lineno(l) { + VECTOR(pair_t) default_data = EMPTY_VECTOR_INIT; + bool have_default = data["default"] != 0; + for (auto &kv : data) { + if (kv.key.type == tINT && kv.value.type == tMAP) { + match_t m = {~(unsigned)kv.key.i, (unsigned)kv.key.i}; + match.push_back(new Match(kv.key.lineno, gress, this, m, kv.value.map)); + } else if (kv.key.type == tBIGINT && kv.value.type == tMAP) { + match_t m = {~(unsigned)kv.key.bigi.data[0], (unsigned)kv.key.bigi.data[0]}; + match.push_back(new Match(kv.key.lineno, gress, this, m, kv.value.map)); + } else if (kv.key == "value_set" && kv.value.type == tMAP) { + match_t m = {0, 0}; + match.push_back(new Match(kv.key.lineno, gress, this, m, kv.value.map)); + if (kv.key.type == tCMD) { + if (CHECKTYPE(kv.key[1], tSTR)) match.back()->value_set_name = kv.key[1].s; + if (kv.key.vec.size > 2 && CHECKTYPE(kv.key[2], tINT)) + match.back()->value_set_size = kv.key[2].i; + else + match.back()->value_set_size = 1; + } else { + match.back()->value_set_size = 1; + } + } else if (kv.key.type == tMATCH) { + if (!CHECKTYPE(kv.value, tMAP)) continue; + match.push_back(new Match(kv.key.lineno, gress, this, kv.key.m, kv.value.map)); + } else if (kv.key == "match") { + if (key.lineno) { + error(kv.value.lineno, "Multiple match entries in state %s", n); + error(key.lineno, "previous specified here"); + } else { + key.setup(kv.value); + } + } else if (kv.key == "option") { + if (kv.value == "ignore_max_depth") + ignore_max_depth = true; + else + error(kv.value.lineno, "Unknown state option %s", value_desc(kv.value)); + } else if (kv.key == "default") { + if (!CHECKTYPE(kv.value, tMAP)) continue; + if (def) { + error(kv.key.lineno, "Multiple defaults in state %s", n); + error(def->lineno, "previous specified here"); + } else { + match_t m = {0, 0}; + def = new Match(kv.key.lineno, gress, this, m, kv.value.map); + } + } else if (!have_default) { + VECTOR_add(default_data, kv); + } else { + error(kv.key.lineno, "Syntax error"); + } + } + if 
(default_data.size) { + BUG_CHECK(!def); + match_t m = {0, 0}; + def = new Match(default_data[0].key.lineno, gress, this, m, default_data); + } + VECTOR_fini(default_data); +} + +bool Parser::State::can_be_start() { + if (match.size()) return false; + if (!def) return true; + // if (def->counter || def->offset || def->shift) return false; + // if (def->counter_reset || def->offset_reset) return false; + // if (def->save.size() || def->set.size()) return false; + return true; +} + +void Parser::State::unmark_reachable(Parser *pa, bitvec &unreach) { + if (!unreach[all_idx]) return; + unreach[all_idx] = 0; + for (auto m : match) m->unmark_reachable(pa, this, unreach); + if (def) def->unmark_reachable(pa, this, unreach); +} + +void Parser::State::Match::unmark_reachable(Parser *pa, Parser::State *state, bitvec &unreach) { + for (auto succ : next) succ->unmark_reachable(pa, unreach); +} + +/********* pass 1 *********/ + +void Parser::State::Match::pass1(Parser *pa, State *state) { + next.check(state->gress, pa, state); + for (auto s : save) { + if (!s->where.check()) continue; + if (s->where->reg.parser_id() < 0) + error(s->where.lineno, "%s is not accessable in the parser", s->where->reg.name); + if (options.target == TOFINO && s->lo >= 32 && s->lo < 54) + error(s->where.lineno, "byte 32-53 of input buffer cannot be used for output"); + if (options.target == JBAY && s->lo >= 32 && s->lo < 48) + error(s->where.lineno, "byte 32-47 of input buffer cannot be used for output"); + pa->phv_use[state->gress][s->where->reg.uid] = 1; + int size = s->where.size(); + if (s->second) { + if (!s->second.check()) continue; + if (s->second->reg.parser_id() < 0) + error(s->second.lineno, "%s is not accessable in the parser", s->second->reg.name); + else if (s->second->lo >= 32 && s->second->lo < 54) + error(s->where.lineno, "byte 32-53 of input buffer cannot be used for output"); + else if (s->second->reg.parser_id() != s->where->reg.parser_id() + 1 || + (s->where->reg.parser_id() & 1)) 
+ error(s->second.lineno, "Can only write into even/odd register pair"); + else if (s->second->lo || s->second->hi != size - 1) + error(s->second.lineno, "Can only write data into whole phv registers in parser"); + else + size *= 2; + } + if (!Target::PARSER_EXTRACT_BYTES() && s->where.size() != s->where->reg.size) + error(s->where.lineno, "Can only write data into whole phv registers in parser"); + else if ((s->hi - s->lo + 1) * 8 != size) + error(s->where.lineno, "Data to write doesn't match phv register size"); + } + for (auto s : set) { + if (!s->where.check()) continue; + if (s->where->reg.parser_id() < 0) + error(s->where.lineno, "%s is not accessable in the parser", s->where->reg.name); + pa->phv_use[state->gress][s->where->reg.uid] = 1; + } + if (value_set_size == 0) { + uint64_t match_mask = bitMask(state->key.width); + uint64_t not_covered = match_mask & ~(match.word0 | match.word1); + if (not_covered != 0) { + warning(lineno, + "Match pattern does not cover all bits of match key, " + "assuming the rest are don't care"); + match.word0 |= not_covered; + match.word1 |= not_covered; + } + if ((match.word1 & ~match.word0 & ~match_mask) != 0) + error(lineno, "Matching on bits not in the match of state %s", state->name.c_str()); + for (auto m : state->match) { + if (m == this) break; + if (m->match == match) { + warning(lineno, "Can't match parser state due to previous match"); + warning(m->lineno, "here"); + break; + } + } + } + for (auto &c : csum) c.pass1(pa); +} + +bool Parser::State::Match::Set::merge(gress_t gress, const Set &a) { + auto orig = where; + if (where->reg != a.where->reg) return false; + if (!(where->hi < a.where->lo || a.where->hi < where->lo)) { + warning(where.lineno, "Phv slices %s and %s overlapping", where.name(), a.where.name()); + } + what = ((what << where->lo) | (a.what << a.where->lo)) >> (std::min(where->lo, a.where->lo)); + where = Phv::Ref(where->reg, gress, std::min(where->lo, a.where->lo), + std::max(where->hi, a.where->hi)); 
+ LOG1("Merging phv slices " << orig << " + " << a.where << " = " << where); + return true; +} + +void Parser::State::pass1(Parser *pa) { + for (auto m : match) m->pass1(pa, this); + if (def) def->pass1(pa, this); + for (auto code : MatchIter(stateno)) { + if (pa->state_use[code]) { + error(lineno, "%sgress state %s uses state code %d, already in use", gress ? "E" : "In", + name.c_str(), code); + for (auto *state : pa->all) { + if (state != this && state->gress == gress && state->stateno.matches(code)) + error(state->lineno, "also used by state %s", state->name.c_str()); + } + } + pa->state_use[code] = 1; + } + + for (auto m : match) + for (auto succ : m->next) succ->pred.insert(m); + + if (def) + for (auto succ : def->next) succ->pred.insert(def); +} + +/********* pass 2 *********/ + +void Parser::State::MatchKey::preserve_saved(unsigned saved) { + for (int i = 3; i >= 0; i--) { + if (!((saved >> i) & 1)) continue; + if (data[i].bit < 0 || data[i].byte == USE_SAVED) continue; + if ((specified >> i) & 1) { + error(lineno, + "match in %s matcher conflicts with previous state save " + "action", + Parser::match_key_loc_name(i)); + } else if (move_down(i) < 0) { + error(lineno, + "Ran out of matching space due to preserved values from " + "previous states"); + break; + } + } +} + +void Parser::State::Match::pass2(Parser *pa, State *state) { + for (auto &c : csum) c.pass2(pa); + + if (ctr_instr) ctr_instr->pass2(pa); + + if (clots.size() > 0) { + if (options.target == TOFINO) + error(clots[0]->lineno, "clots not supported on tofino"); + else if (clots.size() > 2) + error(clots[2]->lineno, "no more than two clots per state"); + } +} + +void Parser::State::pass2(Parser *pa) { + if (!stateno) { + unsigned s; + for (s = 0; pa->state_use[s]; s++) { + } + if (s > PARSER_STATE_MASK) { + error(lineno, "Can't allocate state number for %sgress state %s", gress ? 
"e" : "in", + name.c_str()); + } else { + stateno.word0 = s ^ PARSER_STATE_MASK; + stateno.word1 = s; + pa->state_use[s] = 1; + } + } + unsigned def_saved = 0; + if (def && def->load.lineno >= 0) { + for (int i = 0; i < 4; i++) + if (def->load.data[i].bit >= 0) def_saved |= 1 << i; + if (def_saved && def->next) def->next->key.preserve_saved(def_saved); + } + for (auto m : match) { + m->pass2(pa, this); + unsigned saved = def_saved; + if (m->load.lineno) { + for (int i = 0; i < 4; i++) + if (m->load.data[i].bit >= 0) + saved |= 1 << i; + else if (def && def->load.lineno && def->load.data[i].bit >= 0) + m->load.data[i] = def->load.data[i]; + } + if (saved) { + if (m->next) + m->next->key.preserve_saved(saved); + else if (def && def->next) + def->next->key.preserve_saved(saved); + } + } +} + +/********* output *********/ + +/// Extractor config tracking and register config code +/// Different tofino models have very different ways in which their parser extractors are +/// managed, but all are common in that there are multiple extractions that can happen in +/// parallel in a single parser match tcam row. We manage this by having a target-specific +/// 'output_map' object passed via a void * to target-sepcific write_output_config methods +/// along with an `unsigned used` mask that tracks which or how many extractors have been +/// used, so as to issue errors for conflicting uses. +/// +/// The `setup_phv_output_map` method creates the target specific output_map object that +/// will be passed to subsequent `write_output_config` calls to deal with each individual +/// extract. Finally, `mark_unused_output_map` is called to deal with any register setup +/// needed for unused extractors. They're called 'outputs' as the are concerned with +/// outputting PHV values from the parser. +/// +/// PHV outputs are split into 'saves' and 'sets' which come from different syntax in the +/// asm source. 
'saves' copy data from the input buffer into PHVs, while 'sets' write +/// constants into the PHVs. Different targets have different constraints on how flexible +/// they are for saves vs sets, so some want to do saves first and other sets +/// - tofino1: do saves first (why? sets seem more constrained, but there's an issue +/// with ganging smaller extractors to write larger PHVs) +/// - tofino2: do sets first as some extractors can only do saves +/// +/// FIXME -- should probably refactor this into a more C++ style base class pointer with +/// derived classes for each target. Should move the 'used' mask into that object as well. +/// Alternately, could move the entire `setup` to `mark_unused` process into a target specific +/// method. + +std::set Parser::State::Match::get_all_preds() { + std::set visited; + return get_all_preds_impl(visited); +} + +std::set Parser::State::Match::get_all_preds_impl( + std::set &visited) { + if (visited.count(this)) return {}; + + visited.insert(this); + + std::set rv; + + for (auto p : this->state->pred) { + rv.insert(p); + auto pred = p->get_all_preds_impl(visited); + rv.insert(pred.begin(), pred.end()); + } + + return rv; +} + +/* If the bitvec contains one of a pair of 8-bit PHVs, add the other, as they need + * to be owened together in the parser ingress/egress ownership */ +bitvec expand_parser_groups(bitvec phvs) { + for (int i : phvs) + if (Phv::reg(i)->size == 8) phvs[i ^ 1] = 1; + return phvs; +} + +/* remove PHVs from the bitvec which are not accessable in the parser + * FIXME -- should just have a static const bitvec of the valid ones and & with it */ +bitvec remove_nonparser(bitvec phvs) { + for (int i : phvs) + if (Phv::reg(i)->parser_id() < 0) phvs[i] = 0; + return phvs; +} + +void setup_jbay_ownership(bitvec phv_use[2], checked_array<128, ubits<1>> &left, + checked_array<128, ubits<1>> &right, checked_array<256, ubits<1>> &main_i, + checked_array<256, ubits<1>> &main_e) { + for (int i : phv_use[EGRESS]) { + if 
(Phv::reg(i)->size == 8) { + if (phv_use[INGRESS][i ^ 1]) + error(0, "Can't use %s in ingress and %s in egress in Tofino2 parser", + Phv::reg(i ^ 1)->name, Phv::reg(i)->name); + } + } + + std::set left_egress_owner_ids, right_egress_owner_ids; + std::set all_egress_owner_ids; + + for (int i : phv_use[EGRESS]) { + auto id = Phv::reg(i)->parser_id(); + if (id < 0) + error(0, "Can't access %s in parser", Phv::reg(i)->name); + else if (id < 128) + left_egress_owner_ids.insert(id); + else + right_egress_owner_ids.insert(id - 128); + + all_egress_owner_ids.insert(id); + + if (Phv::reg(i)->size == 32) { + if (++id < 128) + left_egress_owner_ids.insert(id); + else + right_egress_owner_ids.insert(id - 128); + + all_egress_owner_ids.insert(id); + } + } + + for (auto id : left_egress_owner_ids) left[id] = 1; + for (auto id : right_egress_owner_ids) right[id] = 1; + for (auto id : all_egress_owner_ids) main_i[id] = main_e[id] = 1; +} + +void setup_jbay_clear_on_write(bitvec phv_allow_clear_on_write, checked_array<128, ubits<1>> &left, + checked_array<128, ubits<1>> &right, + checked_array<256, ubits<1>> &main_i, + checked_array<256, ubits<1>> &main_e) { + for (int i : phv_allow_clear_on_write) { + auto id = Phv::reg(i)->parser_id(); + + if (id < 0) + error(0, "Can't access %s in parser", Phv::reg(i)->name); + else if (id < 128) + left[id] = 1; + else + right[id - 128] = 1; + + main_i[id] = main_e[id] = 1; + + if (Phv::reg(i)->size == 32) { + if (++id < 128) + left[id] = 1; + else + right[id - 128] = 1; + + main_i[id] = main_e[id] = 1; + } + } +} + +void setup_jbay_no_multi_write(bitvec phv_allow_bitwise_or, bitvec phv_allow_clear_on_write, + checked_array<256, ubits<1>> &nmw_i, + checked_array<256, ubits<1>> &nmw_e) { + std::set allow_multi_write_ids; + + for (int i : phv_allow_bitwise_or) { + auto id = Phv::reg(i)->parser_id(); + allow_multi_write_ids.insert(id); + + if (Phv::reg(i)->size == 32) allow_multi_write_ids.insert(++id); + } + + for (int i : 
phv_allow_clear_on_write) { + auto id = Phv::reg(i)->parser_id(); + allow_multi_write_ids.insert(id); + + if (Phv::reg(i)->size == 32) allow_multi_write_ids.insert(++id); + } + + for (int i = 0; i < 256; i++) { + if (!allow_multi_write_ids.count(i)) nmw_i[i] = nmw_e[i] = 1; + } +} + +// WARNING: This function will print all parser paths. In some programs based on +// the complexity of parser graphs, this can result in a path explosion as it +// visits all possible paths and can lead to the function taking an unreasonably +// large amount of time to execute. +// +// The intention for this function is for DEBUG purposes only and should not be +// checked in with it being called from anywhere for logging due to above +// potential worst case issue. +// +// Function also checks for cycles in the parser graph. +// For debug, call function on a parser object and run assembler with -Tparser:1 +void Parser::print_all_paths() { + // Check for cycles in states + ordered_set vstates; + int count = 0; + std::function visit_states = [&](State *s, std::string sstr) { + count++; + // To limit execution uncomment and set variable + // if (count > COUNT_STATE_PATHS) exit(1); + if (s == nullptr) { + LOG1("State Path : " << sstr << " => END"); + return; + } + // Check for previously visited states to show cycles in parser state + // graph + if (vstates.count(s->name)) { + LOG1("****Revisiting " << s->name << " through path : " << sstr + << ". 
Parser graph has a cycle"); + return; + } + if (!sstr.empty()) sstr += " => "; + sstr += s->name; + vstates.insert(s->name); + + LOG1("State Path (" << count << ") : depth (" << vstates.size() << ") :" << sstr); + + for (auto m : s->match) { + std::stringstream ss; + ss << m->match; + std::string sstr2 = sstr + ("(" + ss.str() + ")"); + for (auto ns : m->next) { + visit_states(ns, sstr2); + } + } + vstates.erase(s->name); + }; + if (states.size() > 0) visit_states(states.begin()->second, ""); +} diff --git a/backends/tofino/bf-asm/parser-tofino-jbay.h b/backends/tofino/bf-asm/parser-tofino-jbay.h new file mode 100644 index 00000000000..74dd41aab77 --- /dev/null +++ b/backends/tofino/bf-asm/parser-tofino-jbay.h @@ -0,0 +1,722 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef PARSER_TOFINO_JBAY_H_ +#define PARSER_TOFINO_JBAY_H_ + +#include +#include +#include + +#include "backends/tofino/bf-asm/target.h" +#include "lib/bitvec.h" +#include "parser.h" +#include "phv.h" +#include "sections.h" +#include "ubits.h" + +enum { + /* global constants related to parser */ + PARSER_STATE_MASK = 0xff, + PARSER_TCAM_DEPTH = 256, + PARSER_CHECKSUM_ROWS = 32, + PARSER_CTRINIT_ROWS = 16, + PARSER_INPUT_BUFFER_SIZE = 32, + PARSER_SRC_MAX_IDX = 63, + PARSER_MAX_CLOTS = 64, + PARSER_MAX_CLOT_LENGTH = 64, +}; + +/** + * @brief Representation of the Tofino 1/2 parser in assembler + * @ingroup parde + */ +class Parser : public BaseParser, public Contextable { + void write_config(RegisterSetBase ®s, json::map &json, bool legacy = true) override; + template + void write_config(REGS &, json::map &, bool legacy = true); + struct CounterInit { + gress_t gress; + int lineno = -1, addr = -1; + int add = 0, mask = 255, rot = 0, max = 255, src = -1; + CounterInit(gress_t, pair_t); + void pass1(Parser *) {} + void pass2(Parser *); + template + void write_config(REGS &, gress_t, int); + bool equiv(const CounterInit &) const; + }; + struct PriorityUpdate { + int lineno = -1, offset = -1, shift = -1, mask = -1; + PriorityUpdate() {} + explicit PriorityUpdate(const value_t &data); + bool parse(const value_t &exp, int what = 0); + explicit operator bool() const { return lineno >= 0; } + template + void write_config(REGS &); + }; + struct RateLimit { + int lineno = -1; + int inc = -1, dec = -1, max = -1, interval = -1; + void parse(const VECTOR(pair_t) &); + explicit operator bool() const { return lineno >= 0; } + template + void write_config(REGS &, gress_t); + }; + + public: + struct Checksum; + + struct State { + struct Ref { + int lineno; + std::string name; + match_t pattern; + std::vector ptr; + Ref() : lineno(-1) { pattern.word0 = pattern.word1 = 0; } + Ref &operator=(const value_t &); + explicit 
Ref(value_t &v) { *this = v; } + operator bool() const { return ptr.size() > 0; } + State *operator->() const { + BUG_CHECK(ptr.size() == 1); + return ptr[0]; + } + State *operator*() const { + BUG_CHECK(ptr.size() == 1); + return ptr[0]; + } + bool operator==(const Ref &a) const { return name == a.name && pattern == a.pattern; } + void check(gress_t, Parser *, State *); + std::vector::const_iterator begin() const { return ptr.begin(); } + std::vector::const_iterator end() const { return ptr.end(); } + }; + struct MatchKey { + int lineno; + struct { + short bit, byte; + } data[4]; + enum { USE_SAVED = 0x7fff }; /* magic number can be stored in 'byte' field */ + short specified; + short ctr_zero, ctr_neg; + short width; + short save = 0; + MatchKey() : lineno(0), specified(0), ctr_zero(-1), ctr_neg(-1), width(0) { + for (auto &a : data) a.bit = a.byte = -1; + } + void setup(value_t &); + int setup_match_el(int, value_t &); + void preserve_saved(unsigned mask); + template + void write_config(REGS &, json::vector &); + + private: + int add_byte(int, int, bool use_saved = false); + int move_down(int); + }; + struct OutputUse { + unsigned b8 = 0, b16 = 0, b32 = 0; + OutputUse &operator+=(const OutputUse &a) { + b8 += a.b8; + b16 += a.b16; + b32 += a.b32; + return *this; + } + }; + struct Match { + int lineno; + State *state = nullptr; + match_t match; + std::string value_set_name; + int value_set_size = 0; + int value_set_handle = -1; + int offset_inc = 0, shift = 0, buf_req = -1; + int disable_partial_hdr_err = -1, partial_hdr_err_proc = -1; + bool offset_rst = false; + int intr_md_bits = 0; + + int ctr_imm_amt = 0, ctr_ld_src = 0, ctr_load = 0; + bool ctr_stack_push = false, ctr_stack_upd_w_top = false, ctr_stack_pop = false; + + CounterInit *ctr_instr = nullptr; + + PriorityUpdate priority; + + Ref next; + MatchKey load; + + int row = -1; + /// Data for narrow to wide extraction analysis, flag and + /// vector of affected PHV locations + bool 
has_narrow_to_wide_extract = false; + // 32b narrow to wide extractions using 2x16 extractions + std::vector narrow_to_wide_32b_16; + // 32b narrow to wide extractions using 4x8 extractions + std::vector narrow_to_wide_32b_8; + // 16b narrow to wide extractions using 2x8 extractions + std::vector narrow_to_wide_16b_8; + + enum flags_t { OFFSET = 1, ROTATE = 2 }; + + struct Save { + Match *match; + int lo, hi; + Phv::Ref where, second; + int flags; + Save(gress_t, Match *m, int l, int h, value_t &data, int flgs = 0); + template + int write_output_config(REGS &, void *, unsigned &, int, int) const; + }; + std::vector save; + + struct Set { + Match *match = nullptr; + Phv::Ref where; + unsigned what; + int flags; + Set(gress_t gress, Match *m, value_t &data, int v, int flgs = 0); + template + void write_output_config(REGS &, void *, unsigned &, int, int) const; + bool merge(gress_t, const Set &a); + bool operator==(const Set &a) const { + return where == a.where && what == a.what && flags == a.flags; + } + }; + std::vector set; + + struct Clot { + int lineno, tag; + std::string name; + bool load_length = false; + int start = -1, length = -1, length_shift = -1, length_mask = -1; + int max_length = -1; + int csum_unit = -1; + int stack_depth = 1; + int stack_inc = 1; + Clot(gress_t gress, const value_t &tag, const value_t &data); + Clot(const Clot &) = delete; + Clot(Clot &&) = delete; + bool parse_length(const value_t &exp, int what = 0); + template + void write_config(PO_ROW &, int, bool) const; + + private: + Clot(gress_t, const Clot &, int); + }; + std::vector clots; + std::vector csum; + + struct FieldMapping { + Phv::Ref where; + std::string container_id; + int lo = -1; + int hi = -1; + FieldMapping(Phv::Ref &ref, const value_t &a); + }; + std::vector field_mapping; + + struct HdrLenIncStop { + int lineno = -1; + unsigned final_amt = 0; + HdrLenIncStop() {} + explicit HdrLenIncStop(const value_t &data); + explicit operator bool() const { return lineno >= 0; } + 
template + void write_config(PO_ROW &) const; + } hdr_len_inc_stop; + + Match(int lineno, gress_t, State *s, match_t m, VECTOR(pair_t) & data); + Match(int lineno, gress_t, State *n); + ~Match() { + if (ctr_instr) delete ctr_instr; + } + void unmark_reachable(Parser *, State *state, bitvec &unreach); + void pass1(Parser *pa, State *state); + void pass2(Parser *pa, State *state); + template + int write_load_config(REGS &, Parser *, State *, int) const; + template + void write_lookup_config(REGS &, State *, int) const; + template + void write_counter_config(EA_REGS &) const; + template + void write_common_row_config(REGS &, Parser *, State *, int, Match *, json::map &); + template + void write_row_config(REGS &, Parser *, State *, int, Match *, json::map &); + template + void write_config(REGS &, Parser *, State *, Match *, json::map &); + template + void write_config(REGS &, json::vector &); + + template + void write_saves(REGS ®s, Match *def, void *output_map, int &max_off, unsigned &used, + int csum_8b, int csum_16b); + template + void write_sets(REGS ®s, Match *def, void *output_map, unsigned &used, int csum_8b, + int csum_16b); + + std::set get_all_preds(); + std::set get_all_preds_impl(std::set &visited); + }; + + std::string name; + gress_t gress; + match_t stateno; + MatchKey key; + std::vector match; + Match *def; + std::set pred; + bool ignore_max_depth = false; + int lineno = -1; + int all_idx = -1; + + State(State &&) = default; + State(int lineno, const char *name, gress_t, match_t stateno, const VECTOR(pair_t) & data); + bool can_be_start(); + void unmark_reachable(Parser *, bitvec &); + void pass1(Parser *); + void pass2(Parser *); + template + int write_lookup_config(REGS &, Parser *, State *, int, const std::vector &); + template + void write_config(REGS &, Parser *, json::vector &); + }; + + struct Checksum { + int lineno = -1, addr = -1, unit = -1; + gress_t gress; + Phv::Ref dest; + int tag = -1; + unsigned add = 0, mask = 0, swap = 0, mul_2 = 0; 
+ unsigned dst_bit_hdr_end_pos = 0; + bool start = false, end = false, shift = false; + unsigned type = 0; // 0 = verify, 1 = residual, 2 = clot + Checksum(gress_t, pair_t); + bool equiv(const Checksum &) const; + void pass1(Parser *); + void pass2(Parser *); + template + void write_config(REGS &, Parser *); + template + void write_output_config(REGS &, Parser *, State::Match *, void *, unsigned &) const; + + private: + template + void write_tofino_row_config(ROW &row); + template + void write_row_config(ROW &row); + }; + + public: + void input(VECTOR(value_t) args, value_t data); + void process(); + void output(json::map &) override; + void output_legacy(json::map &); + gress_t gress; + std::string name; + std::map states; + std::vector all; + std::map match_to_row; + bitvec port_use; + int parser_no; // used to print cfg.json + bitvec state_use; + State::Ref start_state[4]; + int priority[4] = {0}; + int pri_thresh[4] = {0, 0, 0, 0}; + int tcam_row_use = 0; + Phv::Ref parser_error; + // the ghost "parser" extracts 32-bit value + // this information is first extracted in AsmParser and passed to + // individual Parser, because currently parse_merge register is programmed + // in Parser class. + // FIXME -- should move all merge reg handling into AsmParser. 
+ std::vector ghost_parser; + unsigned ghost_pipe_mask = 0xf; // only set for JBAY + bitvec (&phv_use)[2]; + bitvec phv_allow_bitwise_or, phv_allow_clear_on_write; + bitvec phv_init_valid; + int hdr_len_adj = 0, meta_opt = 0; + std::vector> checksum_use; + std::array counter_init = {}; + static std::map>> clots; + static std::array, PARSER_MAX_CLOTS> clot_use; + static unsigned max_handle; + int parser_handle = -1; + RateLimit rate_limit; + + Parser(bitvec (&phv_use)[2], gress_t gr, int idx) + : gress(gr), parser_no(idx), phv_use(phv_use) { + if (gress == INGRESS) { + parser_depth_max_bytes = Target::PARSER_DEPTH_MAX_BYTES_INGRESS(); + parser_depth_min_bytes = Target::PARSER_DEPTH_MIN_BYTES_INGRESS(); + } else { + parser_depth_max_bytes = Target::PARSER_DEPTH_MAX_BYTES_EGRESS(); + parser_depth_min_bytes = Target::PARSER_DEPTH_MIN_BYTES_EGRESS(); + } + } + + template + void gen_configuration_cache(REGS &, json::vector &cfg_cache); + static int clot_maxlen(gress_t gress, unsigned tag) { + auto &vec = clot_use[tag]; + return vec.empty() ? 
-1 : vec.at(0)->max_length; + } + static int clot_maxlen(gress_t gress, std::string tag) { + if (clots.count(gress) && clots.at(gress).count(tag)) + return clots.at(gress).at(tag).at(0)->max_length; + return -1; + } + static int clot_tag(gress_t gress, std::string tag) { + if (clots.count(gress) && clots.at(gress).count(tag)) + return clots.at(gress).at(tag).at(0)->tag; + return -1; + } + + static const char *match_key_loc_name(int loc); + static int match_key_loc(const char *key); + static int match_key_loc(value_t &key, bool errchk = true); + static int match_key_size(const char *key); + + // Parser Handle Setup + // ____________________________________________________ + // | Table Type | Pipe Id | Parser Handle | PVS Handle | + // 31 24 20 12 0 + // PVS Handle = 12 bits + // Parser Handle = 8 bits + // Pipe ID = 4 bits + // Table Type = 8 bits (Parser type is 15) + static unsigned next_handle() { + // unique_table_offset is to support multiple pipe. + // assume parser type is 15, table type used 0 - 6 + return max_handle++ << 12 | unique_table_offset << 20 | 15 << 24; + } + // Store parser names to their handles. 
Used by phase0 match tables to link + // parser handle + static std::map parser_handles; + static unsigned get_parser_handle(std::string phase0Table) { + for (auto p : Parser::parser_handles) { + auto parser_name = p.first; + if (phase0Table.find(parser_name) != std::string::npos) return p.second; + } + return 0; + } + + template + void *setup_phv_output_map(REGS &, gress_t, int); + + State *get_start_state() { + std::vector startNames = {"start", "START", "$entry_point.start", + "$entry_point"}; + for (auto n : startNames) { + if (states.count(n)) return states.at(n); + } + return nullptr; + } + + int get_prsr_max_dph(); + int get_header_stack_size_from_valid_bits(std::vector sets); + + // Debug + void print_all_paths(); + + private: + template + void mark_unused_output_map(REGS &, void *, unsigned); + void define_state(gress_t gress, pair_t &kv); + void output_default_ports(json::vector &vec, bitvec port_use); + int state_prsr_dph_max(const State *s); + int state_prsr_dph_max(const State *s, std::map> &visited, + int curr_dph_bits); + int parser_depth_max_bytes, parser_depth_min_bytes; +}; + +class AsmParser : public BaseAsmParser { + std::vector parser[2]; // INGRESS, EGRESS + bitvec phv_use[2]; // ingress/egress only + std::vector ghost_parser; // the ghost "parser" extracts 32-bit value. This 32-bit + // can be from a single 32-bit container or multiple + // smaller one. 
+ unsigned ghost_pipe_mask = 0xf; // only set for JBAY + void start(int lineno, VECTOR(value_t) args) override; + void input(VECTOR(value_t) args, value_t data) override; + void process() override; + void output(json::map &) override; + void init_port_use(bitvec &port_use, const value_t &arg); + + public: + AsmParser() : BaseAsmParser("parser"){}; + ~AsmParser() {} + + // For gtest + std::vector test_get_parser(gress_t gress); +}; + +template +void Parser::PriorityUpdate::write_config(REGS &action_row) { + if (offset >= 0) { + action_row.pri_upd_type = 1; + action_row.pri_upd_src = offset; + action_row.pri_upd_en_shr = shift; + action_row.pri_upd_val_mask = mask; + } else { + action_row.pri_upd_type = 0; + action_row.pri_upd_en_shr = 1; + action_row.pri_upd_val_mask = mask; + } +} + +// for jbay (tofino1 is specialized) +template <> +void Parser::RateLimit::write_config(::Tofino::regs_pipe ®s, gress_t gress); +template +void Parser::RateLimit::write_config(REGS ®s, gress_t gress) { + if (gress == INGRESS) { + auto &ctrl = regs.pardereg.pgstnreg.parbreg.left.i_phv_rate_ctrl; + ctrl.inc = inc; + ctrl.interval = interval; + ctrl.max = max; + } else if (gress == EGRESS) { + auto &ctrl = regs.pardereg.pgstnreg.parbreg.right.e_phv_rate_ctrl; + ctrl.inc = inc; + ctrl.interval = interval; + ctrl.max = max; + } +} + +template +void Parser::State::MatchKey::write_config(REGS &, json::vector &) { + // FIXME -- TBD -- probably needs to be different for tofino/jbay, so there will be + // FIXME -- template specializations for this in those files +} + +template +void Parser::State::Match::write_saves(REGS ®s, Match *def, void *output_map, int &max_off, + unsigned &used, int csum_8b, int csum_16b) { + if (offset_inc) + for (auto s : save) s->flags |= OFFSET; + for (auto s : save) + max_off = + std::max(max_off, s->write_output_config(regs, output_map, used, csum_8b, csum_16b)); + if (def) + for (auto &s : def->save) + max_off = std::max(max_off, + s->write_output_config(regs, 
output_map, used, csum_8b, csum_16b)); +} + +template +void Parser::State::Match::write_sets(REGS ®s, Match *def, void *output_map, unsigned &used, + int csum_8b, int csum_16b) { + if (offset_inc) + for (auto s : set) s->flags |= ROTATE; + for (auto s : set) s->write_output_config(regs, output_map, used, csum_8b, csum_16b); + if (def) + for (auto s : def->set) s->write_output_config(regs, output_map, used, csum_8b, csum_16b); +} + +template +void Parser::State::Match::write_common_row_config(REGS ®s, Parser *pa, State *state, int row, + Match *def, json::map &ctxt_json) { + int max_off = -1; + write_lookup_config(regs, state, row); + + auto &ea_row = regs.memory[state->gress].ml_ea_row[row]; + if (ctr_instr || ctr_load || ctr_imm_amt || ctr_stack_pop) { + write_counter_config(ea_row); + } else if (def) { + def->write_counter_config(ea_row); + } + if (shift) + max_off = std::max(max_off, int(ea_row.shift_amt = shift) - 1); + else if (def) + max_off = std::max(max_off, int(ea_row.shift_amt = def->shift) - 1); + max_off = std::max(max_off, write_load_config(regs, pa, state, row)); + if (auto &next = (!this->next && def) ? def->next : this->next) { + std::vector prev; + for (auto n : next) { + max_off = std::max(max_off, n->write_lookup_config(regs, pa, state, row, prev)); + prev.push_back(n); + } + const match_t &n = next.pattern ? 
next.pattern : next->stateno; + ea_row.nxt_state = n.word1; + ea_row.nxt_state_mask = ~(n.word0 & n.word1) & PARSER_STATE_MASK; + } else { + ea_row.done = 1; + } + + auto &action_row = regs.memory[state->gress].po_action_row[row]; + for (auto &c : csum) { + action_row.csum_en[c.unit] = 1; + action_row.csum_addr[c.unit] = c.addr; + } + if (offset_inc || offset_rst) { + action_row.dst_offset_inc = offset_inc; + action_row.dst_offset_rst = offset_rst; + } else if (def) { + action_row.dst_offset_inc = def->offset_inc; + action_row.dst_offset_rst = def->offset_rst; + } + if (priority) priority.write_config(action_row); + if (hdr_len_inc_stop) hdr_len_inc_stop.write_config(action_row); + + void *output_map = pa->setup_phv_output_map(regs, state->gress, row); + unsigned used = 0; + int csum_8b = 0; + int csum_16b = 0; + for (auto &c : csum) { + c.write_output_config(regs, pa, this, output_map, used); + if (c.type == 0 && c.dest) { + if (c.dest->reg.size == 8) + ++csum_8b; + else if (c.dest->reg.size == 16) + ++csum_16b; + } + } + + if (options.target == TOFINO) { + write_sets(regs, def, output_map, used, csum_8b, csum_16b); + write_saves(regs, def, output_map, max_off, used, csum_8b, csum_16b); + } else { + write_sets(regs, def, output_map, used, 0, 0); + write_saves(regs, def, output_map, max_off, used, 0, 0); + } + + int clot_unit = 0; + for (auto *c : clots) c->write_config(action_row, clot_unit++, offset_inc > 0); + if (def) + for (auto *c : def->clots) c->write_config(action_row, clot_unit++, offset_inc > 0); + pa->mark_unused_output_map(regs, output_map, used); + + if (buf_req < 0) { + buf_req = max_off + 1; + BUG_CHECK(buf_req <= 32); + } + ea_row.buf_req = buf_req; +} + +template +void Parser::State::Match::write_row_config(REGS ®s, Parser *pa, State *state, int row, + Match *def, json::map &ctxt_json) { + write_common_row_config(regs, pa, state, row, def, ctxt_json); +} + +template +void Parser::State::Match::write_config(REGS ®s, Parser *pa, State *state, Match 
*def, + json::map &ctxt_json) { + int row, count = 0; + do { + if ((row = --pa->tcam_row_use) < 0) { + if (row == -1) + error(state->lineno, "Ran out of tcam space in %sgress parser", + state->gress ? "e" : "in"); + return; + } + ctxt_json["tcam_rows"].to().push_back(row); + write_row_config(regs, pa, state, row, def, ctxt_json); + pa->match_to_row[this] = row; + } while (++count < value_set_size); +} + +template +void Parser::State::Match::write_config(REGS ®s, json::vector &vec) { + int select_statement_bit = 0; + for (auto f : field_mapping) { + json::map container_cjson; + container_cjson["container_width"] = Parser::match_key_size(f.container_id.c_str()); + + int container_hardware_id = Parser::match_key_loc(f.container_id.c_str()); + container_cjson["container_hardware_id"] = container_hardware_id; + + container_cjson["mask"] = (1 << (f.hi - f.lo + 1)) - 1; + json::vector field_mapping_cjson; + for (auto i = f.lo; i <= f.hi; i++) { + json::map field_map; + field_map["register_bit"] = i; + field_map["field_name"] = f.where.name(); + field_map["start_bit"] = i; + field_map["select_statement_bit"] = select_statement_bit++; + field_mapping_cjson.push_back(field_map.clone()); + } + container_cjson["field_mapping"] = field_mapping_cjson.clone(); + vec.push_back(container_cjson.clone()); + } +} + +template +void Parser::State::write_config(REGS ®s, Parser *pa, json::vector &ctxt_json) { + LOG2(gress << " state " << name << " (" << stateno << ')'); + for (auto i : match) { + bool uses_pvs = false; + json::map state_cjson; + state_cjson["parser_name"] = name; + i->write_config(regs, state_cjson["match_registers"]); + if (i->value_set_size > 0) uses_pvs = true; + i->write_config(regs, pa, this, def, state_cjson); + state_cjson["uses_pvs"] = uses_pvs; + if (def) def->write_config(regs, pa, this, 0, state_cjson); + if (uses_pvs) { + state_cjson["pvs_name"] = i->value_set_name; + if (i->value_set_handle < 0) + error(lineno, "Invalid handle for parser value set %s", 
i->value_set_name.c_str()); + auto pvs_handle_full = i->value_set_handle; + state_cjson["pvs_handle"] = pvs_handle_full; + } + for (auto idx : MatchIter(stateno)) { + state_cjson["parser_state_id"] = idx; + ctxt_json.push_back(state_cjson.clone()); + } + } +} + +template +void Parser::Checksum::write_tofino_row_config(ROW &row) { + row.add = add; + if (dest) + row.dst = dest->reg.parser_id(); + else if (tag >= 0) + row.dst = tag; + row.dst_bit_hdr_end_pos = dst_bit_hdr_end_pos; + row.hdr_end = end; + int rsh = 0; + for (auto &el : row.mask) el = (mask >> rsh++) & 1; + row.shr = shift; + row.start = start; + rsh = 0; + for (auto &el : row.swap) el = (swap >> rsh++) & 1; + row.type = type; +} + +template +void Parser::Checksum::write_row_config(ROW &row) { + write_tofino_row_config(row); + int rsh = 0; + for (auto &el : row.mul_2) el = (mul_2 >> rsh++) & 1; +} + +// Used with JBay +bitvec expand_parser_groups(bitvec phvs); +bitvec remove_nonparser(bitvec phvs); +void setup_jbay_ownership(bitvec phv_use[2], checked_array<128, ubits<1>> &left, + checked_array<128, ubits<1>> &right, checked_array<256, ubits<1>> &main_i, + checked_array<256, ubits<1>> &main_e); +void setup_jbay_no_multi_write(bitvec phv_allow_bitwise_or, bitvec phv_allow_clear_on_write, + checked_array<256, ubits<1>> &nmw_i, + checked_array<256, ubits<1>> &nmw_e); +void setup_jbay_clear_on_write(bitvec phv_allow_clear_on_write, checked_array<128, ubits<1>> &left, + checked_array<128, ubits<1>> &right, + checked_array<256, ubits<1>> &main_i, + checked_array<256, ubits<1>> &main_e); + +#endif /* PARSER_TOFINO_JBAY_H_ */ diff --git a/backends/tofino/bf-asm/parser.h b/backends/tofino/bf-asm/parser.h new file mode 100644 index 00000000000..e49e79025b4 --- /dev/null +++ b/backends/tofino/bf-asm/parser.h @@ -0,0 +1,45 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_PARSER_H_ +#define BACKENDS_TOFINO_BF_ASM_PARSER_H_ + +#include "asm-types.h" +#include "backends/tofino/bf-asm/json.h" +#include "backends/tofino/bf-asm/target.h" +#include "sections.h" +#include "vector.h" + +/** + * @brief Base class of Tofino parser in assembler + * + * For Tofino 1/2, the class Parser is derived. + */ +class BaseParser : virtual public Configurable { + protected: + int lineno = -1; +}; + +/** + * @brief Base class of parser assembly section + */ +class BaseAsmParser : public Section { + public: + explicit BaseAsmParser(const char *name_) : Section(name_) {} +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_PARSER_H_ */ diff --git a/backends/tofino/bf-asm/phase0.cpp b/backends/tofino/bf-asm/phase0.cpp new file mode 100644 index 00000000000..4e182ae28c1 --- /dev/null +++ b/backends/tofino/bf-asm/phase0.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "parser-tofino-jbay.h" + +DEFINE_TABLE_TYPE(Phase0MatchTable) + +void Phase0MatchTable::setup(VECTOR(pair_t) & data) { + for (auto &kv : MapIterChecked(data)) { + if (common_setup(kv, data, P4Table::MatchEntry)) { + } else if (auto *fmt = get(data, "format")) { + if (CHECKTYPEPM(*fmt, tMAP, fmt->map.size > 0, "non-empty map")) + format.reset(new Format(this, fmt->map)); + } else if (kv.key == "size") { + if (CHECKTYPE(kv.value, tINT)) size = kv.value.i; + } else if (kv.key == "constant_value") { + if (CHECKTYPE(kv.value, tINT)) constant_value = kv.value.i; + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (gress != INGRESS || stage->stageno != 0) + error(lineno, "Phase 0 match table can only be in stage 0 ingress"); +} + +void Phase0MatchTable::pass1() { + LOG1("### Phase 0 match table " << name() << " pass1 " << loc()); + MatchTable::pass1(); + if (actions) actions->pass1(this); +} + +void Phase0MatchTable::pass2() { LOG1("### Phase 0 match table " << name() << " pass2 " << loc()); } + +void Phase0MatchTable::pass3() { LOG1("### Phase 0 match table " << name() << " pass3 " << loc()); } + +template +void Phase0MatchTable::write_regs_vt(REGS &) { + LOG1("### Phase 0 match table " << name() << " write_regs " << loc()); +} + +void Phase0MatchTable::gen_tbl_cfg(json::vector &out) const { + json::map &tbl = *base_tbl_cfg(out, "match_entry", p4_table ? 
p4_table->size : size); + common_tbl_cfg(tbl); + tbl["statistics_table_refs"] = json::vector(); + tbl["meter_table_refs"] = json::vector(); + tbl["selection_table_refs"] = json::vector(); + tbl["stateful_table_refs"] = json::vector(); + tbl["action_data_table_refs"] = json::vector(); + json::map &match_attributes = tbl["match_attributes"] = json::map(); + json::map &stage_tbl = *add_stage_tbl_cfg(match_attributes, "phase_0_match", size); + match_attributes["match_type"] = "phase_0_match"; + stage_tbl["stage_number"] = -1; + // Associate the phase0 table with corresponding parser. This is used in a + // multi parser scenario which has multiple phase0 tables + // and the handle is used by the driver to link the phase0 table to the + // parser. + auto parser_handle = Parser::get_parser_handle(name()); + if (parser_handle > 0) stage_tbl["parser_handle"] = parser_handle; + stage_tbl.erase("logical_table_id"); + stage_tbl.erase("default_next_table"); + stage_tbl.erase("has_attached_gateway"); + auto &mra = stage_tbl["memory_resource_allocation"] = json::map(); + mra["memory_type"] = "ingress_buffer"; + json::map tmp; + (tmp["vpns"] = json::vector()).push_back(INT64_C(0)); + (tmp["memory_units"] = json::vector()).push_back(INT64_C(0)); + (mra["memory_units_and_vpns"] = json::vector()).push_back(std::move(tmp)); + // Driver looks at the pack format to determine the fields and their + // positions. Since phase0 is only mimicking a table, the driver expects to + // have a single entry within the pack format. 
+ bool pad_zeros = false; + bool print_fields = true; + add_pack_format(stage_tbl, format.get(), pad_zeros, print_fields); + if (actions) actions->gen_tbl_cfg(tbl["actions"]); + if (context_json) stage_tbl.merge(*context_json); +} diff --git a/backends/tofino/bf-asm/phv.cpp b/backends/tofino/bf-asm/phv.cpp new file mode 100644 index 00000000000..b7fcf49d0a9 --- /dev/null +++ b/backends/tofino/bf-asm/phv.cpp @@ -0,0 +1,496 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "phv.h" + +#include +#include + +#include "lib/log.h" +#include "misc.h" + +Phv Phv::phv; +const Phv::Register Phv::Slice::invalid("", Phv::Register::NORMAL, 0, ~0, 0); + +void Phv::init_phv(target_t target_type) { + if (target) { + BUG_CHECK(target->type() == target_type); // sanity check + return; + } + switch (target_type) { +#define INIT_FOR_TARGET(TARGET) \ + case Target::TARGET::tag: \ + target = new Target::TARGET::Phv; \ + break; + FOR_ALL_TARGETS(INIT_FOR_TARGET) + default: + BUG(); + } +#undef INIT_FOR_TARGET + target->init_regs(*this); +} + +void Phv::start(int lineno, VECTOR(value_t) args) { + if (options.target == NO_TARGET) { + error(lineno, "No target specified prior to PHV section"); + return; + } + init_phv(options.target); + // The only argument to phv is the thread. We allow phv section with no thread argument + // which defines aliases for all threads. 
Does that really make sense when threads can't + // share registers? We never use this capability in the compiler. + if (args.size > 1 || (args.size == 1 && args[0] != "ingress" && args[0] != "egress" && + (args[0] != "ghost" || options.target < JBAY))) + error(lineno, "phv can only be ingress%s or egress", + (options.target >= JBAY ? ", ghost" : 0)); +} + +int Phv::addreg(gress_t gress, const char *name, const value_t &what, int stage, int max_stage) { + std::string phv_name = name; + remove_name_tail_range(phv_name); + if (stage == -1 && what.type == tMAP) { + int rv = 0; + for (auto &kv : what.map) { + auto &key = kv.key.type == tCMD && kv.key.vec.size > 1 && kv.key == "stage" ? kv.key[1] + : kv.key; + if (CHECKTYPE2(key, tINT, tRANGE)) { + if (key.type == tINT) + rv |= addreg(gress, name, kv.value, key.i); + else + rv |= addreg(gress, name, kv.value, key.range.lo, key.range.hi); + } + } + int size = -1; + PerStageInfo *prev = 0; + for (auto &ch : names[gress].at(name)) { + if (prev) { + if (prev->max_stage >= ch.first) { + if (prev->max_stage != INT_MAX) + error(what.lineno, "Overlapping assignments in stages %d..%d for %s", + ch.first, prev->max_stage, name); + prev->max_stage = ch.first - 1; + } + } + prev = &ch.second; + if (size < 0) { + size = ch.second.slice->size(); + } else if (size != ch.second.slice->size() && size > 0) { + error(what.lineno, "Inconsitent sizes for %s", name); + size = 0; + } + } + if (prev && prev->max_stage >= Target::NUM_MAU_STAGES()) prev->max_stage = INT_MAX; + add_phv_field_sizes(gress, phv_name, size); + return rv; + } + if (!CHECKTYPE2M(what, tSTR, tCMD, "register or slice")) return -1; + auto reg = what.type == tSTR ? 
what.s : what[0].s; + if (const Slice *sl = get(gress, stage, reg)) { + if (sl->valid) { + phv_use[gress][sl->reg.uid] = true; + user_defined[&sl->reg].first = gress; + if (max_stage != INT_MAX) { + /* a name that spans across stages - add it to all stages */ + for (int i = stage; i <= max_stage; i++) { + user_defined[&sl->reg].second[i].insert(name); + } + } else { + for (int i = 0; i <= Target::NUM_MAU_STAGES(); i++) { + user_defined[&sl->reg].second[i].insert(name); + } + } + LOG5(" Adding " << name << " to user_defined"); + } + auto ® = names[gress][name]; + if (what.type == tSTR) { + reg[stage].slice = *sl; + } else if (what.vec.size != 2) { + error(what.lineno, "Syntax error, expecting bit or slice"); + return -1; + } else if (!CHECKTYPE2M(what[1], tINT, tRANGE, "bit or slice")) { + return -1; + } else if (what[1].type == tINT) { + reg[stage].slice = Slice(*sl, what[1].i, what[1].i); + } else { + reg[stage].slice = Slice(*sl, what[1].range.lo, what[1].range.hi); + } + reg[stage].max_stage = max_stage; + if (!reg[stage].slice.valid) { + auto slice = reg[stage].slice; + error(what.lineno, "Invalid register slice - %s[%d:%d]", slice.reg.name, slice.hi, + slice.lo); + return -1; + } + if (stage == -1) { + add_phv_field_sizes(gress, phv_name, reg[stage].slice->size()); + if (is_pov(phv_name)) { + phv_pov_names[sl->reg.mau_id()][reg[stage].slice.lo] = phv_name; + } + } + return 0; + } else { + error(what.lineno, "No register named %s", reg); + return -1; + } +} + +void Phv::input(VECTOR(value_t) args, value_t data) { + if (!CHECKTYPE(data, tMAP)) return; + gress_t gress = + args[0] == "ingress" ? INGRESS + : args[0] == "egress" ? EGRESS + : args[0] == "ghost" && options.target >= JBAY + ? 
GHOST + : (error(args[1].lineno, "Invalid thread %s", value_desc(args[1])), INGRESS); + for (auto &kv : data.map) { + if (!CHECKTYPE(kv.key, tSTR)) continue; + if (kv.key == "context_json") { + if (!CHECKTYPE(kv.value, tMAP)) continue; + field_context_json.merge(*toJson(kv.value.map)); + } else { + if (get(gress, INT_MAX, kv.key.s) || (!args.size && get(EGRESS, INT_MAX, kv.key.s)) || + (!args.size && get(GHOST, INT_MAX, kv.key.s))) { + error(kv.key.lineno, "Duplicate phv name '%s'", kv.key.s); + continue; + } + if (!addreg(gress, kv.key.s, kv.value) && args.size == 0) { + addreg(EGRESS, kv.key.s, kv.value); + if (options.target >= JBAY) addreg(GHOST, kv.key.s, kv.value); + } + } + } +} + +Phv::Ref::Ref(gress_t g, int stage, const value_t &n) + : gress_(g), stage(stage), lo(-1), hi(-1), lineno(n.lineno) { + if (CHECKTYPE2M(n, tSTR, tCMD, "phv or register reference or slice")) { + if (n.type == tSTR) { + name_ = n.s; + } else { + name_ = n[0].s; + if (PCHECKTYPE2M(n.vec.size == 2, n[1], tINT, tRANGE, "register slice")) { + if (n[1].type == tINT) { + lo = hi = n[1].i; + } else { + lo = n[1].range.lo; + hi = n[1].range.hi; + if (lo > hi) { + lo = n[1].range.hi; + hi = n[1].range.lo; + } + } + } + } + } +} + +Phv::Ref::Ref(const Phv::Register &r, gress_t gr, int l, int h) + : gress_(gr), name_(r.name), stage(0), lo(l), hi(h < 0 ? 
l : h), lineno(-1) {} + +bool Phv::Ref::merge(const Phv::Ref &r) { + if (r.name_ != name_ || r.gress_ != gress_) return false; + if (lo < 0) return true; + if (r.lo < 0) { + lo = hi = -1; + return true; + } + if (r.hi + 1 < lo || hi + 1 < r.lo) return false; + if (r.lo < lo) lo = r.lo; + if (r.hi > hi) { + lineno = r.lineno; + hi = r.hi; + } + return true; +} + +void merge_phv_vec(std::vector &vec, const Phv::Ref &r) { + int merged = -1; + for (int i = 0; (unsigned)i < vec.size(); i++) { + if (merged >= 0) { + if (vec[merged].merge(vec[i])) { + vec.erase(vec.begin() + i); + --i; + } + } else if (vec[i].merge(r)) { + merged = i; + } + } + if (merged < 0) vec.push_back(r); +} + +void merge_phv_vec(std::vector &v1, const std::vector &v2) { + for (auto &r : v2) merge_phv_vec(v1, r); +} + +std::vector split_phv_bytes(const Phv::Ref &r) { + std::vector rv; + const auto &sl = *r; + for (unsigned byte = sl.lo / 8U; byte <= sl.hi / 8U; byte++) { + int lo = byte * 8 - sl.lo; + int hi = lo + 7; + if (lo < 0) lo = 0; + if (hi >= static_cast(sl.size())) hi = sl.size() - 1; + rv.emplace_back(r, lo, hi); + } + return rv; +} + +std::vector split_phv_bytes(const std::vector &v) { + std::vector rv; + for (auto &r : v) append(rv, split_phv_bytes(r)); + return rv; +} + +std::string Phv::Ref::toString() const { + std::stringstream str; + str << *this; + return str.str(); +} + +void Phv::Ref::dbprint(std::ostream &out) const { + out << name_; + if (lo >= 0) { + out << '[' << hi; + if (hi != lo) out << ":" << lo; + out << ']'; + } + Slice sl(**this); + if (sl.valid) { + out << '['; + sl.dbprint(out); + out << ']'; + } +} + +std::string Phv::Ref::desc() const { return toString(); } + +std::string Phv::Slice::toString() const { + std::stringstream str; + str << *this; + return str.str(); +} + +void Phv::Slice::dbprint(std::ostream &out) const { + if (valid) { + out << reg.name; + if (lo != 0 || hi != reg.size - 1) { + out << '[' << hi; + if (hi != lo) out << ":" << lo; + out << ']'; + } + 
} else { + out << ""; + } +} + +std::string Phv::db_regset(const bitvec &s) { + std::string rv; + for (int reg : s) { + if (!rv.empty()) rv += ", "; + rv += Phv::reg(reg)->name; + } + return rv; +} + +// For snapshot, the driver (generate pd script) generates a buffer of all phv +// fields and indexes through the buffer with a position offset to determine its +// location. It assumes the phv fields are arranged with the pov fields at the +// end. To maintain this ordering while generating the position offsets for each +// phv field, we initially generate 2 separate maps for normal and pov phv +// fields. We loop through the normap phv map first and then the pov phv map +// adding field sizes. The fields are byte aligned and put into 8/16/32 bit +// containers. +int Phv::get_position_offset(gress_t gress, std::string name) { + int position_offset = 0; + for (auto f : phv_field_sizes[gress]) { + if (f.first == name) return position_offset; + auto bytes_to_add = (f.second + 7) / 8U; + if (bytes_to_add == 3) bytes_to_add++; + position_offset += bytes_to_add; + } + for (auto f : phv_pov_field_sizes[gress]) { + if (f.first == name) return position_offset; + // POV should be single bit + BUG_CHECK(f.second == 1); + position_offset += 1; + } + return 0; +} + +// Output function sets the 'phv_allocation' node in context json Contains info +// on phv containers per gress (INGRESS/EGRESS) per stage Currently the phv +// containers are assumed to be present in all stages hence are replicated in +// each stage. Support for liveness indication for each container must be added +// (in assembly syntax/compiler) to set per stage phv containers correctly. 
+void Phv::output(json::map &ctxt_json) { + bool warn_once = false; + json::vector &phv_alloc = ctxt_json["phv_allocation"]; + for (int i = 0; i <= Target::NUM_MAU_STAGES(); i++) { + json::map phv_alloc_stage; + json::vector &phv_alloc_stage_ingress = phv_alloc_stage["ingress"] = json::vector(); + json::vector &phv_alloc_stage_egress = phv_alloc_stage["egress"] = json::vector(); + for (auto &slot : phv.user_defined) { + unsigned phv_number = slot.first->uid; + unsigned phv_container_size = slot.first->size; + gress_t gress = slot.second.first; + auto stage_usernames = slot.second.second[i]; + json::map phv_container; + phv_container["phv_number"] = phv_number; + phv_container["container_type"] = slot.first->type_to_string(); + json::vector &phv_records = phv_container["records"] = json::vector(); + for (auto field_name : stage_usernames) { + LOG5("Output phv record for field : " << field_name); + unsigned phv_lsb = 0, phv_msb = 0; + unsigned field_lo = 0; + int field_size = 0; + json::map phv_record; + auto sl = get(gress, i, field_name); + if (!sl) continue; + phv_lsb = sl->lo; + phv_msb = sl->hi; + field_lo = remove_name_tail_range(field_name, &field_size); + auto field_width = get_phv_field_size(gress, field_name); + if (field_size == 0) field_size = field_width; + phv_record["position_offset"] = get_position_offset(gress, field_name); + phv_record["field_name"] = field_name; + phv_record["field_msb"] = field_lo + field_size - 1; + phv_record["field_lsb"] = field_lo; + auto field_width_bytes = (field_width + 7) / 8U; + phv_record["field_width"] = field_width_bytes; + phv_record["phv_msb"] = phv_msb; + phv_record["phv_lsb"] = phv_lsb; + // FIXME-P4C: 'is_compiler_generated' is set to false for all + // fields except POV as there is no sure way of knowing from + // current assembly syntax whether the field is in the header or + // generated by the compiler. This will require additional + // assembly syntax to convey the same. 
Driver does not use + // is_compiler_generated (other than requiring it). p4i does + // use it for display purposes. + phv_record["is_compiler_generated"] = false; + phv_record["is_pov"] = false; + if (is_pov(field_name)) { + phv_record["is_pov"] = true; + phv_record["is_compiler_generated"] = true; + phv_record["field_width"] = 0; + phv_record["position_offset"] = 0; + /* Now that we know that this record is representing a POV, overwrite the + * phv_record to call it "POV" and get rid of "$valid" */ + phv_record["field_name"] = "POV"; + json::vector &pov_headers = phv_record["pov_headers"] = json::vector(); + json::map pov_header; + pov_header["bit_index"] = phv_lsb; + pov_header["position_offset"] = get_position_offset(gress, field_name); + pov_header["header_name"] = field_name; + // FIXME: Checks for reserved POV bits, not supported? + pov_header["hidden"] = false; + ; + pov_headers.push_back(std::move(pov_header)); + } + // Pass through per-field context_json information from the compiler. + if (field_context_json.count(slot.first->name)) { + auto add_phv_record_items = [&](int live_stage, std::string live_string) { + if (live_stage == -1) { + phv_record[live_string] = "parser"; + return; + } + if (live_stage == Target::NUM_MAU_STAGES()) { + phv_record[live_string] = "deparser"; + return; + } + phv_record[live_string] = live_stage; + }; + auto container_json = field_context_json[slot.first->name]; + BUG_CHECK(container_json); + bool field_added = false; + if (!container_json->as_vector()) { + // FIXME -- should be flexible about parsing context_json -- continue + // to accept a map instead of a vector here. + if (!warn_once) { + // FIXME -- would be nice to have the bfa lineno here. 
+ warning(-1, "Invalid/obsolete phv context_json:, ignoring"); + warn_once = true; + } + continue; + } + for (auto &field_json : *container_json->as_vector()) { + auto live_start = -1, live_end = Target::NUM_MAU_STAGES(); + auto container_field_json = field_json->as_map(); + if (container_field_json->count("name")) { + if ((*container_field_json)["name"] != field_name) continue; + } else { + continue; + } + if (container_field_json->count("live_start")) { + auto live_start_json = (*container_field_json)["live_start"]; + if (auto n = live_start_json->as_number()) live_start = n->val; + } + if (container_field_json->count("live_end")) { + auto live_end_json = (*container_field_json)["live_end"]; + if (auto n = live_end_json->as_number()) live_end = n->val; + } + if (i >= live_start && i <= live_end) { + add_phv_record_items(live_start, "live_start"); + add_phv_record_items(live_end, "live_end"); + phv_record["mutually_exclusive_with"] = json::vector(); + if (container_field_json->count("mutually_exclusive_with")) { + auto mutex_json = + (*container_field_json)["mutually_exclusive_with"]; + if (json::vector *mutex_json_vec = mutex_json->as_vector()) + phv_record["mutually_exclusive_with"] = + std::move(*mutex_json_vec); + } + field_added = true; + // Skip duplicates + if (!std::any_of(phv_records.begin(), phv_records.end(), + [&phv_record](std::unique_ptr &r) { + return *r == phv_record; + })) + phv_records.push_back(phv_record.clone()); + } + } + if (!field_added) { + auto live_start = -1, live_end = Target::NUM_MAU_STAGES(); + add_phv_record_items(live_start, "live_start"); + add_phv_record_items(live_end, "live_end"); + phv_record["mutually_exclusive_with"] = json::vector(); + phv_records.push_back(phv_record.clone()); + } + } else { + phv_records.push_back(std::move(phv_record)); + } + } + phv_container["word_bit_width"] = phv_container_size; + // Ghost phv's are considered as ingress phv's + if (phv_records.size() > 0) { + if ((gress == INGRESS) || (gress == 
GHOST)) { + phv_alloc_stage_ingress.push_back(std::move(phv_container)); + } else if (gress == EGRESS) { + phv_alloc_stage_egress.push_back(std::move(phv_container)); + } + } + } + phv_alloc_stage["stage_number"] = i; + phv_alloc.push_back(std::move(phv_alloc_stage)); + } + // FIXME: Fix json clone method to do above loops more efficiently + // for (int i = 0; i < Target::NUM_MAU_STAGES(); i++) { + // phv_alloc_stage["stage_number"] = i; + // phv_alloc.push_back(std::move(phv_alloc_stage.clone())); } +} + +#include "jbay/phv.cpp" // NOLINT(build/include) +#include "tofino/phv.cpp" // NOLINT(build/include) diff --git a/backends/tofino/bf-asm/phv.h b/backends/tofino/bf-asm/phv.h new file mode 100644 index 00000000000..133a72365c6 --- /dev/null +++ b/backends/tofino/bf-asm/phv.h @@ -0,0 +1,327 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_PHV_H_ +#define BACKENDS_TOFINO_BF_ASM_PHV_H_ + +#include +#include + +#include "backends/tofino/bf-asm/json.h" +#include "backends/tofino/bf-asm/target.h" +#include "bfas.h" +#include "lib/bitvec.h" +#include "match_source.h" +#include "misc.h" +#include "sections.h" + +class Phv : public Section { + void start(int lineno, VECTOR(value_t) args) override; + void input(VECTOR(value_t) args, value_t data) override; + void output(json::map &) override; + Phv() : Section("phv") {} + Phv(const Phv &) = delete; + Phv &operator=(const Phv &) = delete; + ~Phv() {} + static Phv phv; // singleton class + Target::Phv *target = nullptr; + FOR_ALL_TARGETS(FRIEND_TARGET_CLASS, ::Phv) + + public: + struct Register { + char name[8]; + enum type_t { NORMAL, TAGALONG, CHECKSUM, MOCHA, DARK } type; + // uid is used for "phv_number" in the context.json, but otherwise is just + // a unique id for the register, encoded differently for different targets + unsigned short index = 0, uid = 0, size = 0; + Register() { type = NORMAL; } + Register(const Register &) = delete; + Register &operator=(const Register &) = delete; + Register(const char *n, type_t t, unsigned i, unsigned u, unsigned s) + : type(t), index(i), uid(u), size(s) { + strncpy(name, n, sizeof(name)); + name[7] = 0; + } + bool operator==(const Register &a) const { return uid == a.uid; } + bool operator!=(const Register &a) const { return uid != a.uid; } + bool operator<(const Register &a) const { return uid < a.uid; } + virtual int parser_id() const { return -1; } + virtual int mau_id() const { return -1; } + virtual int ixbar_id() const { return -1; } + virtual int deparser_id() const { return -1; } + /// return a string representation based on the container type + const char *type_to_string() const { + switch (type) { + case NORMAL: + return "normal"; + case TAGALONG: + return "tagalong"; + case CHECKSUM: + return "checksum"; + case MOCHA: 
+ return "mocha"; + case DARK: + return "dark"; + } + return ""; + } + }; + class Slice : public IHasDbPrint { + static const Register invalid; + + public: + const Register ® + int lo = -1, hi = -1; + bool valid; + Slice() : reg(invalid), valid(false) {} + Slice(const Register &r, int l, int h) : reg(r), lo(l), hi(h) { + valid = lo >= 0 && hi >= lo && hi < reg.size; + } + Slice(const Register &r, int b) : reg(r), lo(b), hi(b) { + valid = lo >= 0 && hi >= lo && hi < reg.size; + } + Slice(const Slice &s, int l, int h) : reg(s.reg), lo(s.lo + l), hi(s.lo + h) { + valid = lo >= 0 && hi >= lo && hi <= s.hi && hi < reg.size; + } + Slice(const Slice &) = default; + explicit operator bool() const { return valid; } + Slice &operator=(const Slice &a) { + new (this) Slice(a.reg, a.lo, a.hi); + return *this; + } + const Slice *operator->() const { return this; } + bool operator==(const Slice &s) const { + return valid && s.valid && reg.uid == s.reg.uid && lo == s.lo && hi == s.hi; + } + bool operator<(const Slice &a) const { + if (reg.uid < a.reg.uid) return true; + if (reg.uid > a.reg.uid) return false; + if (lo < a.lo) return true; + if (lo > a.lo) return false; + return (hi < a.hi); + } + bool overlaps(const Slice &a) const { + return valid && a.valid && reg.uid == a.reg.uid && lo <= a.hi && a.lo <= hi; + } + unsigned size() const { return valid ? hi - lo + 1 : 0; } + std::string toString() const; + void dbprint(std::ostream &out) const; + }; + + protected: + // registers indexed according to MAU id + std::vector regs; + std::map> phv_pov_names; + struct PerStageInfo { + int max_stage = INT_MAX; + Slice slice; + }; + std::map> names[3]; + + private: + typedef std::map> user_stagenames_t; + std::map, ptrless> + user_defined; + bitvec phv_use[3]; + std::map phv_field_sizes[3]; + std::map phv_pov_field_sizes[3]; + + // Maps P4-level field names (i.e. returned by stack_asm_name_to_p4()) to a + // map to be embedded in the field's context_json "records" node. 
+ json::map field_context_json; + + void init_phv(target_t); + bool is_pov(std::string name) { + // There are 2 types of POV bits we are interested in + // Either ending with .$valid or .$deparse... + return (name.find(".$valid") != std::string::npos || + name.find(".$deparse") != std::string::npos); + } + void gen_phv_field_size_map(); + int addreg(gress_t gress, const char *name, const value_t &what, int stage = -1, + int max_stage = INT_MAX); + int get_position_offset(gress_t gress, std::string name); + void add_phv_field_sizes(gress_t gress, std::string name, int size) { + auto &phv_field_map = is_pov(name) ? phv_pov_field_sizes : phv_field_sizes; + phv_field_map[gress][name] += size; + } + int get_phv_field_size(gress_t gress, std::string name) { + if (phv_field_sizes[gress].count(name) > 0) return phv_field_sizes[gress][name]; + if (phv_pov_field_sizes[gress].count(name) > 0) return phv_pov_field_sizes[gress][name]; + return 0; + } + + public: + static const Slice *get(gress_t gress, int stage, const std::string &name) { + phv.init_phv(options.target); + auto phvIt = phv.names[gress].find(name); + if (phvIt == phv.names[gress].end()) return 0; + auto &per_stage = phvIt->second; + auto it = per_stage.upper_bound(stage); + if (it == per_stage.begin()) { + if (it == per_stage.end() || stage != -1) return 0; + } else { + --it; + } + if (stage > it->second.max_stage) return 0; + return &it->second.slice; + } + static const Slice *get(gress_t gress, int stg, const char *name) { + return get(gress, stg, std::string(name)); + } + class Ref : public MatchSource { + protected: + gress_t gress_; + std::string name_; + int stage = -1; + int lo = -1, hi = -1; + + public: + int lineno; + Ref() : gress_(INGRESS), lineno(-1) {} + Ref(gress_t g, int stage, const value_t &n); + Ref(gress_t g, int stage, int line, const std::string &n, int l, int h) + : gress_(g), name_(n), stage(stage), lo(l), hi(h), lineno(line) {} + Ref(const Ref &r, int l, int h) + : gress_(r.gress_), + 
name_(r.name_), + stage(r.stage), + lo(r.lo < 0 ? l : r.lo + l), + hi(r.lo < 0 ? h : r.lo + h), + lineno(r.lineno) { + BUG_CHECK(r.hi < 0 || hi <= r.hi); + } + Ref(const Register &r, gress_t gr, int lo = -1, int hi = -1); + explicit operator bool() const { return lineno >= 0; } + Slice operator*() const { + if (auto *s = phv.get(gress_, stage, name_)) { + if (hi >= 0) return Slice(*s, lo, hi); + return *s; + } else { + error(lineno, "No phv record %s (%s, stage %d)", name_.c_str(), + gress_ == INGRESS ? "INGRESS" : "EGRESS", stage); + phv.get(gress_, stage, name_); + return Slice(); + } + } + bool operator<(const Ref &r) const { + return (**this).reg.parser_id() < (*r).reg.parser_id(); + } + Slice operator->() const { return **this; } + bool operator==(const Ref &a) const { + if (name_ == a.name_ && lo == a.lo && hi == a.hi) return true; + return **this == *a; + } + bool check(bool err = true) const { + if (auto *s = phv.get(gress_, stage, name_)) { + if (hi >= 0 && !Slice(*s, lo, hi).valid) { + error(lineno, "Invalid slice of %s", name_.c_str()); + return false; + } + return true; + } else if (lineno >= 0 && err) { + error(lineno, "No phv record %s", name_.c_str()); + } + return false; + } + gress_t gress() const { return gress_; } + const char *name() const override { return name_.c_str(); } + std::string desc() const; + int lobit() const { return lo < 0 ? 0 : lo; } + int hibit() const { return hi < 0 ? 
(**this).size() - 1 : hi; } + unsigned size() const override { + if (lo >= 0) return hi - lo + 1; + if (auto *s = phv.get(gress_, stage, name_)) return s->size(); + return 0; + } + bool merge(const Ref &r); + std::string toString() const override; + void dbprint(std::ostream &out) const; + + int get_lineno() const override { return lineno; } + int fieldlobit() const override { return lobit(); } + int fieldhibit() const override { return hibit(); } + int slicelobit() const override { return (**this).lo; } + int slicehibit() const override { return (**this).hi; } + }; + // Return register using mau_id as @arg index + static const Register *reg(int idx) { + BUG_CHECK(idx >= 0 && size_t(idx) < phv.regs.size()); + return phv.regs[idx]; + } + + static const Register *reg(std::string name) { + for (auto ® : phv.regs) + if (reg->name == name) return reg; + return nullptr; + } + + // Return the number registers + static int num_regs() { return phv.regs.size(); } + + // Return POV name allocated in @arg reg at @arg index + static const std::string get_pov_name(int reg, int index) { + if (phv.phv_pov_names.count(reg) && phv.phv_pov_names.at(reg).count(index)) + return phv.phv_pov_names[reg][index]; + return " "; + } + + static const bitvec &use(gress_t gress) { return phv.phv_use[gress]; } + static void setuse(gress_t gress, const bitvec &u) { phv.phv_use[gress] |= u; } + static void unsetuse(gress_t gress, const bitvec &u) { phv.phv_use[gress] -= u; } + static std::string db_regset(const bitvec &s); + static unsigned mau_groupsize(); + + // Return all field names in @arg reg at @arg stage + static const std::set &aliases(const Register *reg, int stage) { + static std::set empty; + if (!phv.user_defined.count(reg)) return empty; + auto &m = phv.user_defined.at(reg).second; + auto it = m.upper_bound(stage); + if (it == m.begin()) return empty; + return (--it)->second; + } + + // For use by gtests + static void test_clear() { + phv.target = nullptr; + phv.regs.clear(); + 
phv.phv_pov_names.clear(); + phv.names[INGRESS].clear(); + phv.names[EGRESS].clear(); + phv.names[GHOST].clear(); + } +}; + +extern void merge_phv_vec(std::vector &vec, const Phv::Ref &r); +extern void merge_phv_vec(std::vector &v1, const std::vector &v2); +extern std::vector split_phv_bytes(const Phv::Ref &r); +extern std::vector split_phv_bytes(const std::vector &v); + +class Target::Phv { + friend class ::Phv; + virtual void init_regs(::Phv &) = 0; + virtual target_t type() const = 0; + virtual unsigned mau_groupsize() const = 0; +}; + +inline unsigned Phv::mau_groupsize() { return phv.target->mau_groupsize(); } + +#include "jbay/phv.h" +#include "tofino/phv.h" + +#endif /* BACKENDS_TOFINO_BF_ASM_PHV_H_ */ diff --git a/backends/tofino/bf-asm/power_ctl.h b/backends/tofino/bf-asm/power_ctl.h new file mode 100644 index 00000000000..cbae87a075b --- /dev/null +++ b/backends/tofino/bf-asm/power_ctl.h @@ -0,0 +1,65 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_POWER_CTL_H_ +#define BACKENDS_TOFINO_BF_ASM_POWER_CTL_H_ + +#include "misc.h" + +/* power_ctl is weirdly encoded! + * As far as I can tell, it actually walks like this: + * -[1:0] dimension controls hi-lo for each 8/16/32b type. In other words, + * [0] = 8b[31~0], 16b[47~0], 32b[31~0] and [1] = 8b[63~32], 16b[95~48], 32[63~32]. 
+ * -Within the wider dimension, [13:0] = 112b vector, where [31:0] = control for + * 32b section (array slice 3~0), [63:32] = control for 8b section (array slice 7~4), + * [111:64] = control for 16b section (array slice 13~8) + * + * Yes, Jay's decription of how the [1~0][13~0] translates to 224b is correct. + * The [1~0] index discriminates phv words going to the left side alu's [0] + * vs the right side ones [1]. Within each container size, the bottom 32 + * (or 48 for 16b) are on the left and the top half ones are on the right. + * Pat + * + * CSR DESCRIPTION IS WRONG!!! + */ + +template +void set_power_ctl_reg(checked_array<2, checked_array<16, ubits>> &power_ctl, int reg) { + int side = 0; + switch (reg / (I * 8)) { + case 1: // 8 bit + reg -= I * 8; + side = reg / (I * 4); + reg = (reg % (I * 4)) + (I * 4); + break; + case 2: + case 3: // 16 bit + reg -= I * 16; + side = reg / (I * 6); + reg = (reg % (I * 6)) + (I * 8); + break; + case 0: // 32 bit + side = reg / (I * 4); + reg = (reg % (I * 4)); + break; + default: + BUG(); + } + power_ctl[side][reg / I] |= 1U << reg % I; +} + +#endif /* BACKENDS_TOFINO_BF_ASM_POWER_CTL_H_ */ diff --git a/backends/tofino/bf-asm/primitives.cpp b/backends/tofino/bf-asm/primitives.cpp new file mode 100644 index 00000000000..d9ee4a885a7 --- /dev/null +++ b/backends/tofino/bf-asm/primitives.cpp @@ -0,0 +1,165 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include "backends/tofino/bf-asm/json.h" +#include "bfas.h" +#include "lib/log.h" +#include "sections.h" + +class Primitives : public Section { + int lineno = -1; + std::unique_ptr _primitives = nullptr; + std::string _primitivesFileName; + + Primitives() : Section("primitives") {} + + void input(VECTOR(value_t) args, value_t data) { + lineno = data.lineno; + if (!CHECKTYPE(data, tSTR)) return; + _primitivesFileName = data.s; + } + + void process() { + if (_primitivesFileName.empty()) return; + std::ifstream inputFile(_primitivesFileName); + if (!inputFile && _primitivesFileName[0] != '/') + inputFile.open(asmfile_dir + "/" + _primitivesFileName); + if (!inputFile) { + warning(lineno, "%s: can't read file", _primitivesFileName.c_str()); + } else { + inputFile >> _primitives; + if (!inputFile) { + warning(lineno, "%s: not valid primitives json representation", + _primitivesFileName.c_str()); + _primitives.reset(new json::map()); + } + } + } + + bool merge_actions(json::vector &_prim_actions, json::vector &ctxt_actions) { + bool merged = false; + for (auto &_prim_action : _prim_actions) { + for (auto &ctxt_action : ctxt_actions) { + if (*ctxt_action->to()["name"] == + *_prim_action->to()["name"]) { + ctxt_action->to().merge(_prim_action->to()); + merged = true; + auto aname = ctxt_action->to()["name"]->to(); + LOG3("Merged primitive action : " << aname); + break; + } + } + } + return merged; + } + + // If primitives json is present this function will merge the primitives + // nodes in the correct table->actions->action node The 'primitives' section + // is run last so we have already populated the context json tables at this + // stage. 
We check for the following tree structures to merge the action + // nodes + // Structure 1 (Match Tables) + // tables + // | + // |--> table0 + // |--> name + // |--> actions + // | + // |--> action0 + // | + // |--> name + // |--> primitives (merge here) + // Structure 2 (ALPM Tables) + // tables + // | + // |--> table0 + // |--> name + // |--> match_attributes + // | + // |--> pre_classifier + // | + // |--> actions + // | + // |--> action0 + // | + // |--> name + // |--> primitives (merge here) + // We can have multiple tables with the same name but one without + // and other with actions node e.g. stateful & its associated match table. + // In this case we want to merge with match table since it has the actions + // node + void output(json::map &ctxtJson) { + if (_primitives) { + json::vector &prim_tables = _primitives->to()["tables"]; + json::vector &ctxt_tables = ctxtJson["tables"]; + for (auto &prim_table : prim_tables) { + json::string prim_table_name = + prim_table->to()["name"]->to(); + bool is_merged = false; + json::string ctxt_table_name; + for (auto &ctxt_table : ctxt_tables) { + ctxt_table_name = ctxt_table->to()["name"]->to(); + if (prim_table_name == ctxt_table_name) { + if ((ctxt_table->to().count("actions") > 0) && + (prim_table->to().count("actions") > 0)) { + json::vector &prim_table_actions = + prim_table->to()["actions"]; + json::vector &ctxt_table_actions = + ctxt_table->to()["actions"]; + is_merged = merge_actions(prim_table_actions, ctxt_table_actions); + break; + } else if ((ctxt_table->to().count("match_attributes") > 0) && + (prim_table->to().count("match_attributes") > 0)) { + json::map &prim_table_ma = + prim_table->to()["match_attributes"]; + json::map &ctxt_table_ma = + ctxt_table->to()["match_attributes"]; + if ((ctxt_table_ma.to().count("pre_classifier") > 0) && + (prim_table_ma.to().count("pre_classifier") > 0)) { + json::map &prim_table_pc = + prim_table_ma.to()["pre_classifier"]; + json::map &ctxt_table_pc = + 
ctxt_table_ma.to()["pre_classifier"]; + if ((ctxt_table_pc.to().count("actions") > 0) && + (prim_table_pc.to().count("actions") > 0)) { + json::vector &prim_table_actions = + prim_table_pc.to()["actions"]; + json::vector &ctxt_table_actions = + ctxt_table_pc.to()["actions"]; + LOG3("Merging primitive actions on table: " << prim_table_name); + is_merged = + merge_actions(prim_table_actions, ctxt_table_actions); + break; + } + } + } + } + } + if (!is_merged) { + warning(lineno, "No table named %s found to merge primitive info", + prim_table_name.c_str()); + } + } + } + } + + static Primitives singleton_primitives; +} Primitives::singleton_primitives; diff --git a/backends/tofino/bf-asm/proxy_hash.cpp b/backends/tofino/bf-asm/proxy_hash.cpp new file mode 100644 index 00000000000..13395de2d5c --- /dev/null +++ b/backends/tofino/bf-asm/proxy_hash.cpp @@ -0,0 +1,188 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" + +void ProxyHashMatchTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::MatchEntry); + for (auto &kv : MapIterChecked(data, {"meter", "stats", "stateful"})) { + if (common_setup(kv, data, P4Table::MatchEntry)) { + } else if (kv.key == "proxy_hash_group") { + if (CHECKTYPE(kv.value, tINT)) { + proxy_hash_group = kv.value.i; + } + } else if (kv.key == "proxy_hash_algorithm") { + if (CHECKTYPE(kv.value, tSTR)) { + proxy_hash_alg = kv.value.s; + } + } else if (kv.key == "search_bus" || kv.key == "result_bus") { + // already dealt with in Table::setup_layout via common_init_setup + } else { + common_sram_setup(kv, data); + } + } +} + +bool ProxyHashMatchTable::verify_match_key() { + for (auto &match_key : match) { + if (!dynamic_cast(match_key)) { + error(match_key->get_lineno(), "A proxy hash table %s has a non hash key", name()); + continue; + } + } + auto match_format = format->field("match"); + if (match_format && match.empty()) BUG_CHECK("Proxy hash table has no match"); + return error_count == 0; +} + +int ProxyHashMatchTable::determine_pre_byteswizzle_loc(MatchSource *ms, int lo, int hi, int word) { + return (ms->slicelobit() + lo) / 8; +} + +void ProxyHashMatchTable::pass1() { + LOG1("### Proxy Hash match table " << name() << " pass1 " << loc()); + SRamMatchTable::pass1(); +} + +void ProxyHashMatchTable::setup_ways() { + SRamMatchTable::setup_ways(); + for (auto &row : layout) { + int first_way = -1; + for (auto &unit : row.memunits) { + int way = way_map.at(unit).way; + if (first_way < 0) { + first_way = way; + } else if (ways[way].group_xme != ways[first_way].group_xme) { + error(row.lineno, + "Ways %d and %d of table %s share address bus on row %d, " + "but use different hash groups", + first_way, way, name(), row.row); + break; + } + } + } +} + +void 
ProxyHashMatchTable::setup_word_ixbar_group() { + word_ixbar_group.resize(match_in_word.size()); + for (size_t i = 0; i < match_in_word.size(); i++) { + // Basically the value per row/bus of rams.row.vh_xbar.exactmatch_row_vh_xbar_ctl, + // based on the diagram in uArch section 6.2.3 Exact Match Row Vertical/Horizontal (VH) + // Xbars + word_ixbar_group[i] = BYTE_XBAR_GROUPS + proxy_hash_group; + } +} + +void ProxyHashMatchTable::pass2() { + LOG1("### Proxy Hash match table " << name() << " pass2 " << loc()); + for (auto &ixb : input_xbar) ixb->pass2(); + setup_word_ixbar_group(); + + if (actions) actions->pass2(this); + if (gateway) gateway->pass2(); + if (idletime) idletime->pass2(); + if (format) format->pass2(this); + for (auto &hd : hash_dist) hd.pass2(this); +} + +void ProxyHashMatchTable::pass3() { + LOG1("### Proxy Hash match table " << name() << " pass3 " << loc()); +} + +template +void ProxyHashMatchTable::write_regs_vt(REGS ®s) { + LOG1("### Proxy Hash match table " << name() << " write_regs " << loc()); + SRamMatchTable::write_regs(regs); + + for (auto &row : layout) { + auto &rams_row = regs.rams.array.row[row.row]; + for (auto &unit : row.memunits) { + auto &way = way_map[unit]; + auto &ram = rams_row.ram[unit.col]; + ram.match_nibble_s0q1_enable = version_nibble_mask.getrange(way.word * 32U, 32); + ram.match_nibble_s1q0_enable = UINT64_C(0xffffffff); + } + } +} + +/** + * The purpose of this function is to add the proxy_hash_function cJSON node. This is used + * by the driver in order to build the match key for the proxy hash table. + * + * By using the group from the proxy hash table, only pull the relevant bits for the proxy + * hash lookup. 
+ */ +void ProxyHashMatchTable::add_proxy_hash_function(json::map &stage_tbl) const { + bitvec hash_matrix_use; + for (auto *match_key : match) { + hash_matrix_use.setrange(match_key->fieldlobit(), match_key->size()); + } + + json::map &proxy_hash_function = stage_tbl["proxy_hash_function"] = json::map(); + json::vector &hash_bits = proxy_hash_function["hash_bits"] = json::vector(); + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + auto *hash_group = input_xbar[0]->get_hash_group(proxy_hash_group); + if (hash_group) { + for (unsigned id : bitvec(hash_group->tables)) { + auto hash_table = input_xbar[0]->get_hash_table(id); + gen_hash_bits(hash_table, InputXbar::HashTable(InputXbar::HashTable::EXACT, id), + hash_bits, proxy_hash_group, hash_matrix_use); + } + proxy_hash_function["hash_function_number"] = proxy_hash_group; + proxy_hash_function["ghost_bit_to_hash_bit"] = json::vector(); + proxy_hash_function["ghost_bit_info"] = json::vector(); + } +} + +void ProxyHashMatchTable::gen_tbl_cfg(json::vector &out) const { + unsigned size = get_number_entries(); + json::map &tbl = *base_tbl_cfg(out, "match", size); + json::map &stage_tbl = *add_common_sram_tbl_cfgs(tbl, "exact", "proxy_hash_match"); + stage_tbl["memory_resource_allocation"] = nullptr; + // FIXME: stash_allocation being null is a placeholder until implemented. 
+ stage_tbl["stash_allocation"] = nullptr; + add_pack_format(stage_tbl, format.get(), true, false); + json::map &match_attributes = tbl["match_attributes"]; + match_attributes["uses_dynamic_key_masks"] = false; + if (ways.size() > 0) { + json::vector &way_stage_tables = stage_tbl["ways"] = json::vector(); + unsigned way_number = 0; + for (auto &way : ways) { + json::map way_tbl; + way_tbl["stage_number"] = stage->stageno; + way_tbl["way_number"] = way_number++; + way_tbl["stage_table_type"] = "hash_way"; + auto fmt_width = get_format_width(); + BUG_CHECK(fmt_width); + way_tbl["size"] = way.rams.size() / fmt_width * format->groups() * 1024; + add_pack_format(way_tbl, format.get(), false); + way_tbl["memory_resource_allocation"] = gen_memory_resource_allocation_tbl_cfg(way); + way_stage_tables.push_back(std::move(way_tbl)); + } + } + add_proxy_hash_function(stage_tbl); + stage_tbl["proxy_hash_algorithm"] = proxy_hash_alg; + int proxy_hash_width = 0; + for (auto m : match) { + proxy_hash_width += m->size(); + } + stage_tbl["proxy_hash_bit_width"] = proxy_hash_width; +} + +DEFINE_TABLE_TYPE(ProxyHashMatchTable) diff --git a/backends/tofino/bf-asm/reflow.cpp b/backends/tofino/bf-asm/reflow.cpp new file mode 100644 index 00000000000..83a7f25d9fb --- /dev/null +++ b/backends/tofino/bf-asm/reflow.cpp @@ -0,0 +1,113 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include
+#include
+#include
+#include
+
+// NOTE(review): #include targets and std::vector element types appear stripped
+// in this patch text (this tool needs iostream/fstream/string/vector, and the
+// vectors below are presumably std::vector<std::string>) -- confirm against
+// the original source.
+
+// Standalone filter that rejoins multi-line { } / [ ] groups in generated
+// output onto a single line when the merged result stays reasonably short.
+
+// Flush the buffered lines to `out` verbatim, one per line, and clear the buffer.
+void output_normal(std::ostream &out, std::vector &lines) {
+    for (auto &l : lines) out << l << '\n';
+    lines.clear();
+}
+
+// Drop trailing whitespace in place (no-op on an all-whitespace string).
+void strip_trail_ws(std::string &s) {
+    auto end = s.find_last_not_of(" \t\r\n");
+    if (end != std::string::npos) s.resize(end + 1);
+}
+// Drop leading whitespace in place (no-op on an all-whitespace string).
+void strip_lead_ws(std::string &s) {
+    auto start = s.find_first_not_of(" \t\r\n");
+    if (start != std::string::npos) s.erase(0, start);
+}
+
+// Join the buffered lines into one output line: the first line keeps its
+// indentation, subsequent lines are trimmed and separated by single spaces.
+// Clears the buffer afterwards.
+void output_1line(std::ostream &out, std::vector &lines) {
+    bool first = true;
+    for (auto &l : lines) {
+        if (first) {
+            strip_trail_ws(l);
+            first = false;
+        } else {
+            strip_trail_ws(l);
+            strip_lead_ws(l);
+            out << ' ';
+        }
+        out << l;
+    }
+    out << '\n';
+    lines.clear();
+}
+
+// Estimate how long the buffered lines would be once joined by output_1line.
+// NOTE(review): find_last_not_of returns an index, not a length, so each line
+// is undercounted by one; the joining space output_1line inserts per line
+// roughly compensates. This is only a heuristic checked against the 100-column
+// limit in reflow() below.
+size_t output_len(std::vector &lines) {
+    size_t rv = 0;
+    for (auto &l : lines) {
+        size_t len = l.find_last_not_of(" \t\r\n"), plen;
+        if (len == std::string::npos) len = l.size();
+        if (rv == 0 && (plen = l.find_first_not_of(" \t\r\n")) != std::string::npos)
+            len -= plen - 1;
+        rv += len;
+    }
+    return rv;
+}
+
+// Copy `in` to `out`, buffering from a line containing an unmatched '{' or '['
+// until the line containing the matching close character, then emitting the
+// group as one line -- unless the estimated joined length exceeds 100, in
+// which case the group is emitted unchanged. Nested groups are not tracked;
+// only the first close character ends the group.
+void reflow(std::istream &in, std::ostream &out) {
+    std::string line;
+    char looking = 0;  // close character we are waiting for, or 0 when idle
+    std::vector save;  // lines buffered for the current group
+    const auto npos = std::string::npos;
+    while (getline(in, line)) {
+        if (line.find('{') != npos && line.find('}') == npos) {
+            output_normal(out, save);
+            looking = '}';
+            save.push_back(line);
+        } else if (line.find('[') != npos && line.find(']') == npos) {
+            output_normal(out, save);
+            looking = ']';
+            save.push_back(line);
+        } else if (looking) {
+            save.push_back(line);
+            if (line.find(looking) != std::string::npos) {
+                output_1line(out, save);
+                looking = 0;
+            } else if (output_len(save) > 100) {
+                // Group already too long to merge -- give up and emit as-is.
+                output_normal(out, save);
+                looking = 0;
+            }
+        } else {
+            out << line << '\n';
+        }
+    }
+    output_normal(out, save);  // flush any unterminated group at EOF
+    out << std::flush;
+}
+
+// Usage: reflow [file] -- reads the named file, or stdin when no argument is
+// given, and writes the reflowed text to stdout.
+int main(int ac, char **av) {
+    if (ac == 2) {
+        std::ifstream in(av[1]);
+        if (in) {
+            reflow(in, std::cout);
+        } else {
+            std::cerr << "Can't open " << av[1] << std::endl;
+            return 1;
+        }
+    } else if (ac == 1) {
+        reflow(std::cin, std::cout);
+    } else {
+        std::cerr << "usage: " << av[0] << " [file]" << std::endl;
+        return 1;
+    }
+    return 0;
+}
diff --git a/backends/tofino/bf-asm/register_reference.h b/backends/tofino/bf-asm/register_reference.h
new file mode 100644
index 00000000000..12536d2dbbd
--- /dev/null
+++ b/backends/tofino/bf-asm/register_reference.h
@@ -0,0 +1,111 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_REGISTER_REFERENCE_H_
+#define BACKENDS_TOFINO_BF_ASM_REGISTER_REFERENCE_H_
+
+#include
+#include
+
+#include "lib/log.h"
+
+/* used by `dump_unread` methods to hold a concatenation of string literals for printing.
+ * Allocated on the stack, the `pfx` chain prints the calling context */
+// Singly-linked list node of string-literal path components; the stream
+// operator below prints the chain outermost-first, '.'-separated.
+struct prefix {
+    const prefix *pfx;
+    const char *str;  // should always be a string literal
+    prefix(const prefix *p, const char *s) : pfx(p), str(s) {}
+};
+
+inline std::ostream &operator<<(std::ostream &out, const prefix *p) {
+    if (p) {
+        if (p->pfx) out << p->pfx << '.';
+        out << p->str;
+    }
+    return out;
+}
+
+/* Class to link register trees together into a larger dag that will expand into a tree
+ * when dumped as binary (so trees that appear in multiple places will be duplicated)
+ * 'name' is the json file name to use when dumping as cfg.json, and the name for logging
+ * 'tree' is the subtree to dump as binary at the appropriate offset
+ */
+// NOTE(review): the template parameter lists in this file appear stripped in
+// this patch text (presumably the REG tree type) -- confirm against the
+// original source.
+template
+class register_reference {
+    REG *tree = nullptr;   // non-owning pointer to the referenced subtree
+    std::string name;      // cfg.json file name / logging name for the subtree
+
+ public:
+    // Bookkeeping flags, mutable so they can be tracked through const access:
+    //   read      -- set by operator-> when the tree is dereferenced
+    //   write     -- set by set() when a reference is installed
+    //   disabled_ -- set by disable(); further writes are logged as errors
+    mutable bool read = false, write = false, disabled_ = false;
+    register_reference() {}
+    register_reference(const register_reference &) = default;
+    register_reference(register_reference &&) = default;
+    register_reference &operator=(const register_reference &) & = default;
+    register_reference &operator=(register_reference &&) & = default;
+    ~register_reference() {}
+
+    // Install a reference: record the name and subtree, log the assignment,
+    // and flag overwrites of an already-written or disabled reference.
+    register_reference &set(const char *a, REG *r) {
+        if (disabled_) LOG1("ERROR: Writing disabled register value in " << this);
+        if (write) LOG1("WARNING: Overwriting \"" << name << "\" with \"" << a << "\" in " << this);
+        name = a;
+        tree = r;
+        log();
+        write = true;
+        return *this;
+    }
+    const char *c_str() const { return name.c_str(); }
+    // Dereference the referenced subtree, marking this reference as read.
+    REG *operator->() const {
+        read = true;
+        return tree;
+    }
+    explicit operator bool() const { return tree != nullptr; }
+    bool modified() const { return write; }
+    void set_modified(bool v = true) { write = v; }
+    void rewrite() { write = false; }
+    // friend std::ostream &operator<<(std::ostream &out, const register_reference &u);
+    void enable() { disabled_ = false; }
+    bool disabled() const { return disabled_; }
+    // The disable_if_* predicates never disable a reference node -- the
+    // linked subtree is always dumped.
+    bool disable_if_unmodified() { return false; }
+    bool disable_if_zero() { return false; }
+    bool disable_if_reset_value() { return false; }
+    // Disable only if never named/written; refuses (and logs) otherwise.
+    bool disable() {
+        if (!name.empty()) {
+            LOG1("ERROR: Disabling modified register in " << this);
+            return false;
+        }
+        tree = nullptr;
+        disabled_ = true;
+        return true;
+    }
+    void log() const { LOG1(this << " = \"" << name << "\""); }
+};
+
+// Printing a pointer logs the register's name via print_regname (declared
+// elsewhere), using [u, u+1) as the address range of the field.
+template
+inline std::ostream &operator<<(std::ostream &out, const register_reference *u) {
+    print_regname(out, u, u + 1);
+    return out;
+}
+// Printing a value emits the quoted name, or 0 when no name was ever set.
+template
+inline std::ostream &operator<<(std::ostream &out, const register_reference &u) {
+    if (!*u.c_str())
+        out << 0;
+    else
+        out << '"' << u.c_str() << '"';
+    return out;
+}
+
+#endif /* BACKENDS_TOFINO_BF_ASM_REGISTER_REFERENCE_H_ */
diff --git a/backends/tofino/bf-asm/rvalue_reference_wrapper.h b/backends/tofino/bf-asm/rvalue_reference_wrapper.h
new file mode 100644
index 00000000000..a86e4e946e3
--- /dev/null
+++ b/backends/tofino/bf-asm/rvalue_reference_wrapper.h
@@ -0,0 +1,33 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_RVALUE_REFERENCE_WRAPPER_H_
+#define BACKENDS_TOFINO_BF_ASM_RVALUE_REFERENCE_WRAPPER_H_
+
+// Analogue of std::reference_wrapper for rvalue references: holds a pointer
+// to a T whose value may later be moved out via get().
+// NOTE(review): the template parameter lists appear stripped in this patch
+// text (a class parameter T and a converting-constructor parameter U) --
+// confirm against the original source.
+// NOTE(review): the wrapper stores only a raw pointer to the bound temporary;
+// it dangles if it outlives the full expression that created the rvalue --
+// callers must consume get() before then.
+template
+class rvalue_reference_wrapper {
+    T *ref;  // non-owning pointer to the wrapped rvalue
+
+ public:
+    typedef T type;
+    rvalue_reference_wrapper(T &&r) : ref(&r) {}  // NOLINT(runtime/explicit)
+    // Converting constructor from any reference-compatible U.
+    template
+    rvalue_reference_wrapper(U &&r) : ref(&r) {}  // NOLINT(runtime/explicit)
+    // Release the wrapped value as an rvalue; the referent is left moved-from.
+    T &&get() { return std::move(*ref); }
+};
+
+#endif /* BACKENDS_TOFINO_BF_ASM_RVALUE_REFERENCE_WRAPPER_H_ */
diff --git a/backends/tofino/bf-asm/salu_inst.cpp b/backends/tofino/bf-asm/salu_inst.cpp
new file mode 100644
index 00000000000..a4e370fb2f8
--- /dev/null
+++ b/backends/tofino/bf-asm/salu_inst.cpp
@@ -0,0 +1,1056 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "instruction.h" +#include "lib/hex.h" +#include "phv.h" + +namespace StatefulAlu { + +struct operand : public IHasDbPrint { + struct Base : public IHasDbPrint { + int lineno; + explicit Base(int line) : lineno(line) {} + Base(const Base &a) : lineno(a.lineno) {} + virtual ~Base() {} + virtual Base *clone() const = 0; + virtual void dbprint(std::ostream &) const = 0; + virtual bool equiv(const Base *) const = 0; + virtual const char *kind() const = 0; + virtual Base *lookup(Base *&) { return this; } + virtual bool phvRead(std::function) { return false; } + virtual void pass1(StatefulTable *) {} + } *op; + struct Const : public Base { + int64_t value; + Const *clone() const override { return new Const(*this); } + Const(int line, int64_t v) : Base(line), value(v) {} + void dbprint(std::ostream &out) const override { out << value; } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return value == a->value; + } else { + return false; + } + } + const char *kind() const override { return "constant"; } + }; + // Operand representing a constant stored in the register file + struct Regfile : public Base { + int index = -1; + Regfile *clone() const override { return new Regfile(*this); } + Regfile(int line, int index) : Base(line), index(index) {} + Regfile(int line, const value_t &n) : Base(line) { + if (PCHECKTYPE2M(n.vec.size == 2, n[1], tINT, tBIGINT, "SALU regfile row reference")) + index = get_int64(n[1], sizeof(index) / 8, "regfile row index out of bounds"); + } + void dbprint(std::ostream &out) const override { out << index; } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return index == a->index; + } else { + return false; + } + } + const char *kind() const override { return 
"register file constant"; } + }; + struct Phv : public Base { + virtual Phv *clone() const = 0; + explicit Phv(int lineno) : Base(lineno) {} + virtual int phv_index(StatefulTable *tbl) = 0; + }; + struct PhvReg : public Phv { + ::Phv::Ref reg; + PhvReg *clone() const override { return new PhvReg(*this); } + PhvReg(gress_t gress, int stage, const value_t &v) : Phv(v.lineno), reg(gress, stage, v) {} + void dbprint(std::ostream &out) const override { out << reg; } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return reg == a->reg; + } else { + return false; + } + } + const char *kind() const override { return "phv_reg"; } + void pass1(StatefulTable *tbl) override { + if (!reg.check()) return; + int size = tbl->format->begin()->second.size / 8; + if (tbl->input_xbar.empty()) { + error(lineno, "No input xbar for salu instruction operand for phv"); + return; + } + BUG_CHECK(tbl->input_xbar.size() == 1, "%s does not have one input xbar", tbl->name()); + int byte = tbl->find_on_ixbar(*reg, tbl->input_xbar[0]->match_group()); + int base = options.target == TOFINO ? 8 : 0; + if (byte < 0) + error(lineno, "Can't find %s on the input xbar", reg.name()); + else if (byte != base && byte != base + size) + error(lineno, "%s must be at %d or %d on ixbar to be used in stateful table %s", + reg.desc().c_str(), base * 8, (base + size) * 8, tbl->name()); + else if (int(reg->size()) > size * 8) + error(lineno, "%s is too big for stateful table %s", reg.desc().c_str(), + tbl->name()); + else + tbl->phv_byte_mask |= ((1U << (reg->size() + 7) / 8U) - 1) << (byte - base); + } + int phv_index(StatefulTable *tbl) override { + int base = options.target == TOFINO ? 
8 : 0; + return tbl->find_on_ixbar(*reg, tbl->input_xbar[0]->match_group()) > base; + } + bool phvRead(std::function fn) override { + fn(*reg); + return true; + } + }; + // Operand which directly accesses phv(hi/lo) from Input Xbar + struct PhvRaw : public Phv { + int pi = -1; + unsigned mask = ~0U; + PhvRaw *clone() const override { return new PhvRaw(*this); } + PhvRaw(gress_t gress, const value_t &v) : Phv(v.lineno) { + if (v == "phv_lo") + pi = 0; + else if (v == "phv_hi") + pi = 1; + else + BUG(); + if (v.type == tCMD && PCHECKTYPE(v.vec.size == 2, v[1], tRANGE)) { + if ((v[1].range.lo & 7) || ((v[1].range.hi + 1) & 7)) + error(lineno, "only byte slices allowed on %s", v[0].s); + mask = (1U << (v[1].range.hi + 1) / 8U) - (1U << (v[1].range.lo / 8U)); + } + } + void dbprint(std::ostream &out) const override { out << (pi ? "phv_hi" : "phv_lo"); } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return pi == a->pi; + } else { + return false; + } + } + const char *kind() const override { return "phv_ixb"; } + void pass1(StatefulTable *tbl) override { + int size = tbl->format->begin()->second.size / 8U; + if (mask == ~0U) + mask = (1U << size) - 1; + else if (mask & ~((1U << size) - 1)) + error(lineno, "slice out of range for %d byte value", size); + tbl->phv_byte_mask |= mask << (size * pi); + } + int phv_index(StatefulTable *tbl) override { return pi; } + bool phvRead(std::function) override { return true; } + }; + struct Memory : public Base { + Table *tbl; + Table::Format::Field *field; + Memory *clone() const override { return new Memory(*this); } + Memory(int line, Table *t, Table::Format::Field *f) : Base(line), tbl(t), field(f) {} + void dbprint(std::ostream &out) const override { out << tbl->format->find_field(field); } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return field == a->field; + } else { + return false; + } + } + const char *kind() const override { return "memory"; } + }; 
+ struct MathFn; + bool neg = false; + uint64_t mask = uint32_t(-1); + operand() : op(0) {} + operand(const operand &a) : op(a.op ? a.op->clone() : 0) {} + operand(operand &&a) : op(a.op) { a.op = 0; } + operand &operator=(const operand &a) { + if (&a != this) { + delete op; + op = a.op ? a.op->clone() : 0; + } + return *this; + } + operand &operator=(operand &&a) { + if (&a != this) { + delete op; + op = a.op; + a.op = 0; + } + return *this; + } + ~operand() { delete op; } + operand(Table *tbl, const Table::Actions::Action *act, const value_t &v, bool can_mask = false); + bool valid() const { return op != 0; } + explicit operator bool() const { return op != 0; } + bool operator==(operand &a) { + return op == a.op || (op && a.op && op->lookup(op)->equiv(a.op->lookup(a.op))); + } + bool phvRead(std::function fn) { + return op ? op->lookup(op)->phvRead(fn) : false; + } + void dbprint(std::ostream &out) const { + if (neg) out << '-'; + if (op) + op->dbprint(out); + else + out << "(null)"; + } + Base *operator->() { return op->lookup(op); } + template + T *to() { + return dynamic_cast(op); + } +}; + +struct operand::MathFn : public Base { + operand of; + MathFn *clone() const override { return new MathFn(*this); } + MathFn(int line, operand of) : Base(line), of(of) {} + void dbprint(std::ostream &out) const override { + out << "math(" << of << ")"; + ; + } + bool equiv(const Base *a_) const override { + if (auto *a = dynamic_cast(a_)) { + return of.op == a->of.op; + } else { + return false; + } + } + const char *kind() const override { return "math fn"; } + bool phvRead(std::function fn) { return of->phvRead(fn); } + void pass1(StatefulTable *tbl) override { of->pass1(tbl); } +}; + +operand::operand(Table *tbl, const Table::Actions::Action *act, const value_t &v_, bool can_mask) + : op(nullptr) { + const value_t *v = &v_; + if (options.target == TOFINO) can_mask = false; + if (can_mask && v->type == tCMD && *v == "&" && v->vec.size == 3) { + if (v->vec[2].type == tINT 
|| v->vec[2].type == tBIGINT) { + mask = get_int64(v->vec[2], 64, "mask too large"); + v = &v->vec[1]; + } else if (v->vec[1].type == tINT || v->vec[1].type == tBIGINT) { + mask = get_int64(v->vec[1], 64, "mask too large"); + v = &v->vec[2]; + } else { + error(v->lineno, "mask must be a constant"); + } + } + if (v->type == tCMD && *v == "-") { + neg = true; + v = &v->vec[1]; + } + if (v->type == tINT || v->type == tBIGINT) { + auto i = get_int64(*v, 64, "Integer too large"); + op = new Const(v->lineno, i); + return; + } + if (v->type == tCMD && *v == "register_param") { + op = new Regfile(v->lineno, *v); + return; + } + if (v->type == tSTR) { + if (auto f = tbl->format->field(v->s)) { + op = new Memory(v->lineno, tbl, f); + return; + } + } + if (v->type == tCMD) { + BUG_CHECK(v->vec.size > 0 && v->vec[0].type == tSTR); + if (auto f = tbl->format->field(v->vec[0].s)) { + if (v->vec.size > 1 && CHECKTYPE(v->vec[1], tRANGE) && v->vec[1].range.lo != 0) + error(v->vec[1].lineno, "Can't slice memory field %s in stateful action", + v->vec[0].s); + op = new Memory(v->lineno, tbl, f); + return; + } + } + if ((v->type == tCMD) && (v->vec[0] == "math_table")) { + // operand *opP = new operand(tbl, act, v->vec[1]); + op = new MathFn(v->lineno, operand(tbl, act, v->vec[1])); + return; + } + if (*v == "phv_lo" || *v == "phv_hi") { + op = new PhvRaw(tbl->gress, *v); + return; + } + if (::Phv::Ref(tbl->gress, tbl->stage->stageno, *v).check(false)) + op = new PhvReg(tbl->gress, tbl->stage->stageno, *v); +} + +enum salu_slot_use { + CMP0, + CMP1, + CMP2, + CMP3, + ALU2LO, + ALU1LO, + ALU2HI, + ALU1HI, + ALUOUT0, + ALUOUT1, + ALUOUT2, + ALUOUT3, + MINMAX, + // aliases + CMPLO = CMP0, + CMPHI = CMP1, + ALUOUT = ALUOUT0, +}; + +// Abstract interface class for SALU Instructions +// SALU Instructions - AluOP, BitOP, CmpOP, OutOP +struct SaluInstruction : public Instruction { + explicit SaluInstruction(int lineno) : Instruction(lineno) {} + // Stateful ALU's dont access PHV's directly + 
static int decode_predicate(const value_t &exp); +}; + +int SaluInstruction::decode_predicate(const value_t &exp) { + if (exp == "cmplo") return Target::STATEFUL_PRED_MASK() & STATEFUL_PREDICATION_ENCODE_CMPLO; + if (exp == "cmphi") return Target::STATEFUL_PRED_MASK() & STATEFUL_PREDICATION_ENCODE_CMPHI; + if (exp == "cmp0") return Target::STATEFUL_PRED_MASK() & STATEFUL_PREDICATION_ENCODE_CMP0; + if (Target::STATEFUL_CMP_UNITS() > 1 && exp == "cmp1") + return Target::STATEFUL_PRED_MASK() & STATEFUL_PREDICATION_ENCODE_CMP1; + if (Target::STATEFUL_CMP_UNITS() > 2 && exp == "cmp2") + return Target::STATEFUL_PRED_MASK() & STATEFUL_PREDICATION_ENCODE_CMP2; + if (Target::STATEFUL_CMP_UNITS() > 3 && exp == "cmp3") + return Target::STATEFUL_PRED_MASK() & STATEFUL_PREDICATION_ENCODE_CMP3; + if (exp == "!") return Target::STATEFUL_PRED_MASK() ^ decode_predicate(exp[1]); + if (exp == "&") { + auto rv = decode_predicate(exp[1]); + for (int i = 2; i < exp.vec.size; ++i) rv &= decode_predicate(exp[i]); + return rv; + } + if (exp == "|") { + auto rv = decode_predicate(exp[1]); + for (int i = 2; i < exp.vec.size; ++i) rv |= decode_predicate(exp[i]); + return rv; + } + if (exp == "^") { + auto rv = decode_predicate(exp[1]); + for (int i = 2; i < exp.vec.size; ++i) rv ^= decode_predicate(exp[i]); + return rv; + } + if (exp.type == tINT && exp.i >= 0 && exp.i <= Target::STATEFUL_PRED_MASK()) return exp.i; + error(exp.lineno, "Unexpected expression %s in predicate", value_desc(&exp)); + return -1; +} + +struct AluOP : public SaluInstruction { + const struct Decode : public Instruction::Decode { + std::string name; + unsigned opcode; + enum operands_t { NONE, A, B, AandB } operands = AandB; + const Decode *swap_args; + Decode(const char *n, int opc, bool assoc = false, const char *alias_name = 0) + : Instruction::Decode(n, STATEFUL_ALU), + name(n), + opcode(opc), + swap_args(assoc ? 
this : 0) { + if (alias_name) alias(alias_name, STATEFUL_ALU); + } + Decode(const char *n, int opc, Decode *sw, const char *alias_name = 0, + operands_t use = AandB) + : Instruction::Decode(n, STATEFUL_ALU), + name(n), + opcode(opc), + operands(use), + swap_args(sw) { + if (sw && !sw->swap_args) sw->swap_args = this; + if (alias_name) alias(alias_name, STATEFUL_ALU); + } + Decode(const char *n, int opc, const char *alias_name) + : Instruction::Decode(n, STATEFUL_ALU), name(n), opcode(opc), swap_args(0) { + if (alias_name) alias(alias_name, STATEFUL_ALU); + } + Decode(const char *n, int opc, bool assoc, operands_t use) + : Instruction::Decode(n, STATEFUL_ALU), + name(n), + opcode(opc), + operands(use), + swap_args(assoc ? this : 0) {} + Decode(const char *n, int opc, const char *alias_name, operands_t use) + : Instruction::Decode(n, STATEFUL_ALU), + name(n), + opcode(opc), + operands(use), + swap_args(0) { + if (alias_name) alias(alias_name, STATEFUL_ALU); + } + Decode(const char *n, int opc, Decode *sw, operands_t use) + : Instruction::Decode(n, STATEFUL_ALU), + name(n), + opcode(opc), + operands(use), + swap_args(sw) { + if (sw && !sw->swap_args) sw->swap_args = this; + } + Decode(const char *n, target_t targ, int opc) + : Instruction::Decode(n, targ, STATEFUL_ALU), name(n), opcode(opc), swap_args(0) {} + + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + int predication_encode = STATEFUL_PREDICATION_ENCODE_UNCOND; + enum dest_t { LO, HI }; + dest_t dest = LO; + operand srca, srcb; + AluOP(const Decode *op, int l) : SaluInstruction(l), opc(op) {} + std::string name() override { return opc->name; }; + Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *tbl, Table::Actions::Action *) override {} + bool salu_alu() const override { return true; } + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { + return srca.phvRead(fn) | 
srcb.phvRead(fn); + } + void dbprint(std::ostream &out) const override { + out << "INSTR: " << opc->name << " pred=0x" << hex(predication_encode) << " " + << (dest ? "hi" : "lo") << ", " << srca << ", " << srcb; + } + template + void write_regs(REGS ®s, Table *tbl, Table::Actions::Action *act); + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +static AluOP::Decode opADD("add", 0x1c, true), opSUB("sub", 0x1e), opSADDU("saddu", 0x10, true), + opSADDS("sadds", 0x11, true), opSSUBU("ssubu", 0x12), opSSUBS("ssubs", 0x13), + opMINU("minu", 0x14, true), opMINS("mins", 0x15, true), opMAXU("maxu", 0x16, true), + opMAXS("maxs", 0x17, true), opNOP("nop", 0x18, true, AluOP::Decode::NONE), + opSUBR("subr", 0x1f, &opSUB), opSSUBRU("ssubru", 0x1a, &opSSUBU), + opSSUBRS("ssubrs", 0x1b, &opSSUBS), + + opSETZ("setz", 0x00, true, AluOP::Decode::NONE), opNOR("nor", 0x01, true), + opANDCA("andca", 0x02), opNOTA("nota", 0x03, "not", AluOP::Decode::A), + opANDCB("andcb", 0x04, &opANDCA), opNOTB("notb", 0x05, &opNOTA, AluOP::Decode::B), + opXOR("xor", 0x06, true), opNAND("nand", 0x07, true), opAND("and", 0x08, true), + opXNOR("xnor", 0x09, true), opB("alu_b", 0x0a, "b", AluOP::Decode::B), opORCA("orca", 0x0b), + opA("alu_a", 0x0c, &opB, "a", AluOP::Decode::A), opORCB("orcb", 0x0d, &opORCA), + opOR("or", 0x0e, true), opSETHI("sethi", 0x0f, true, AluOP::Decode::NONE); + +Instruction *AluOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + AluOP *rv = new AluOP(this, op[0].lineno); + auto operands = this->operands; + int idx = 1; + // Check optional predicate operand + if (idx < op.size) { + if (op[idx].type == tINT) { + // Predicate is an integer. no warning for odd values + rv->predication_encode = op[idx++].i; + } else if (op[idx].startsWith("cmp") || op[idx] == "!" 
|| op[idx] == "&" || + op[idx] == "|" || op[idx] == "^") { + // Predicate is an expression + rv->predication_encode = decode_predicate(op[idx++]); + if (rv->predication_encode == STATEFUL_PREDICATION_ENCODE_NOOP) + warning(op[idx - 1].lineno, "Instruction predicate is always false"); + else if (rv->predication_encode == STATEFUL_PREDICATION_ENCODE_UNCOND) + warning(op[idx - 1].lineno, "Instruction predicate is always true"); + } + } + if (idx < op.size && op[idx] == "lo") { + rv->dest = LO; + idx++; + } else if (idx < op.size && op[idx] == "hi") { + rv->dest = HI; + idx++; + } else if (idx == op.size && name == "nop") { + // allow nop without even a destination -- assume lo + rv->dest = LO; + } else { + error(rv->lineno, "invalid destination for %s instruction", op[0].s); + } + if (operands == NONE) { + if (idx < op.size) error(rv->lineno, "too many operands for %s instruction", op[0].s); + return rv; + } + if (idx < op.size && operands != B) rv->srca = operand(tbl, act, op[idx++]); + if (idx < op.size && operands != A) rv->srcb = operand(tbl, act, op[idx++]); + if (swap_args && (rv->srca.to() || rv->srca.to() || + (rv->srcb.to() && + (rv->srca.to() || rv->srca.to())))) { + operands = (rv->opc = swap_args)->operands; + std::swap(rv->srca, rv->srcb); + } + if (idx < op.size) + error(rv->lineno, "too many operands for %s instruction", op[0].s); + else if ((!rv->srca && operands != B) || (!rv->srcb && operands != A)) + error(rv->lineno, "not enough operands for %s instruction", op[0].s); + if (auto mf = rv->srca.to()) { + error(rv->lineno, "Can't reference math table in %soperand of %s instruction", + operands != A ? "first " : "", op[0].s); + if (!mf->of.to() && !mf->of.to()) + error(rv->lineno, "Math table input must come from Phv or memory"); + } + if (rv->srca.to()) + error(rv->lineno, "Can't reference phv in %soperand of %s instruction", + operands != A ? 
"first " : "", op[0].s); + if (rv->srcb.to()) + error(rv->lineno, "Can't reference memory in %soperand of %s instruction", + operands != A ? "first " : "", op[0].s); + if (auto mf = rv->srcb.to()) { + rv->slot = ALU2LO; + if (rv->dest != LO) error(rv->lineno, "Can't reference math table in alu-hi"); + if (!mf->of.to() && !mf->of.to()) + error(rv->lineno, "Math table input must come from Phv or memory"); + } + if (rv->srca.neg) { + if (auto k = rv->srca.to()) + k->value = -k->value; + else + error(rv->lineno, "Can't negate operand of %s instruction", op[0].s); + } + if (rv->srcb.neg) { + if (auto k = rv->srcb.to()) + k->value = -k->value; + else + error(rv->lineno, "Can't negate operand of %s instruction", op[0].s); + } + return rv; +} + +bool AluOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) + return opc == a->opc && predication_encode == a->predication_encode && dest == a->dest && + srca == a->srca && srcb == a->srcb; + return false; +} + +Instruction *AluOP::pass1(Table *tbl_, Table::Actions::Action *act) { + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + if (slot < 0 && act->slot_use[slot = (dest ? ALU1HI : ALU1LO)]) slot = dest ? 
ALU2HI : ALU2LO; + auto k1 = srca.to(); + auto k2 = srcb.to(); + // Check cases when both constants would be stored in the register file on different rows + // Two constants that do not fit as immediate constants + if (k1 && k2 && !k1->equiv(k2)) + error(lineno, "can only have one constant in an SALU instruction"); + if (!k1) k1 = k2; + if (k1 && (k1->value < Target::STATEFUL_ALU_CONST_MIN() || + k1->value > Target::STATEFUL_ALU_CONST_MAX())) { + if (k1->value >= (INT64_C(1) << tbl->alu_size()) || + k1->value < (INT64_C(~0u) << (tbl->alu_size() - 1))) { + error(lineno, + "value %" PRIi64 + " of the constant operand" + " out of range for %d bit stateful ALU", + k1->value, tbl->alu_size()); + } else if (k1->value >= (INT64_C(1) << (Target::STATEFUL_REGFILE_CONST_WIDTH() - 1))) { + // constants have a limited width, and are always signed, so need to make + // sure they wrap properly + k1->value -= INT64_C(1) << Target::STATEFUL_REGFILE_CONST_WIDTH(); + if (k2 && k2 != k1) k2->value = k1->value; + } + } + auto r1 = srca.to(); + auto r2 = srcb.to(); + if (r1 && r2 && !r1->equiv(r2)) + error(lineno, "can only have one register file reference in an SALU instruction"); + if (!r1) r1 = r2; + if (r1) { + int64_t v1 = tbl->get_const_val(r1->index); + if (v1 >= (INT64_C(1) << tbl->alu_size()) || v1 < (INT64_C(~0u) << (tbl->alu_size() - 1))) { + error(lineno, + "initial value %" PRIi64 + " of the register file operand" + " out of range for %d bit stateful ALU", + v1, tbl->alu_size()); + } + } + if (k1 && r1) + error(lineno, + "can have either a constant or a register file reference" + " in an SALU instruction"); + if (srca) srca->pass1(tbl); + if (srcb) srcb->pass1(tbl); + return this; +} + +Instruction *genNoop(StatefulTable *tbl, Table::Actions::Action *act) { + VECTOR(value_t) args = EMPTY_VECTOR_INIT; + BUG_CHECK(tbl->format->begin() != tbl->format->end(), "No tbl->format!"); + args.add("or").add("lo").add(0).add(tbl->format->begin()->first.c_str()); + auto *rv = 
Instruction::decode(tbl, act, args); + VECTOR_fini(args); + return rv; +} + +struct BitOP : public SaluInstruction { + const struct Decode : public Instruction::Decode { + std::string name; + unsigned opcode; + Decode(const char *n, unsigned opc) + : Instruction::Decode(n, STATEFUL_ALU), name(n), opcode(opc) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + int predication_encode = STATEFUL_PREDICATION_ENCODE_UNCOND; + BitOP(const Decode *op, int lineno) : SaluInstruction(lineno), opc(op) {} + std::string name() override { return opc->name; }; + Instruction *pass1(Table *, Table::Actions::Action *) override { + slot = ALU1LO; + return this; + } + void pass2(Table *, Table::Actions::Action *) override {} + bool salu_alu() const override { return true; } + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { return false; } + void dbprint(std::ostream &out) const override { out << "INSTR: " << opc->name; } + template + void write_regs(REGS ®s, Table *tbl, Table::Actions::Action *act); + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +static BitOP::Decode opSET_BIT("set_bit", 0x0), opSET_BITC("set_bitc", 0x1), + opCLR_BIT("clr_bit", 0x2), opCLR_BITC("clr_bitc", 0x3), opREAD_BIT("read_bit", 0x4), + opREAD_BITC("read_bitc", 0x5), opSET_BIT_AT("set_bit_at", 0x6), + opSET_BITC_AT("set_bitc_at", 0x7), opCLR_BIT_AT("clr_bit_at", 0x8), + opCLR_BITC_AT("clr_bitc_at", 0x9); + +Instruction *BitOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + BitOP *rv = new BitOP(this, op[0].lineno); + if (op.size > 1) error(rv->lineno, "too many operands for %s instruction", op[0].s); + return rv; +} + +bool BitOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) return opc == a->opc; + return false; +} + +struct CmpOP : public SaluInstruction { + const struct Decode : public 
Instruction::Decode { + std::string name; + unsigned opcode; + Decode(const char *n, unsigned opc, bool type) + : Instruction::Decode(n, STATEFUL_ALU, type), name(n), opcode(opc) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + int type = 0; + operand::Memory *srca = 0; + uint32_t maska = 0xffffffffU; + operand::Phv *srcb = 0; + uint32_t maskb = 0xffffffffU; + operand::Base *srcc = 0; // operand::Const or operand::Regfile + bool srca_neg = false, srcb_neg = false; + bool learn = false, learn_not = false; + CmpOP(const Decode *op, int lineno) : SaluInstruction(lineno), opc(op) {} + std::string name() override { return opc->name; }; + Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *tbl, Table::Actions::Action *) override {} + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { + bool rv = false; + if (srca) rv |= srca->phvRead(fn); + if (srcb) rv |= srcb->phvRead(fn); + if (srcc) rv |= srcc->phvRead(fn); + return rv; + } + void dbprint(std::ostream &out) const override { + out << "INSTR: " << opc->name << " cmp" << slot; + if (srca) { + out << ", " << (srca_neg ? "-" : "") << *srca; + if (maska != 0xffffffffU) out << " & 0x" << hex(maska); + } + if (srcb) { + out << ", " << (srcb_neg ? 
"-" : "") << *srcb; + if (maskb != 0xffffffffU) out << " & 0x" << hex(maskb); + } + if (srcc) out << ", " << *srcc; + if (learn) out << ", learn"; + if (learn_not) out << ", learn_not"; + } + template + void write_regs(REGS ®s, Table *tbl, Table::Actions::Action *act); + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +static CmpOP::Decode opEQU("equ", 0, false), opNEQ("neq", 1, false), opGRT("grt", 0, true), + opLEQ("leq", 1, true), opGEQ("geq", 2, true), opLSS("lss", 3, true); + +Instruction *CmpOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + auto rv = new CmpOP(this, op[0].lineno); + if (auto *p = strchr(op[0].s, '.')) { + if (type_suffix && !strcmp(p, ".s")) + rv->type = 1; + else if (type_suffix && !strcmp(p, ".u")) + rv->type = 2; + else if (type_suffix && !strcmp(p, ".uus")) + rv->type = 3; + else + error(rv->lineno, "Invalid type %s for %s instruction", p + 1, name.c_str()); + } else if (type_suffix) { + error(rv->lineno, "Missing type for %s instruction", name.c_str()); + } + if (op.size < 1 || op[1].type != tSTR) { + error(rv->lineno, "invalid destination for %s instruction", op[0].s); + return rv; + } + unsigned unit; + int len; + if (op[1] == "lo") { + rv->slot = CMPLO; + } else if (op[1] == "hi") { + rv->slot = CMPHI; + } else if ((sscanf(op[1].s, "p%u%n", &unit, &len) >= 1 || + sscanf(op[1].s, "cmp%u%n", &unit, &len) >= 1) && + unit < Target::STATEFUL_CMP_UNITS() && op[1].s[len] == 0) { + rv->slot = CMP0 + unit; + } else { + error(rv->lineno, "invalid destination for %s instruction", op[0].s); + } + for (int idx = 2; idx < op.size; ++idx) { + if (!rv->learn) { + if (op[idx] == "learn") { + rv->learn = true; + continue; + } + if (op[idx] == "!" 
&& op[idx].type == tCMD && op[idx].vec.size == 2 && + op[idx][1] == "learn") { + rv->learn = rv->learn_not = true; + continue; + } + } + operand src(tbl, act, op[idx], true); + if (!rv->srca && (rv->srca = src.to())) { + rv->srca_neg = src.neg; + rv->maska = src.mask; + src.op = nullptr; + } else if (!rv->srcb && (rv->srcb = src.to())) { + rv->srcb_neg = src.neg; + rv->maskb = src.mask; + src.op = nullptr; + } else if (!rv->srcc && (rv->srcc = src.to())) { + auto *srcc = src.to(); + if (src.neg) srcc->value = -srcc->value; + if (src.mask != ~0U) srcc->value &= src.mask; + src.op = nullptr; + } else if (!rv->srcc && (rv->srcc = src.to())) { + if (src.neg || src.mask != ~0U) + error(src->lineno, "Register file operand cannot be negated or masked"); + src.op = nullptr; + } else if (src) { + error(src->lineno, "Can't have more than one %s operand to an SALU compare", + src->kind()); + } + } + return rv; +} + +bool CmpOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) + return opc == a->opc && slot == a->slot && srca == a->srca && maska == a->maska && + srcb == a->srcb && maskb == a->maskb && srcc == a->srcc && learn == a->learn && + learn_not == a->learn_not; + return false; +} + +Instruction *CmpOP::pass1(Table *tbl_, Table::Actions::Action *act) { + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + if (srca) srca->pass1(tbl); + if (srcb) srcb->pass1(tbl); + if (srcc) srcc->pass1(tbl); + return this; +} + +struct TMatchOP : public SaluInstruction { + const struct Decode : public Instruction::Decode { + std::string name; + Decode(const char *n, target_t target) + : Instruction::Decode(n, target, STATEFUL_ALU), name(n) {} + Decode(const char *n, std::set target) + : Instruction::Decode(n, target, STATEFUL_ALU), name(n) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + } *opc; + operand::Memory *srca = 0; + uint64_t mask = 0; + operand::Phv *srcb = 0; + bool learn = false, learn_not = 
false; + TMatchOP(const Decode *op, int lineno) : SaluInstruction(lineno), opc(op) {} + std::string name() override { return opc->name; }; + Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *tbl, Table::Actions::Action *) override {} + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { + return srcb ? srcb->phvRead(fn) : false; + } + void dbprint(std::ostream &out) const override { + out << "INSTR: " << opc->name << " cmp" << slot; + if (srca) out << ", " << *srca; + if (mask) out << ", 0x" << hex(mask); + if (srcb) out << ", " << *srcb; + if (learn) out << ", learn"; + if (learn_not) out << ", learn_not"; + } + template + void write_regs(REGS ®s, Table *tbl, Table::Actions::Action *act); + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +static TMatchOP::Decode opTMatch("tmatch", { + JBAY, + }); + +Instruction *TMatchOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + auto rv = new TMatchOP(this, op[0].lineno); + if (op.size < 1 || op[1].type != tSTR) { + error(rv->lineno, "invalid destination for %s instruction", op[0].s); + return rv; + } + unsigned unit; + int len; + if ((sscanf(op[1].s, "p%u%n", &unit, &len) >= 1 || + sscanf(op[1].s, "cmp%u%n", &unit, &len) >= 1) && + unit < Target::STATEFUL_TMATCH_UNITS() && op[1].s[len] == 0) { + rv->slot = CMP0 + unit; + } else { + error(rv->lineno, "invalid destination for %s instruction", op[0].s); + } + for (int idx = 2; idx < op.size; ++idx) { + if (!rv->learn) { + if (op[idx] == "learn") { + rv->learn = true; + continue; + } + if (op[idx] == "!" 
&& op[idx].type == tCMD && op[idx].vec.size == 2 && + op[idx][1] == "learn") { + rv->learn = rv->learn_not = true; + continue; + } + } + if (op[idx].type == tINT || op[idx].type == tBIGINT) { + if (rv->mask) + error(op[idx].lineno, "Can't have more than one mask operand to an SALU tmatch"); + rv->mask = get_int64(op[idx], 64, "Integer too large"); + } else if (op[idx].type == tSTR) { + if (auto f = tbl->format->field(op[idx].s)) { + if (rv->srca) { + error(op[idx].lineno, + "Can't have more than one memory operand to an " + "SALU tmatch"); + delete rv->srca; + } + rv->srca = new operand::Memory(op[idx].lineno, tbl, f); + } else if (rv->srcb) { + error(op[idx].lineno, "Can't have more than one phv operand to an SALU tmatch"); + } else if (op[idx] == "phv_lo" || op[idx] == "phv_hi") { + rv->srcb = new operand::PhvRaw(tbl->gress, op[idx]); + } else { + rv->srcb = new operand::PhvReg(tbl->gress, tbl->stage->stageno, op[idx]); + } + } + } + if (!rv->srca || !rv->srcb || !rv->mask) + error(rv->lineno, "Not enough operands to SALU tmatch"); + return rv; +} + +bool TMatchOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) + return opc == a->opc && slot == a->slot && srca == a->srca && srcb == a->srcb && + mask == a->mask && learn == a->learn && learn_not == a->learn_not; + return false; +} + +Instruction *TMatchOP::pass1(Table *tbl_, Table::Actions::Action *act) { + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + if (srca) srca->pass1(tbl); + if (srcb) srcb->pass1(tbl); + if (tbl->tmatch_use[slot].op) { + if (mask != tbl->tmatch_use[slot].op->mask) { + error(lineno, "Incompatable tmatch masks in stateful actions %s and %s", + tbl->tmatch_use[slot].act->name.c_str(), act->name.c_str()); + error(tbl->tmatch_use[slot].op->lineno, "previous use"); + } + } else { + tbl->tmatch_use[slot].act = act; + tbl->tmatch_use[slot].op = this; + } + return this; +} + +// Output ALU instruction +struct OutOP : public SaluInstruction { + struct Decode : public 
Instruction::Decode { + explicit Decode(const char *n) : Instruction::Decode(n, STATEFUL_ALU) {} + Instruction *decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const override; + }; + int predication_encode = STATEFUL_PREDICATION_ENCODE_UNCOND; + operand src; + int output_mux = -1; + bool lmatch = false; + int lmatch_pred = 0; + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void decode_output_mux, + (register_type, Table *tbl, value_t &op)) + void decode_output_mux(Table *tbl, value_t &op) { + SWITCH_FOREACH_TARGET(options.target, decode_output_mux(TARGET(), tbl, op);); + } + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, int decode_output_option, (register_type, value_t &op)) + int decode_output_option(value_t &op) { + SWITCH_FOREACH_TARGET(options.target, return decode_output_option(TARGET(), op);); + } + OutOP(const Decode *op, int lineno) : SaluInstruction(lineno) {} + std::string name() override { return "output"; }; + Instruction *pass1(Table *tbl, Table::Actions::Action *) override; + void pass2(Table *tbl, Table::Actions::Action *) override {} + bool salu_output() const override { return true; } + bool equiv(Instruction *a_) override; + bool phvRead(std::function fn) override { + return src ? 
src->phvRead(fn) : false; + } + void dbprint(std::ostream &out) const override { + out << "INSTR: output " << "pred=0x" << hex(predication_encode) << " word" + << (slot - ALUOUT0) << " mux=" << output_mux; + } + template + void write_regs(REGS ®s, Table *tbl, Table::Actions::Action *act); + FOR_ALL_REGISTER_SETS(DECLARE_FORWARD_VIRTUAL_INSTRUCTION_WRITE_REGS) +}; + +static OutOP::Decode opOUTPUT("output"); + +bool OutOP::equiv(Instruction *a_) { + if (auto *a = dynamic_cast(a_)) + return predication_encode == a->predication_encode && slot == a->slot && + output_mux == a->output_mux; + return false; +} + +Instruction *OutOP::Decode::decode(Table *tbl, const Table::Actions::Action *act, + const VECTOR(value_t) & op) const { + OutOP *rv = new OutOP(this, op[0].lineno); + int idx = 1; + // Check optional predicate operand + if (idx < op.size) { + // Predicate is an integer + if (op[idx].type == tINT) { + rv->predication_encode = op[idx++].i; + // Predicate is an expression + } else if (op[idx].startsWith("cmp") || op[idx] == "!" 
|| op[idx] == "&" || + op[idx] == "|" || op[idx] == "^") { + rv->predication_encode = decode_predicate(op[idx++]); + if (rv->predication_encode == STATEFUL_PREDICATION_ENCODE_NOOP) + warning(op[idx - 1].lineno, "Instruction predicate is always false"); + else if (rv->predication_encode == STATEFUL_PREDICATION_ENCODE_UNCOND) + warning(op[idx - 1].lineno, "Instruction predicate is always true"); + } + } + rv->slot = ALUOUT; + // Check for destination + if (idx < op.size && op[idx].startsWith("word")) { + int unit = -1; + char *end; + if (op[idx].type == tSTR) { + if (isdigit(op[idx].s[4])) { + unit = strtol(op[idx].s + 4, &end, 10); + if (*end) unit = -1; + } + } else if (op[idx].vec.size == 2 && op[idx][1].type == tINT) { + unit = op[idx][1].i; + } + if (unit >= Target::STATEFUL_OUTPUT_UNITS()) + error(op[idx].lineno, "Invalid output dest %s", value_desc(op[idx])); + else + rv->slot = unit + ALUOUT0; + idx++; + } + // Check mux operand + if (idx < op.size) { + rv->src = operand(tbl, act, op[idx], false); + // DANGER -- decoding the output mux here (as part of input parsing) requires that + // the phv section be before the section we're currently parsing in the .bfa file. + // That's always the case with compiler output, but do we want to require it for + // hand-written code? Could reorg stuff to do this in pass1 instead. 
+ rv->decode_output_mux(tbl, op[idx]); + if (rv->output_mux < 0) + error(op[idx].lineno, "invalid operand '%s' for '%s' instruction", value_desc(op[idx]), + op[0].s); + idx++; + } else { + error(rv->lineno, "too few operands for %s instruction", op[0].s); + } + while (idx < op.size) { + if (rv->decode_output_option(op[idx]) < 0) break; + ++idx; + } + if (idx < op.size) error(rv->lineno, "too many operands for %s instruction", op[0].s); + + return rv; +} + +Instruction *OutOP::pass1(Table *tbl_, Table::Actions::Action *act) { + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + if (src) src->pass1(tbl); + if (output_mux == STATEFUL_PREDICATION_OUTPUT) { + if (act->pred_comb_sel >= 0 && act->pred_comb_sel != predication_encode) + error(lineno, "Only one output of predication allowed"); + act->pred_comb_sel = predication_encode; + } + if (lmatch) { + if (tbl->output_lmatch) { + auto *other = dynamic_cast(tbl->output_lmatch); + BUG_CHECK(other); + if (lmatch_pred != other->lmatch_pred) { + error(lineno, "Conflict lmatch output use in stateful %s", tbl->name()); + error(other->lineno, "conflicting use here"); + } + } + tbl->output_lmatch = this; + } + return this; +} + +#include "jbay/salu_inst.cpp" // NOLINT(build/include) +#include "tofino/salu_inst.cpp" // NOLINT(build/include) + +} // end namespace StatefulAlu + +bool StatefulTable::p4c_5192_workaround(const Actions::Action *act) const { + // when trying to output bits 96..127 + // of either memory or phv input in an SALU in 128-bit mode, the model asserts + // Not clear if this is a hardware limitation or a model bug. 
+ // RMT_ASSERTS on lines 547 and 565 of model/src/shared/mau-stateful-alu.cpp + // Workaround is to use 64x2 mode instead which is otherwise equivalent, except + // for possible problems if minmax is used + using namespace StatefulAlu; + if (format->log2size != 7 || is_dual_mode()) return false; // only apply in 128-bit mode + for (auto &inst : act->instr) { + if (auto *out = dynamic_cast(inst.get())) { + if (out->slot > ALUOUT1 && (out->output_mux == 1 || out->output_mux == 3)) { + return true; + } + } + } + return false; +} diff --git a/backends/tofino/bf-asm/sections.h b/backends/tofino/bf-asm/sections.h new file mode 100644 index 00000000000..ea44ce80f07 --- /dev/null +++ b/backends/tofino/bf-asm/sections.h @@ -0,0 +1,104 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_SECTIONS_H_ +#define BACKENDS_TOFINO_BF_ASM_SECTIONS_H_ + +#include + +#include + +#include "asm-types.h" +#include "backends/tofino/bf-asm/json.h" +#include "bfas.h" +#include "map.h" + +/// A Section represents a top level section in assembly +/// Current sections include: +/// version, phv, parser, deparser, stage, dynhash, primitives +class Section : virtual public Parsable, virtual public Contextable { + static std::map *sections; + std::string name; + bool isInput = false; + static Section *get(const char *name) { return ::get(sections, name); } + + protected: + explicit Section(const char *name_) : name(name_) { + if (!sections) sections = new std::map(); + if (get(name_)) { + fprintf(stderr, "Duplicate section handler for %s\n", name_); + exit(1); + } + (*sections)[name] = this; + } + virtual ~Section() { + sections->erase(name); + if (sections->empty()) { + delete sections; + sections = 0; + } + } + /// process the arguments on the same line as the heading + virtual void start(int lineno, VECTOR(value_t) args) {} + /// optionally process the data if not done during parsing + virtual void process() {} + + public: + static int start_section(int lineno, char *name, VECTOR(value_t) args) { + if (Section *sec = get(name)) { + int prev_error_count = error_count; + sec->isInput = true; + sec->start(lineno, args); + return error_count > prev_error_count; + } else { + warning(lineno, "Unknown section %s, ignoring\n", name); + return 1; + } + } + static void asm_section(char *name, VECTOR(value_t) args, value_t data) { + if (Section *sec = get(name)) sec->input(args, data); + } + static void process_all() { + if (sections) + for (auto &it : *sections) it.second->process(); + } + static void output_all(json::map &ctxtJson) { + if (sections) { + for (auto &it : *sections) { + // Skip primitives to be called last + if (it.first == "primitives") continue; + 
it.second->output(ctxtJson); + } + auto &s = *sections; + if (s.count("primitives")) s["primitives"]->output(ctxtJson); + } + } + static bool no_sections_in_assembly() { + if (sections) { + for (auto &it : *sections) { + if (it.second->isInput) return false; + } + } + return true; + } + static bool section_in_assembly(const char *name) { return get(name)->isInput; } + + public: // for gtest + static Section *test_get(const char *name) { return get(name); } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_SECTIONS_H_ */ diff --git a/backends/tofino/bf-asm/selection.cpp b/backends/tofino/bf-asm/selection.cpp new file mode 100644 index 00000000000..e4fafd0441a --- /dev/null +++ b/backends/tofino/bf-asm/selection.cpp @@ -0,0 +1,449 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "data_switchbox.h" +#include "input_xbar.h" +#include "lib/algorithm.h" +#include "misc.h" + +void SelectionTable::setup(VECTOR(pair_t) & data) { + setup_layout(layout, data); + VECTOR(pair_t) p4_info = EMPTY_VECTOR_INIT; + for (auto &kv : MapIterChecked(data, true)) { + if (kv.key == "input_xbar") { + if (CHECKTYPE(kv.value, tMAP)) + input_xbar.emplace_back(InputXbar::create(this, false, kv.key, kv.value.map)); + } else if (kv.key == "mode") { + mode_lineno = kv.value.lineno; + if (CHECKTYPEPM(kv.value, tCMD, kv.value.vec.size == 2 && kv.value[1].type == tINT, + "hash mode and int param")) { + if (kv.value[0] == "resilient") + resilient_hash = true; + else if (kv.value[0] == "fair") + resilient_hash = false; + else + error(kv.value.lineno, "Unknown hash mode %s", kv.value[0].s); + param = kv.value[1].i; + } + } else if (kv.key == "non_linear") { + non_linear_hash = get_bool(kv.value); + } else if (kv.key == "per_flow_enable") { + if (CHECKTYPE(kv.value, tSTR)) { + per_flow_enable = true; + per_flow_enable_param = kv.value.s; + } + } else if (kv.key == "pool_sizes") { + if (CHECKTYPE(kv.value, tVEC)) + for (value_t &v : kv.value.vec) + if (CHECKTYPE(v, tINT)) pool_sizes.push_back(v.i); + } else if (kv.key == "selection_hash") { + if (CHECKTYPE(kv.value, tINT)) selection_hash = kv.value.i; + } else if (kv.key == "hash_dist") { + HashDistribution::parse(hash_dist, kv.value); + if (hash_dist.size() > 1) + error(kv.key.lineno, "More than one hast_dist in a selection table not supported"); + } else if (kv.key == "maprams") { + setup_maprams(kv.value); + } else if (kv.key == "p4") { + if (CHECKTYPE(kv.value, tMAP)) + p4_table = P4Table::get(P4Table::Selection, kv.value.map); + } else if (kv.key == "p4_table") { + push_back(p4_info, "name", std::move(kv.value)); + } else if (kv.key == "p4_table_size") { + 
push_back(p4_info, "size", std::move(kv.value)); + } else if (kv.key == "handle") { + push_back(p4_info, "handle", std::move(kv.value)); + } else if (kv.key == "context_json") { + setup_context_json(kv.value); + } else if (kv.key == "row" || kv.key == "logical_row" || kv.key == "column" || + kv.key == "bus") { + /* already done in setup_layout */ + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (p4_info.size) { + if (p4_table) + error(p4_info[0].key.lineno, "old and new p4 table info in %s", name()); + else + p4_table = P4Table::get(P4Table::Selection, p4_info); + } + fini(p4_info); + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_srams(); + else + alloc_rams(true, stage->sram_use); +} + +void SelectionTable::pass1() { + LOG1("### Selection table " << name() << " pass1 " << loc()); + if (!p4_table) + p4_table = P4Table::alloc(P4Table::Selection, this); + else + p4_table->check(this); + alloc_vpns(); + alloc_maprams(); + std::sort(layout.begin(), layout.end(), + [](const Layout &a, const Layout &b) -> bool { return a.row > b.row; }); + for (auto &ixb : input_xbar) ixb->pass1(); + if (param < 0 || param > (resilient_hash ? 7 : 2)) + error(mode_lineno, "Invalid %s hash param %d", resilient_hash ? 
"resilient" : "fair", + param); + min_words = INT_MAX; + max_words = 0; + if (pool_sizes.empty()) { + min_words = max_words = 1; + } else { + for (int size : pool_sizes) { + int words = (size + SELECTOR_PORTS_PER_WORD - 1) / SELECTOR_PORTS_PER_WORD; + if (words < min_words) min_words = words; + if (words > max_words) max_words = words; + } + } + stage->table_use[timing_thread(gress)] |= Stage::USE_SELECTOR; + if (max_words > 1) { + stage->table_use[timing_thread(gress)] |= Stage::USE_WIDE_SELECTOR; + for (auto &hd : hash_dist) hd.xbar_use |= HashDistribution::HASHMOD_DIVIDEND; + } + for (auto &hd : hash_dist) hd.pass1(this, HashDistribution::SELECTOR, non_linear_hash); + bool home = true; // first layout row is home row + for (Layout &row : layout) { + if (home) + need_bus(row.lineno, stage->selector_adr_bus_use, row.row | 3, "Selector Address"); + need_bus(row.lineno, stage->selector_adr_bus_use, row.row, "Selector Address"); + if ((row.row & 2) == 0) // even phy rows wired together + need_bus(row.lineno, stage->selector_adr_bus_use, row.row ^ 1, "Selector Address"); + home = false; + } + AttachedTable::pass1(); +} + +void SelectionTable::pass2() { + LOG1("### Selection table " << name() << " pass2 " << loc()); + for (auto &ixb : input_xbar) { + ixb->pass2(); + if (selection_hash < 0 && (selection_hash = ixb->hash_group()) < 0) + error(lineno, "No selection_hash in selector table %s", name()); + } + if (input_xbar.empty()) { + error(lineno, "No input xbar in selector table %s", name()); + } + for (auto &hd : hash_dist) hd.pass2(this); +} + +void SelectionTable::pass3() { LOG1("### Selection table " << name() << " pass3 " << loc()); } + +int SelectionTable::indirect_shiftcount() const { + return METER_ADDRESS_ZERO_PAD - 7; // selectors always start at bit 7 address +} + +unsigned SelectionTable::per_flow_enable_bit(MatchTable *match) const { + if (!per_flow_enable) + return SELECTOR_PER_FLOW_ENABLE_START_BIT; + else + return 
AttachedTable::per_flow_enable_bit(match); +} + +unsigned SelectionTable::determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) const { + return determine_meter_shiftcount(call, group, word, tcam_shift); +} + +template +void SelectionTable::write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args) { + auto &merge = regs.rams.match.merge; + setup_physical_alu_map(regs, type, bus, meter_group()); + merge.mau_payload_shifter_enable[type][bus].meter_adr_payload_shifter_en = 1; + + unsigned adr_mask = 0U; + unsigned per_entry_en_mux_ctl = 0U; + unsigned adr_default = 0U; + unsigned meter_type_position = 0U; + AttachedTable::determine_meter_merge_regs(match, type, bus, args, METER_SELECTOR, adr_mask, + per_entry_en_mux_ctl, adr_default, + meter_type_position); + merge.mau_meter_adr_default[type][bus] = adr_default; + merge.mau_meter_adr_mask[type][bus] = adr_mask; + merge.mau_meter_adr_per_entry_en_mux_ctl[type][bus] = per_entry_en_mux_ctl; + merge.mau_meter_adr_type_position[type][bus] = meter_type_position; +} + +/** + * This validates the call as the value to the selection_length key. The call requires + * two arguments: + * + * 1. A selector length mod argument + * 2. A selector length shift argument + * + * This is formatted in the following way: + * (msb_side) {shift, mod} (lsb_side) + * + * These can come from match overhead, or can come from $DEFAULT + * + * In actuality, both of these arguments are extracted by the same extractor, and they must + * be contiguous to each other. The reason for the separation is that the driver requires + * them to be separated in the pack format. 
+ */ +bool SelectionTable::validate_length_call(const Table::Call &call) { + if (call.args.size() != 2) { + error(call.lineno, "The selector length call for %s requires two arguments", name()); + return false; + } + + if (call.args[0].name()) { + if (call.args[0] != "$DEFAULT") { + error(call.lineno, "Index %s for %s length cannot be found", call.args[0].name(), + name()); + return false; + } + } else if (!call.args[0].field()) { + error(call.lineno, "Index for %s length cannot be understood", name()); + return false; + } + + if (call.args[1].name()) { + if (call.args[1] != "$DEFAULT") { + error(call.lineno, "Index %s for %s length cannot be found", call.args[0].name(), + name()); + return false; + } + } else if (!call.args[1].field()) { + error(call.lineno, "Index for %s length cannot be understood", name()); + return false; + } + + if (call.args[0].field() && call.args[1].field()) { + auto mod = call.args[0].field(); + auto shift = call.args[1].field(); + + if (mod->bit(0) + mod->size != shift->bit(0)) { + error(call.lineno, "Indexes for %s must be contiguous on the format", name()); + return false; + } + } + return true; +} + +unsigned SelectionTable::determine_length_shiftcount(const Table::Call &call, int group, + int word) const { + if (auto f = call.args[0].field()) { + BUG_CHECK(f->by_group[group]->bit(0) / 128 == word && group == 0); + BUG_CHECK(f->by_group[group]->bit(0) % 128 <= 8); + return f->by_group[group]->bit(0) % 128U; + } + return 0; +} + +unsigned SelectionTable::determine_length_mask(const Table::Call &call) const { + unsigned rv = 0; + if (auto f = call.args[0].field()) rv |= ((1U << f->size) - 1); + if (auto f = call.args[1].field()) rv |= ((1U << f->size) - 1) << SELECTOR_LENGTH_MOD_BITS; + return rv; +} + +unsigned SelectionTable::determine_length_default(const Table::Call &call) const { + unsigned rv = 0; + if (call.args[0].name() && strcmp(call.args[0].name(), "$DIRECT") == 0) rv = 1; + return rv; +} + +template <> +void 
SelectionTable::setup_physical_alu_map(Target::Tofino::mau_regs ®s, int type, int bus, + int alu) { + auto &merge = regs.rams.match.merge; + merge.mau_physical_to_meter_alu_ixbar_map[type][bus / 8U].set_subfield(4 | alu, 3 * (bus % 8U), + 3); +} +template <> +void SelectionTable::setup_physical_alu_map(Target::JBay::mau_regs ®s, int type, int bus, + int alu) { + auto &merge = regs.rams.match.merge; + merge.mau_physical_to_meter_alu_icxbar_map[type][bus / 8U] |= (1U << alu) << (4 * (bus % 8U)); +} + +template +void SelectionTable::write_regs_vt(REGS ®s) { + LOG1("### Selection table " << name() << " write_regs " << loc()); + for (auto &ixb : input_xbar) ixb->write_regs(regs); + Layout *home = &layout[0]; + bool push_on_overflow = false; + auto &map_alu = regs.rams.map_alu; + DataSwitchboxSetup swbox(regs, this); + int minvpn, maxvpn; + layout_vpn_bounds(minvpn, maxvpn, true); + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + for (Layout &logical_row : layout) { + unsigned row = logical_row.row / 2U; + unsigned side = logical_row.row & 1; /* 0 == left 1 == right */ + /* FIXME factor vpn/mapram stuff with counter.cpp */ + auto vpn = logical_row.vpns.begin(); + auto mapram = logical_row.maprams.begin(); + auto &map_alu_row = map_alu.row[row]; + LOG2("# DataSwitchbox.setup(" << row << ") home=" << home->row / 2U); + swbox.setup_row(row); + for (auto &memunit : logical_row.memunits) { + BUG_CHECK(memunit.stage == INT_MIN && memunit.row == logical_row.row, + "bogus %s in logical row %d", memunit.desc(), logical_row.row); + unsigned col = memunit.col + 6 * side; + swbox.setup_row_col(row, col, *vpn); + write_mapram_regs(regs, row, *mapram, *vpn, MapRam::SELECTOR_SIZE); + if (gress) regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row); + ++mapram, ++vpn; + } + if (&logical_row == home) { + auto &vh_adr_xbar = regs.rams.array.row[row].vh_adr_xbar; + setup_muxctl( + 
vh_adr_xbar.exactmatch_row_hashadr_xbar_ctl[SELECTOR_VHXBAR_HASH_BUS_INDEX], + selection_hash); + vh_adr_xbar.alu_hashdata_bytemask.alu_hashdata_bytemask_right = + bitmask2bytemask(input_xbar[0]->hash_group_bituse()); + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_base = minvpn; + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_limit = maxvpn; + } else { + auto &adr_ctl = map_alu_row.vh_xbars.adr_dist_oflo_adr_xbar_ctl[side]; + if (home->row >= 8 && logical_row.row < 8) { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = 0; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::OVERFLOW; + push_on_overflow = true; + BUG_CHECK(options.target == TOFINO); + } else { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = home->row % 8; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::METER; + } + adr_ctl.adr_dist_oflo_adr_xbar_enable = 1; + } + } + + unsigned meter_group = home->row / 4U; + auto &selector_ctl = map_alu.meter_group[meter_group].selector.selector_alu_ctl; + selector_ctl.sps_nonlinear_hash_enable = non_linear_hash ? 1 : 0; + if (resilient_hash) + selector_ctl.resilient_hash_enable = param; + else + selector_ctl.selector_fair_hash_select = param; + selector_ctl.resilient_hash_mode = resilient_hash ? 1 : 0; + selector_ctl.selector_enable = 1; + auto &delay_ctl = map_alu.meter_alu_group_data_delay_ctl[meter_group]; + delay_ctl.meter_alu_right_group_delay = + Target::METER_ALU_GROUP_DATA_DELAY() + meter_group / 2 + stage->tcam_delay(gress); + delay_ctl.meter_alu_right_group_enable = + meter_alu_fifo_enable_from_mask(regs, resilient_hash ? 
0x7f : 0x3);
+    /* FIXME -- error_ctl should be configurable */
+    auto &error_ctl = map_alu.meter_alu_group_error_ctl[meter_group];
+    error_ctl.meter_alu_group_ecc_error_enable = 1;
+    error_ctl.meter_alu_group_sel_error_enable = 1;
+    error_ctl.meter_alu_group_thread = gress;
+
+    auto &merge = regs.rams.match.merge;
+    auto &adrdist = regs.rams.match.adrdist;
+    for (MatchTable *m : match_tables) {
+        adrdist.adr_dist_meter_adr_icxbar_ctl[m->logical_id] |= 1 << meter_group;
+        // auto &icxbar = adrdist.adr_dist_meter_adr_icxbar_ctl[m->logical_id];
+        // icxbar.address_distr_to_logical_rows = 1 << home->row;
+        // icxbar.address_distr_to_overflow = push_on_overflow;
+        if (auto &act = m->get_action()) {
+            /* FIXME -- can't be attached to multiple tables ? */
+            unsigned fmt = 3;
+            fmt = std::max(fmt, act->format->log2size);
+            if (auto at = dynamic_cast(&(*act)))
+                for (auto &f : at->get_action_formats()) fmt = std::max(fmt, f.second->log2size);
+            merge.mau_selector_action_entry_size[meter_group] = fmt - 3;
+        }  // val in bytes
+        adrdist.mau_ad_meter_virt_lt[meter_group] |= 1U << m->logical_id;
+        adrdist.movereg_ad_meter_alu_to_logical_xbar_ctl[m->logical_id / 8U].set_subfield(
+            4 | meter_group, 3 * (m->logical_id % 8U), 3);
+        setup_logical_alu_map(regs, m->logical_id, meter_group);
+    }
+    if (max_words == 1) adrdist.movereg_meter_ctl[meter_group].movereg_ad_meter_shift = 7;
+    if (push_on_overflow) {
+        adrdist.oflo_adr_user[0] = adrdist.oflo_adr_user[1] = AdrDist::METER;
+        adrdist.deferred_oflo_ctl = 1 << ((home->row - 8) / 2U);
+    }
+    adrdist.packet_action_at_headertime[1][meter_group] = 1;
+    for (auto &hd : hash_dist) hd.write_regs(regs, this);
+    if (gress == INGRESS || gress == GHOST) {
+        merge.meter_alu_thread[0].meter_alu_thread_ingress |= 1U << meter_group;
+        merge.meter_alu_thread[1].meter_alu_thread_ingress |= 1U << meter_group;
+    } else if (gress == EGRESS) {
+        merge.meter_alu_thread[0].meter_alu_thread_egress |= 1U << meter_group;
+
merge.meter_alu_thread[1].meter_alu_thread_egress |= 1U << meter_group; + } + if (gress == EGRESS) { + regs.rams.map_alu.meter_group[meter_group].meter.meter_ctl.meter_alu_egress = 1; + } +} + +template <> +void SelectionTable::setup_logical_alu_map(Target::Tofino::mau_regs ®s, int logical_id, + int alu) { + auto &merge = regs.rams.match.merge; + if (max_words > 1) merge.mau_logical_to_meter_alu_map.set_subfield(16 | logical_id, 5 * alu, 5); + merge.mau_meter_alu_to_logical_map[logical_id / 8U].set_subfield(4 | alu, 3 * (logical_id % 8U), + 3); +} +template <> +void SelectionTable::setup_logical_alu_map(Target::JBay::mau_regs ®s, int logical_id, int alu) { + auto &merge = regs.rams.match.merge; + merge.mau_logical_to_meter_alu_map[logical_id / 8U] |= (1U << alu) << ((logical_id % 8U) * 4); + merge.mau_meter_alu_to_logical_map[logical_id / 8U].set_subfield(4 | alu, 3 * (logical_id % 8U), + 3); +} + +std::vector SelectionTable::determine_spare_bank_memory_units() const { + if (bound_stateful) return bound_stateful->determine_spare_bank_memory_units(); + return {}; +} + +void SelectionTable::gen_tbl_cfg(json::vector &out) const { + // Stage table size reflects how many RAM lines are available for the selector, according + // to henry wang. + int size = (layout_size() - 1) * 1024; + json::map &tbl = *base_tbl_cfg(out, "selection", size); + tbl["selection_type"] = resilient_hash ? "resilient" : "fair"; + tbl["selector_name"] = p4_table ? p4_table->p4_name() : "undefined"; + tbl["selection_key_name"] = "undefined"; // FIXME! + std::string hr = how_referenced(); + if (hr.empty()) hr = indirect ? 
"indirect" : "direct"; + tbl["how_referenced"] = hr; + if (pool_sizes.size() > 0) + tbl["max_port_pool_size"] = *std::max_element(std::begin(pool_sizes), std::end(pool_sizes)); + for (MatchTable *m : match_tables) { + if (auto &act = m->get_action()) { + if (auto at = dynamic_cast(&(*act))) { + tbl["bound_to_action_data_table_handle"] = act->handle(); + break; + } + } + } + json::map &stage_tbl = *add_stage_tbl_cfg(tbl, "selection", size); + add_pack_format(stage_tbl, 128, 1, 1); + stage_tbl["memory_resource_allocation"] = + gen_memory_resource_allocation_tbl_cfg("sram", layout, bound_stateful != nullptr); + add_alu_index(stage_tbl, "meter_alu_index"); + stage_tbl["sps_scramble_enable"] = non_linear_hash; + if (context_json) stage_tbl.merge(*context_json); +} + +DEFINE_TABLE_TYPE(SelectionTable) +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void SelectionTable::write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + { write_merge_regs_vt(regs, match, type, bus, args); }) diff --git a/backends/tofino/bf-asm/slist.h b/backends/tofino/bf-asm/slist.h new file mode 100644 index 00000000000..d68a7dd6bad --- /dev/null +++ b/backends/tofino/bf-asm/slist.h @@ -0,0 +1,52 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_SLIST_H_
+#define BACKENDS_TOFINO_BF_ASM_SLIST_H_
+
+template
+class slist {  // minimal singly-linked "cons" list: a node prepends one value to a shared tail
+    const slist *next;
+    T value;
+
+ public:
+    explicit slist(T v) : next(nullptr), value(v) {}  // single-element list
+    slist(T v, const slist *n) : next(n), value(v) {}  // prepend v to list n (n may be shared)
+    typedef T value_type;
+    class iterator : public std::iterator {  // NOTE(review): std::iterator is deprecated since C++17; prefer explicit member typedefs for the iterator traits
+        friend class slist;
+        const slist *ptr;
+        iterator() : ptr(nullptr) {}  // private: past-the-end sentinel, only slist::end() makes it
+        explicit iterator(const slist *p) : ptr(p) {}
+
+     public:
+        iterator &operator++() {
+            ptr = ptr->next;
+            return *this;
+        }
+        bool operator==(const iterator &a) const { return ptr == a.ptr; }
+        bool operator!=(const iterator &a) const { return ptr != a.ptr; }
+        const T &operator*() const { return ptr->value; }
+        const T *operator->() const { return &ptr->value; }
+    };
+    typedef iterator const_iterator;  // traversal is read-only, so iterator == const_iterator
+
+    iterator begin() const { return iterator(this); }  // the list object itself is the head node
+    iterator end() const { return iterator(); }  // null node pointer marks past-the-end
+};
+
+#endif /* BACKENDS_TOFINO_BF_ASM_SLIST_H_ */
diff --git a/backends/tofino/bf-asm/sram_match.cpp b/backends/tofino/bf-asm/sram_match.cpp
new file mode 100644
index 00000000000..697a796d028
--- /dev/null
+++ b/backends/tofino/bf-asm/sram_match.cpp
@@ -0,0 +1,1505 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/action_bus.h" +#include "backends/tofino/bf-asm/input_xbar.h" +#include "backends/tofino/bf-asm/instruction.h" +#include "backends/tofino/bf-asm/mask_counter.h" +#include "backends/tofino/bf-asm/misc.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "lib/algorithm.h" +#include "lib/hex.h" + +Table::Format::Field *SRamMatchTable::lookup_field(const std::string &n, + const std::string &act) const { + auto *rv = format ? format->field(n) : nullptr; + if (!rv && gateway) rv = gateway->lookup_field(n, act); + if (!rv && !act.empty()) { + if (auto call = get_action()) rv = call->lookup_field(n, act); + } + if (!rv && n == "immediate" && !::Phv::get(gress, stage->stageno, n)) { + static Format::Field default_immediate(nullptr, 32, Format::Field::USED_IMMED); + rv = &default_immediate; + } + return rv; +} + +const char *SRamMatchTable::Ram::desc() const { + static char buffer[256], *p = buffer; + char *end = buffer + sizeof(buffer), *rv; + do { + if (end - p < 7) p = buffer; + rv = p; + if (stage >= 0) + p += snprintf(p, end - p, "Ram %d,%d,%d", stage, row, col); + else if (row >= 0) + p += snprintf(p, end - p, "Ram %d,%d", row, col); + else + p += snprintf(p, end - p, "Lamb %d", col); + } while (p++ >= end); + return rv; +} + +/* calculate the 18-bit byte/nybble mask tofino uses for matching in a 128-bit word */ +static unsigned tofino_bytemask(int lo, int hi) { + unsigned rv = 0; + for (unsigned i = lo / 4U; i <= hi / 4U; i++) rv |= 1U << (i < 28 ? i / 2 : i - 14); + return rv; +} + +/** + * Determining the result bus for an entry, if that entry has no overhead. The result bus + * is still needed to get the direct address location to find action data / run an + * instruction, etc. 
+ * + * This section maps the allocation scheme used in the TableFormat::Use in p4c, found + * in the function result_bus_words + */ +void SRamMatchTable::no_overhead_determine_result_bus_usage() { + bitvec result_bus_words; + + for (int i = 0; i < static_cast(group_info.size()); i++) { + BUG_CHECK(group_info[i].overhead_word < 0); + if (group_info[i].match_group.size() == 1) { + group_info[i].result_bus_word = group_info[i].match_group.begin()->first; + result_bus_words.setbit(group_info[i].result_bus_word); + } + } + + for (int i = 0; i < static_cast(group_info.size()); i++) { + if (group_info[i].overhead_word < 0 && group_info[i].match_group.size() > 1) { + bool result_bus_set = false; + for (auto match_group : group_info[i].match_group) { + if (result_bus_words.getbit(match_group.first)) { + group_info[i].result_bus_word = match_group.first; + result_bus_set = true; + } + } + if (!result_bus_set) + group_info[i].result_bus_word = group_info[i].match_group.begin()->first; + LOG1(" format group " << i << " no overhead multiple match groups"); + } + } +} + +void SRamMatchTable::verify_format(Target::Tofino) { + if (format->log2size < 7) format->log2size = 7; + format->pass1(this); + group_info.resize(format->groups()); + unsigned fmt_width = (format->size + 127) / 128; + if (word_info.size() > fmt_width) { + warning(format->lineno, "Match group map wider than format, padding out format"); + format->size = word_info.size() * 128; + fmt_width = word_info.size(); + while ((1U << format->log2size) < format->size) ++format->log2size; + } + for (unsigned i = 0; i < format->groups(); i++) { + auto &info = group_info[i]; + info.tofino_mask.resize(fmt_width); + if (Format::Field *match = format->field("match", i)) { + for (auto &piece : match->bits) { + unsigned word = piece.lo / 128; + if (word != piece.hi / 128) + error(format->lineno, + "'match' field must be explictly split across " + "128-bit boundary in table %s", + name()); + info.tofino_mask[word] |= 
tofino_bytemask(piece.lo % 128, piece.hi % 128); + info.match_group[word] = -1; + } + } + if (auto *version = format->field("version", i)) { + if (version->bits.size() != 1) error(format->lineno, "'version' field cannot be split"); + auto &piece = version->bits[0]; + unsigned word = piece.lo / 128; + if (version->size != 4 || (piece.lo % 4) != 0) + error(format->lineno, + "'version' field not 4 bits and nibble aligned " + "in table %s", + name()); + info.tofino_mask[word] |= tofino_bytemask(piece.lo % 128, piece.hi % 128); + info.match_group[word] = -1; + } + for (unsigned j = 0; j < i; j++) + for (unsigned word = 0; word < fmt_width; word++) + if (group_info[j].tofino_mask[word] & info.tofino_mask[word]) { + int bit = ffs(group_info[j].tofino_mask[word] & info.tofino_mask[word]) - 1; + if (bit >= 14) bit += 14; + error(format->lineno, "Match groups %d and %d both use %s %d in word %d", i, j, + bit > 20 ? "nibble" : "byte", bit, word); + break; + } + for (auto it = format->begin(i); it != format->end(i); it++) { + Format::Field &f = it->second; + if (it->first == "match" || it->first == "version" || it->first == "proxy_hash") + continue; + if (f.bits.size() != 1) { + error(format->lineno, "Can't deal with split field %s", it->first.c_str()); + continue; + } + unsigned limit = Target::MAX_OVERHEAD_OFFSET(); + if (it->first == "next") limit = Target::MAX_OVERHEAD_OFFSET_NEXT(); + unsigned word = f.bit(0) / 128; + if (info.overhead_word < 0) { + info.overhead_word = word; + format->overhead_word = word; + LOG5("Setting overhead word for format : " << word); + info.overhead_bit = f.bit(0) % 128; + info.match_group[word] = -1; + } else if (info.overhead_word != static_cast(word)) { + error(format->lineno, "Match overhead group %d split across words", i); + } else if (word != f.bit(f.size - 1) / 128 || f.bit(f.size - 1) % 128 >= limit) { + error(format->lineno, "Match overhead field %s(%d) not in bottom %d bits", + it->first.c_str(), i, limit); + } + if 
(!info.match_group.count(word)) + error(format->lineno, "Match overhead in group %d in word with no match?", i); + if ((unsigned)info.overhead_bit > f.bit(0) % 128) info.overhead_bit = f.bit(0) % 128; + } + info.vpn_offset = i; + } + if (word_info.empty()) { + word_info.resize(fmt_width); + if (format->field("next")) { + /* 'next' for match group 0 must be in bit 0, so make the format group with + * overhead in bit 0 match group 0 in its overhead word */ + for (unsigned i = 0; i < group_info.size(); i++) { + if (group_info[i].overhead_bit == 0) { + BUG_CHECK(error_count > 0 || word_info[group_info[i].overhead_word].empty()); + group_info[i].match_group[group_info[i].overhead_word] = 0; + word_info[group_info[i].overhead_word].push_back(i); + } + } + } + for (unsigned i = 0; i < group_info.size(); i++) { + if (group_info[i].match_group.size() > 1) { + for (auto &mgrp : group_info[i].match_group) { + if (mgrp.second >= 0) continue; + if ((mgrp.second = word_info[mgrp.first].size()) > 1) + error(format->lineno, "Too many multi-word groups using word %d", + mgrp.first); + word_info[mgrp.first].push_back(i); + } + } + } + } else { + if (word_info.size() != fmt_width) + error(mgm_lineno, "Match group map doesn't match format size"); + for (unsigned i = 0; i < word_info.size(); i++) { + for (unsigned j = 0; j < word_info[i].size(); j++) { + int grp = word_info[i][j]; + if (grp < 0 || (unsigned)grp >= format->groups()) { + error(mgm_lineno, "Invalid group number %d", grp); + } else if (!group_info[grp].match_group.count(i)) { + error(mgm_lineno, "Format group %d doesn't match in word %d", grp, i); + } else { + group_info[grp].match_group[i] = j; + auto *next = format->field("next", grp); + if (!next && hit_next.size() > 1) next = format->field("action", grp); + if (next) { + if (next->bit(0) / 128 != i) continue; + static unsigned limit[5] = {0, 8, 32, 32, 32}; + unsigned bit = next->bit(0) % 128U; + if (!j && bit) + error(mgm_lineno, + "Next(%d) field must start at bit %d 
to be in " + "match group 0", + grp, i * 128); + else if (j && (!bit || bit > limit[j])) + warning(mgm_lineno, + "Next(%d) field must start in range %d..%d " + "to be in match group %d", + grp, i * 128 + 1, i * 128 + limit[j], j); + } + } + } + } + } + if (hit_next.size() > 1 && !format->field("next") && !format->field("action")) + error(format->lineno, "No 'next' field in format"); + if (error_count > 0) return; + + for (int i = 0; i < static_cast(group_info.size()); i++) { + if (group_info[i].match_group.size() == 1) { + for (auto &mgrp : group_info[i].match_group) { + if (mgrp.second >= 0) continue; + if ((mgrp.second = word_info[mgrp.first].size()) > 4) + error(format->lineno, "Too many match groups using word %d", mgrp.first); + word_info[mgrp.first].push_back(i); + } + } + // Determining the result bus word, where the overhead is supposed to be + } + + bool has_overhead_word = false; + bool overhead_word_set = false; + for (int i = 0; i < static_cast(group_info.size()); i++) { + if (overhead_word_set) BUG_CHECK((group_info[i].overhead_word >= 0) == has_overhead_word); + if (group_info[i].overhead_word >= 0) { + has_overhead_word = true; + group_info[i].result_bus_word = group_info[i].overhead_word; + } + overhead_word_set = true; + } + + if (!has_overhead_word) no_overhead_determine_result_bus_usage(); + + /** + * Determining the result bus for an entry, if that entry has no overhead. The result bus + * is still needed to get the direct address location to find action data / run an + * instruction, etc. 
+ * + * This section maps the allocation scheme used in the TableFormat::Use in p4c, found + * in the function result_bus_words + */ + + for (int i = 0; i < static_cast(group_info.size()); i++) { + LOG1(" masks: " << hexvec(group_info[i].tofino_mask)); + for (auto &mgrp : group_info[i].match_group) + LOG1(" match group " << mgrp.second << " in word " << mgrp.first); + } + + for (unsigned i = 0; i < word_info.size(); i++) + LOG1(" word " << i << " groups: " << word_info[i]); + if (options.match_compiler && 0) { + /* hack to match the compiler's nibble usage -- if any of the top 4 nibbles is + * unused in a word, mark it as used by any group that uses the other nibble of the + * byte, UNLESS it is used for the version. This is ok, as the unused nibble will + * end up being masked off by the match_mask anyways */ + for (unsigned word = 0; word < word_info.size(); word++) { + unsigned used_nibbles = 0; + for (auto group : word_info[word]) + used_nibbles |= group_info[group].tofino_mask[word] >> 14; + for (unsigned nibble = 0; nibble < 4; nibble++) { + if (!((used_nibbles >> nibble) & 1) && ((used_nibbles >> (nibble ^ 1)) & 1)) { + LOG1(" ** fixup nibble " << nibble << " in word " << word); + for (auto group : word_info[word]) + if ((group_info[group].tofino_mask[word] >> (14 + (nibble ^ 1))) & 1) { + if (auto *version = format->field("version", group)) { + if (version->bit(0) == word * 128 + (nibble ^ 1) * 4 + 112) { + LOG1(" skip group " << group << " (version)"); + continue; + } + } + group_info[group].tofino_mask[word] |= 1 << (14 + nibble); + LOG1(" adding to group " << group); + } + } + } + } + } + + verify_match(fmt_width); +} + +void SRamMatchTable::verify_format_pass2(Target::Tofino) {} + +/** + * Guarantees that each match field is a PHV field, which is the standard unless the table is + * a proxy hash table. 
+ */ +bool SRamMatchTable::verify_match_key() { + for (auto *match_key : match) { + auto phv_p = dynamic_cast(match_key); + if (phv_p == nullptr) { + error(match_key->get_lineno(), "A non PHV match key in table %s", name()); + continue; + } + auto phv_ref = *phv_p; + if (phv_ref.check() && phv_ref->reg.mau_id() < 0) + error(phv_ref.lineno, "%s not accessable in mau", phv_ref->reg.name); + } + auto match_format = format->field("match"); + if (match_format && match.empty()) { + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + for (auto ixbar_element : *input_xbar[0]) { + match.emplace_back(new Phv::Ref(ixbar_element.second.what)); + } + } + return error_count == 0; +} + +std::unique_ptr SRamMatchTable::gen_memory_resource_allocation_tbl_cfg( + const Way &way) const { + json::map mra; + unsigned vpn_ctr = 0; + unsigned fmt_width = format ? (format->size + 127) / 128 : 0; + unsigned ramdepth = way.isLamb() ? LAMB_DEPTH_BITS : SRAM_DEPTH_BITS; + if (hash_fn_ids.count(way.group_xme) > 0) + mra["hash_function_id"] = hash_fn_ids.at(way.group_xme); + mra["hash_entry_bit_lo"] = way.index; + mra["hash_entry_bit_hi"] = way.index + ramdepth + way.subword_bits - 1; + mra["number_entry_bits"] = ramdepth; + mra["number_subword_bits"] = way.subword_bits; + if (way.select) { + int lo = way.select.min().index(), hi = way.select.max().index(); + mra["hash_select_bit_lo"] = lo; + mra["hash_select_bit_hi"] = hi; + if (way.select.popcount() != hi - lo + 1) { + warning(way.lineno, "driver does not support discontinuous bits in a way mask"); + mra["hash_select_bit_mask"] = way.select.getrange(lo, 32); + } + } else { + mra["hash_select_bit_lo"] = mra["hash_select_bit_hi"] = 40; + } + mra["number_select_bits"] = way.select.popcount(); + mra["memory_type"] = way.isLamb() ? 
"lamb" : "sram"; + json::vector mem_units; + json::vector &mem_units_and_vpns = mra["memory_units_and_vpns"] = json::vector(); + int way_uses_lambs = -1; // don't know yet + for (auto &ram : way.rams) { + if (ram.isLamb()) { + BUG_CHECK(way_uses_lambs != 0, "mixed lambs and memories in a way"); + way_uses_lambs = 1; + } else { + BUG_CHECK(way_uses_lambs != 1, "mixed lambs and memories in a way"); + way_uses_lambs = 0; + if (mem_units.empty()) + vpn_ctr = layout_get_vpn(ram); + else + BUG_CHECK(vpn_ctr == layout_get_vpn(ram)); + } + mem_units.push_back(json_memunit(ram)); + if (mem_units.size() == fmt_width) { + json::map tmp; + tmp["memory_units"] = std::move(mem_units); + mem_units = json::vector(); + json::vector vpns; + for (auto &grp : group_info) vpns.push_back(vpn_ctr + grp.vpn_offset); + vpn_ctr += group_info.size(); + if (group_info.empty()) // FIXME -- can this happen? + vpns.push_back(vpn_ctr++); + tmp["vpns"] = std::move(vpns); + mem_units_and_vpns.push_back(std::move(tmp)); + } + } + BUG_CHECK(mem_units.empty()); + return json::mkuniq(std::move(mra)); +} + +/** + * The purpose of this function is to generate the hash_functions JSON node. The hash functions + * are for the driver to determine what RAM/RAM line to write the match data into during entry + * adds. + * + * The JSON nodes for the hash functions are the following: + * - hash_bits - A vector determining what each bit is calculated from. Look at the comments + * over the function gen_hash_bits + * The following two fields are required for High Availability mode and Entry Reads from HW + * - ghost_bit_to_hash_bit - A vector describing where the ghost bits are in the hash matrix + * - ghost_bit_info - A vector indicating which p4 fields are used as the ghost bits + * The following field is only necessary for dynamic_key_masks + * - hash_function_number - which of the 8 hash functions this table is using. 
+ *
+ * The order of the hash functions must coordinate to the order of the hash_function_ids used
+ * in the Way JSON, as this is how a single way knows which hash function to use for its lookup
+ */
+void SRamMatchTable::add_hash_functions(json::map &stage_tbl) const {
+    BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name());
+    auto &ht = input_xbar[0]->get_hash_tables();
+    if (ht.size() == 0) return;
+    // Output cjson node only if hash tables present
+    std::map<int, bitvec> hash_bits_per_group;
+    for (auto &way : ways) {
+        int depth = way.isLamb() ? LAMB_DEPTH_BITS : SRAM_DEPTH_BITS;
+        if (format->field("match")) {
+            // cuckoo or BPH
+        } else {
+            depth += ceil_log2(format->groups());
+            if (format->size < 128) depth += 7 - ceil_log2(format->size);
+        }
+        bitvec way_impact;
+        way_impact.setrange(way.index, depth);
+        way_impact |= way.select;
+        hash_bits_per_group[way.group_xme] |= way_impact;
+    }
+
+    // Order so that the order is the same of the hash_function_ids in the ways
+    // FIXME -- this seems pointless, as iterating over a std::map will always be
+    // in order.
So this loop could go away and the later loop be over hash_bits_per_group
+    std::vector<std::pair<int, bitvec>> hash_function_to_hash_bits(hash_fn_ids.size());
+    for (auto entry : hash_bits_per_group) {
+        int hash_fn_id = hash_fn_ids.at(entry.first);
+        if (hash_fn_id >= hash_fn_ids.size()) BUG();
+        hash_function_to_hash_bits[hash_fn_id] = entry;
+    }
+
+    json::vector &hash_functions = stage_tbl["hash_functions"] = json::vector();
+    for (auto entry : hash_function_to_hash_bits) {
+        int hash_group_no = entry.first;
+
+        json::map hash_function;
+        json::vector &hash_bits = hash_function["hash_bits"] = json::vector();
+        hash_function["hash_function_number"] = hash_group_no;
+        json::vector &ghost_bits_to_hash_bits = hash_function["ghost_bit_to_hash_bit"] =
+            json::vector();
+        json::vector &ghost_bits_info = hash_function["ghost_bit_info"] = json::vector();
+        // Get the hash group data
+        if (auto *hash_group = input_xbar[0]->get_hash_group(hash_group_no)) {
+            // Process only hash tables used per hash group
+            for (unsigned id : bitvec(hash_group->tables)) {
+                auto hash_table = input_xbar[0]->get_hash_table(id);
+                gen_hash_bits(hash_table, InputXbar::HashTable(InputXbar::HashTable::EXACT, id),
+                              hash_bits, hash_group_no, entry.second);
+            }
+        } else {
+            for (auto &ht : input_xbar[0]->get_hash_tables())
+                gen_hash_bits(ht.second, ht.first, hash_bits, hash_group_no, entry.second);
+        }
+        gen_ghost_bits(hash_group_no, ghost_bits_to_hash_bits, ghost_bits_info);
+        hash_functions.push_back(std::move(hash_function));
+    }
+}
+
+void SRamMatchTable::verify_match(unsigned fmt_width) {
+    if (!verify_match_key()) return;
+    // Build the match_by_bit
+    unsigned bit = 0;
+    for (auto &r : match) {
+        match_by_bit.emplace(bit, r);
+        bit += r->size();
+    }
+    auto match_format = format->field("match");
+    if ((unsigned)bit != (match_format ? match_format->size : 0))
+        warning(match[0]->get_lineno(),
+                "Match width %d for table %s doesn't match format match "
+                "width %d",
+                bit, name(), match_format ?
match_format->size : 0);
+    match_in_word.resize(fmt_width);
+    for (unsigned i = 0; i < format->groups(); i++) {
+        Format::Field *match = format->field("match", i);
+        if (!match) continue;
+        unsigned bit = 0;
+        for (auto &piece : match->bits) {
+            auto mw = --match_by_bit.upper_bound(bit);
+            int lo = bit - mw->first;
+            while (mw != match_by_bit.end() && mw->first < bit + piece.size()) {
+                if ((piece.lo + mw->first - bit) % 8U != (mw->second->slicelobit() % 8U))
+                    error(mw->second->get_lineno(),
+                          "bit within byte misalignment matching %s in "
+                          "match group %d of table %s",
+                          mw->second->name(), i, name());
+                int hi =
+                    std::min((unsigned)mw->second->size() - 1, bit + piece.size() - mw->first - 1);
+                BUG_CHECK((unsigned)piece.lo / 128 < fmt_width);
+                // merge_phv_vec(match_in_word[piece.lo/128], Phv::Ref(mw->second, lo, hi));
+
+                if (auto phv_p = dynamic_cast<Phv::Ref *>(mw->second)) {
+                    auto phv_ref = *phv_p;
+                    auto vec = split_phv_bytes(Phv::Ref(phv_ref, lo, hi));
+                    for (auto ref : vec) {
+                        match_in_word[piece.lo / 128].emplace_back(new Phv::Ref(ref));
+                    }
+
+                } else if (auto hash_p = dynamic_cast<HashMatchSource *>(mw->second)) {
+                    match_in_word[piece.lo / 128].push_back(new HashMatchSource(*hash_p));
+                } else {
+                    BUG();
+                }
+                lo = 0;
+                ++mw;
+            }
+            bit += piece.size();
+        }
+    }
+    for (unsigned i = 0; i < fmt_width; i++) {
+        std::string match_word_info = "[ ";
+        std::string sep = "";
+        for (auto entry : match_in_word[i]) {
+            match_word_info += sep + entry->toString();
+            sep = ", ";
+        }
+        LOG1(" match in word " << i << ": " << match_word_info);
+    }
+}
+
+bool SRamMatchTable::parse_ram(const value_t &v, std::vector<MemUnit> &res) {
+    if (!CHECKTYPE(v, tVEC)) return true;  // supress added message
+    for (auto &el : v.vec)  // all elements must be positive integers
+        if (el.type != tINT || el.i < 0) return false;
+    switch (v.vec.size) {
+        case 1:  // lamb unit
+            if (v[0].i < Target::SRAM_LAMBS_PER_STAGE()) {
+                res.emplace_back(v[0].i);
+                return true;
+            }
+            break;
+        case 2:  // row, col
+            if
(Target::SRAM_GLOBAL_ACCESS()) break; // stage required + if (v[0].i < Target::SRAM_ROWS(gress) && v[1].i < Target::SRAM_UNITS_PER_ROW()) { + res.emplace_back(v[0].i, v[1].i); + return true; + } + break; + case 3: // stage, row, col + if (Target::SRAM_GLOBAL_ACCESS() && v[0].i < Target::NUM_STAGES(gress) && + v[1].i < Target::SRAM_ROWS(gress) && v[2].i < Target::SRAM_UNITS_PER_ROW()) { + res.emplace_back(v[0].i, v[1].i, v[2].i); + return true; + } + break; + default: + break; + } + return false; +} + +bool SRamMatchTable::parse_way(const value_t &v) { + Way way = {}; + way.lineno = v.lineno; + if (!CHECKTYPE2(v, tVEC, tMAP)) return true; // supress added message + if (v.type == tVEC) { + // DEPRECATED -- old style "raw" way for tofino1/2 + if (v.vec.size < 3 || v[0].type != tINT || v[1].type != tINT || v[2].type != tINT || + v[0].i < 0 || v[1].i < 0 || v[2].i < 0 || v[0].i >= Target::EXACT_HASH_GROUPS() || + v[1].i >= EXACT_HASH_ADR_GROUPS || v[2].i >= (1 << EXACT_HASH_SELECT_BITS)) { + return false; + } + way.group_xme = v[0].i; + way.index = v[1].i * EXACT_HASH_ADR_BITS; + way.select = bitvec(v[2].i) << EXACT_HASH_FIRST_SELECT_BIT; + for (int i = 3; i < v.vec.size; i++) { + if (!CHECKTYPE(v[i], tVEC)) return true; // supress added message + if (!parse_ram(v[i], way.rams)) error(v[i].lineno, "invalid ram in way"); + } + } else { + int index_size = 0; + for (auto &kv : MapIterChecked(v.map)) { + if ((kv.key == "group" || kv.key == "xme") && CHECKTYPE(kv.value, tINT)) { + if ((way.group_xme = kv.value.i) >= Target::IXBAR_HASH_GROUPS()) + error(kv.value.lineno, "%s %ld out of range", kv.key.s, kv.value.i); + } else if (kv.key == "index") { + if (!CHECKTYPE2(kv.value, tINT, tRANGE)) continue; + if (kv.value.type == tINT) { + way.index = kv.value.i; + } else { + way.index = kv.value.range.lo; + way.index_hi = kv.value.range.hi; + index_size = kv.value.range.hi - kv.value.range.lo + 1; + } + if (way.index > Target::IXBAR_HASH_INDEX_MAX() || + way.index % 
Target::IXBAR_HASH_INDEX_STRIDE() != 0) + error(kv.value.lineno, "invalid way index %d", way.index); + } else if (kv.key == "select") { + if (kv.value.type == tCMD && kv.value == "&") { + if (CHECKTYPE2(kv.value[1], tINT, tRANGE) && CHECKTYPE(kv.value[2], tINT)) { + way.select = bitvec(kv.value[2].i); + if (kv.value[1].type == tINT) { + way.select <<= kv.value[1].i; + } else { + way.select <<= kv.value[1].range.lo; + if (kv.value[1].range.hi < way.select.max().index()) + error(kv.value.lineno, "invalid select mask for range"); + } + } + } else if (kv.value.type == tRANGE) { + way.select.setrange(kv.value.range.lo, + kv.value.range.hi - kv.value.range.lo + 1); + } else { + error(kv.value.lineno, "invalid select %s", value_desc(&kv.value)); + } + } else if (kv.key == "rams" && CHECKTYPE(kv.value, tVEC)) { + for (auto &ram : kv.value.vec) { + if (!CHECKTYPE(ram, tVEC)) break; + if (!parse_ram(ram, way.rams)) error(ram.lineno, "invalid ram in way"); + } + } + } + if (index_size) { + // FIXME -- currently this code is assuming the index bits cover just the ram index + // bits and the subword bits, and not any select bits. Perhaps that is wrong an it + // should include the select bits. + if (way.rams.empty()) { + error(v.lineno, "no rams in way"); + } else { + way.subword_bits = index_size - (way.isLamb() ? 
LAMB_DEPTH_BITS : SRAM_DEPTH_BITS); + if (way.subword_bits < 0) error(v.lineno, "index range too small for way rams"); + } + } + } + ways.push_back(way); + return true; +} + +void SRamMatchTable::common_sram_setup(pair_t &kv, const VECTOR(pair_t) & data) { + if (kv.key == "ways") { + if (!CHECKTYPE(kv.value, tVEC)) return; + for (auto &w : kv.value.vec) + if (!parse_way(w)) error(w.lineno, "invalid way descriptor"); + } else if (kv.key == "match") { + if (kv.value.type == tVEC) { + for (auto &v : kv.value.vec) { + if (v == "hash_group") + match.emplace_back(new HashMatchSource(v)); + else + match.emplace_back(new Phv::Ref(gress, stage->stageno, v)); + } + } else { + if (kv.value == "hash_group") + match.emplace_back(new HashMatchSource(kv.value)); + else + match.emplace_back(new Phv::Ref(gress, stage->stageno, kv.value)); + } + } else if (kv.key == "match_group_map") { + mgm_lineno = kv.value.lineno; + if (CHECKTYPE(kv.value, tVEC)) { + word_info.resize(kv.value.vec.size); + for (int i = 0; i < kv.value.vec.size; i++) + if (CHECKTYPE(kv.value[i], tVEC)) { + if (kv.value[i].vec.size > 5) + error(kv.value[i].lineno, "Too many groups for word %d", i); + for (auto &v : kv.value[i].vec) + if (CHECKTYPE(v, tINT)) word_info[i].push_back(v.i); + } + } + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), name()); + } +} + +void SRamMatchTable::common_sram_checks() { + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_srams(); + else + alloc_rams(false, stage->sram_use, &stage->sram_search_bus_use); + if (layout_size() > 0 && !format) error(lineno, "No format specified in table %s", name()); + if (!action.set() && !actions) + error(lineno, "Table %s has neither action table nor immediate actions", name()); + if (actions && !action_bus) action_bus = ActionBus::create(); + if (input_xbar.empty()) input_xbar.emplace_back(InputXbar::create(this)); +} + +void SRamMatchTable::alloc_global_busses() { BUG(); } + +void SRamMatchTable::pass1() 
{ + LOG1("### SRam match table " << name() << " pass1 " << loc()); + if (format) { + verify_format(); + setup_ways(); + determine_word_and_result_bus(); + } + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_busses(); + else + alloc_busses(stage->sram_search_bus_use, Layout::SEARCH_BUS); + MatchTable::pass1(); + if (action_enable >= 0) + if (action.args.size() < 1 || action.args[0].size() <= (unsigned)action_enable) + error(lineno, "Action enable bit %d out of range for action selector", action_enable); + if (gateway) { + if (!gateway->layout.empty()) { + for (auto &row : layout) { + if (row.row == gateway->layout[0].row && row.bus == gateway->layout[0].bus && + !row.memunits.empty()) { + unsigned gw_use = gateway->input_use() & 0xff; + auto &way = way_map.at(row.memunits[0]); + for (auto &grp : group_info) { + if (gw_use & grp.tofino_mask[way.word]) { + error(gateway->lineno, + "match bus conflict between match and gateway" + " on table %s", + name()); + break; + } + } + break; + } + } + } + } +} + +void SRamMatchTable::setup_hash_function_ids() { + unsigned hash_fn_id = 0; + for (auto &w : ways) { + if (hash_fn_ids.count(w.group_xme) == 0) hash_fn_ids[w.group_xme] = hash_fn_id++; + } +} + +void SRamMatchTable::setup_ways() { + unsigned fmt_width = (format->size + 127) / 128; + if (ways.empty()) { + error(lineno, "No ways defined in table %s", name()); + } else if (ways[0].rams.empty()) { + for (auto &w : ways) + if (!w.rams.empty()) { + error(w.lineno, "Must specify rams for all ways in tabls %s, or none", name()); + return; + } + if (layout.size() % fmt_width != 0) { + error(lineno, "Rows is not a multiple of width in table %s", name()); + return; + } + for (unsigned i = 0; i < layout.size(); ++i) { + unsigned first = (i / fmt_width) * fmt_width; + if (layout[i].memunits.size() != layout[first].memunits.size()) + error(layout[i].lineno, "Row size mismatch within wide table %s", name()); + } + if (error_count > 0) return; + unsigned ridx = 0, cidx = 0; + for 
(auto &way : ways) { + if (ridx >= layout.size()) { + error(way.lineno, "Not enough rams for ways in table %s", name()); + break; + } + unsigned size = 1U << way.select.popcount(); + for (unsigned i = 0; i < size; i++) { + for (unsigned word = 0; word < fmt_width; ++word) { + BUG_CHECK(ridx + word < layout.size()); + auto &row = layout[ridx + word]; + BUG_CHECK(cidx < row.memunits.size()); + way.rams.push_back(row.memunits[cidx]); + } + if (++cidx == layout[ridx].memunits.size()) { + ridx += fmt_width; + cidx = 0; + } + } + } + if (ridx < layout.size()) + error(ways[0].lineno, "Too many rams for ways in table %s", name()); + } else { + std::set rams; + for (auto &row : layout) { + for (auto &unit : row.memunits) { + BUG_CHECK(!rams.count(unit), "%s duplicate in table", unit.desc()); + rams.insert(unit); + } + } + int way = -1; + for (auto &w : ways) { + ++way; + int index = -1; + if (table_type() != ATCAM) { + if ((w.rams.size() != (1U << w.select.popcount()) * fmt_width)) + error(w.lineno, "Depth of way doesn't match number of rams in table %s", + name()); + } else { + // Allowed to not fully match, as the partition index can be set from the + // control plane + if (!((w.rams.size() <= (1U << w.select.popcount()) * fmt_width) && + (w.rams.size() % fmt_width) == 0)) + error(w.lineno, "RAMs in ATCAM is not a legal multiple of the format width %s", + name()); + } + for (auto &ram : w.rams) { + ++index; + if (way_map.count(ram)) { + if (way == way_map.at(ram).way) + error(w.lineno, "%s used twice in way %d of table %s", ram.desc(), way, + name()); + else + error(w.lineno, "%s used ways %d and %d of table %s", ram.desc(), way, + way_map.at(ram).way, name()); + continue; + } + way_map[ram].way = way; + if (!ram.isLamb() && !rams.count(ram)) + error(w.lineno, "%s in way %d not part of table %s", ram.desc(), way, name()); + rams.erase(ram); + } + } + for (const auto &unit : rams) { + error(lineno, "%s not in any way of table %s", unit.desc(), name()); + } + } + if 
(error_count > 0) return; + int way = 0; + for (auto &w : ways) { + MaskCounter bank(w.select.getrange(EXACT_HASH_FIRST_SELECT_BIT, 32)); + unsigned index = 0, word = 0; + int col = -1; + for (auto &ram : w.rams) { + auto &wm = way_map[ram]; + wm.way = way; + wm.index = index; + wm.word = fmt_width - word - 1; + wm.bank = bank; + if (word && col != ram.col) + error(w.lineno, "Wide exact match split across columns %d and %d", col, ram.col); + col = ram.col; + ++index; + if (++word == fmt_width) { + word = 0; + bank++; + } + } + ++way; + } + setup_hash_function_ids(); +} + +/** + * Either fills out the word/result bus information each row, if it is not provided directly by + * the compiler, or verifies that the word/result_bus information matches directly with + * what has been calculated through the way information provided. + */ +void SRamMatchTable::determine_word_and_result_bus() { + for (auto &row : layout) { + int word = -1; + bool word_set = false; + for (auto &ram : row.memunits) { + auto &way = way_map.at(ram); + if (word_set) { + BUG_CHECK(word == way.word); + } else { + word = way.word; + word_set = true; + } + } + if (row.word_initialized()) { + if (word != row.word) + error(lineno, "Word on row %d bus %d does not align with word in RAM", row.row, + row.bus.at(Layout::SEARCH_BUS)); + } else { + row.word = word; + } + } + + for (auto &row : layout) { + bool result_bus_needed = false; + if (row.word < 0) { + // row with no rams -- assume it needs a result bus for the payload + result_bus_needed = true; + } else { + for (auto group_in_word : word_info.at(row.word)) { + if (group_info[group_in_word].result_bus_word == row.word) result_bus_needed = true; + } + } + if (!row.bus.count(Layout::RESULT_BUS) && result_bus_needed) + row.bus[Layout::RESULT_BUS] = row.bus.at(Layout::SEARCH_BUS); + if (row.bus.count(Layout::RESULT_BUS)) { + auto *old = stage->match_result_bus_use[row.row][row.bus.at(Layout::RESULT_BUS)]; + if (old && old != this) + error(row.lineno, + 
"inconsistent use of match result bus %d on row %d between "
+                  "table %s and %s",
+                  row.row, row.bus.at(Layout::RESULT_BUS), name(), old->name());
+            stage->match_result_bus_use[row.row][row.bus.at(Layout::RESULT_BUS)] = this;
+        }
+    }
+}
+
+int SRamMatchTable::determine_pre_byteswizzle_loc(MatchSource *ms, int lo, int hi, int word) {
+    auto phv_p = dynamic_cast<Phv::Ref *>(ms);
+    BUG_CHECK(phv_p);
+    auto phv_ref = *phv_p;
+    Phv::Slice sl(*phv_ref, lo, hi);
+    BUG_CHECK(word_ixbar_group[word] >= 0);
+    return find_on_ixbar(sl, word_ixbar_group[word]);
+}
+
+template <class REGS>
+void SRamMatchTable::write_attached_merge_regs(REGS &regs, int bus, int word, int word_group) {
+    int group = word_info[word][word_group];
+    auto &merge = regs.rams.match.merge;
+    for (auto &st : attached.stats) {
+        if (group_info[group].result_bus_word == static_cast<int>(word)) {
+            merge.mau_stats_adr_exact_shiftcount[bus][word_group] =
+                st->to<CounterTable>()->determine_shiftcount(st, group, word, 0);
+        } else if (options.match_compiler) {
+            /* unused, so should not be set... */
+            merge.mau_stats_adr_exact_shiftcount[bus][word_group] = 7;
+        }
+        break; /* all must be the same, only config once */
+    }
+    for (auto &m : attached.meters) {
+        if (group_info[group].overhead_word == static_cast<int>(word) ||
+            group_info[group].overhead_word == -1) {
+            m->to<MeterTable>()->setup_exact_shift(regs, bus, group, word, word_group, m,
+                                                   attached.meter_color);
+        } else if (options.match_compiler) {
+            /* unused, so should not be set... */
+            merge.mau_meter_adr_exact_shiftcount[bus][word_group] = 16;
+        }
+        break; /* all must be the same, only config once */
+    }
+    for (auto &s : attached.statefuls) {
+        if (group_info[group].overhead_word == static_cast<int>(word) ||
+            group_info[group].overhead_word == -1) {
+            merge.mau_meter_adr_exact_shiftcount[bus][word_group] =
+                s->to<StatefulTable>()->determine_shiftcount(s, group, word, 0);
+        } else if (options.match_compiler) {
+            /* unused, so should not be set...
*/
+            merge.mau_meter_adr_exact_shiftcount[bus][word_group] = 16;
+        }
+        break; /* all must be the same, only config once */
+    }
+}
+
+template <class REGS>
+void SRamMatchTable::write_regs_vt(REGS &regs) {
+    LOG1("### SRam match table " << name() << " write_regs " << loc());
+    MatchTable::write_regs(regs, 0, this);
+    auto &merge = regs.rams.match.merge;
+    unsigned fmt_width = format ? (format->size + 127) / 128 : 0;
+    bitvec match_mask;
+    match_mask.setrange(0, 128 * fmt_width);
+    version_nibble_mask.setrange(0, 32 * fmt_width);
+    for (unsigned i = 0; format && i < format->groups(); i++) {
+        if (Format::Field *match = format->field("match", i)) {
+            for (auto &piece : match->bits) match_mask.clrrange(piece.lo, piece.hi + 1 - piece.lo);
+        }
+        if (Format::Field *version = format->field("version", i)) {
+            match_mask.clrrange(version->bit(0), version->size);
+            version_nibble_mask.clrrange(version->bit(0) / 4, 1);
+        }
+    }
+    Format::Field *next = format ? format->field("next") : nullptr;
+    if (format && !next && hit_next.size() > 1) next = format->field("action");
+
+    /* iterating through rows in the sram array; while in this loop, 'row' is the
+     * row we're on, 'word' is which word in a wide full-way the row is for, and 'way'
+     * is which full-way of the match table the row is for.
For compatibility with the + * compiler, we iterate over rows and ways in order, and words from msb to lsb (reversed) */ + int index = -1; + for (auto &row : layout) { + index++; /* index of the row in the layout */ + int search_bus = ::get(row.bus, Layout::SEARCH_BUS, -1); + /* setup match logic in rams */ + auto &rams_row = regs.rams.array.row[row.row]; + auto &vh_adr_xbar = rams_row.vh_adr_xbar; + bool first = true; + int hash_group = -1; + unsigned word = ~0; + auto vpn_iter = row.vpns.begin(); + for (auto &memunit : row.memunits) { + int col = memunit.col; + BUG_CHECK(memunit.stage == INT_MIN && memunit.row == row.row, "bogus %s in row %d", + memunit.desc(), row.row); + auto &way = way_map.at(memunit); + if (first) { + hash_group = ways[way.way].group_xme; + word = way.word; + setup_muxctl(vh_adr_xbar.exactmatch_row_hashadr_xbar_ctl[search_bus], hash_group); + first = false; + } else if (hash_group != ways[way.way].group_xme || int(word) != way.word) { + auto first_way = way_map.at(row.memunits[0]); + error(ways[way.way].lineno, + "table %s ways #%d and #%d use the same row bus " + "(%d.%d) but different %s", + name(), first_way.way, way.way, row.row, search_bus, + int(word) == way.word ? "hash groups" : "word order"); + hash_group = ways[way.way].group_xme; + word = way.word; + } + setup_muxctl(vh_adr_xbar.exactmatch_mem_hashadr_xbar_ctl[col], + ways[way.way].index / EXACT_HASH_ADR_BITS + search_bus * 5); + if (options.match_compiler || ways[way.way].select) { + // Glass always sets this. When mask == 0, bank will also be 0, and the + // comparison will always match, so the bus need not be read (inp_sel). + // CSR suggests it should NOT be set if not needed to save power. 
+ auto &bank_enable = vh_adr_xbar.exactmatch_bank_enable[col]; + bank_enable.exactmatch_bank_enable_bank_mask = + ways[way.way].select.getrange(EXACT_HASH_FIRST_SELECT_BIT, 32); + bank_enable.exactmatch_bank_enable_bank_id = way.bank; + bank_enable.exactmatch_bank_enable_inp_sel |= 1 << search_bus; + } + auto &ram = rams_row.ram[col]; + for (unsigned i = 0; i < 4; i++) + ram.match_mask[i] = match_mask.getrange(way.word * 128U + i * 32, 32); + + if (next) { + for (int group : word_info[way.word]) { + if (group_info[group].result_bus_word != way.word) continue; + int pos = (next->by_group[group]->bit(0) % 128) - 1; + auto &n = ram.match_next_table_bitpos; + switch (group_info[group].result_bus_word_group()) { + case 0: + break; + case 1: + n.match_next_table1_bitpos = pos; + break; + case 2: + n.match_next_table2_bitpos = pos; + break; + case 3: + n.match_next_table3_bitpos = pos; + break; + case 4: + n.match_next_table4_bitpos = pos; + break; + default: + BUG(); + } + } + } + + ram.unit_ram_ctl.match_ram_logical_table = logical_id; + ram.unit_ram_ctl.match_ram_write_data_mux_select = 7; /* unused */ + ram.unit_ram_ctl.match_ram_read_data_mux_select = 7; /* unused */ + ram.unit_ram_ctl.match_ram_matchdata_bus1_sel = search_bus; + if (row.bus.count(Layout::RESULT_BUS)) + ram.unit_ram_ctl.match_result_bus_select = 1 << row.bus.at(Layout::RESULT_BUS); + if (auto cnt = word_info[way.word].size()) + ram.unit_ram_ctl.match_entry_enable = ~(~0U << cnt); + auto &unitram_config = + regs.rams.map_alu.row[row.row].adrmux.unitram_config[col / 6][col % 6]; + unitram_config.unitram_type = 1; + unitram_config.unitram_logical_table = logical_id; + switch (gress) { + case INGRESS: + case GHOST: + unitram_config.unitram_ingress = 1; + break; + case EGRESS: + unitram_config.unitram_egress = 1; + break; + default: + BUG(); + } + unitram_config.unitram_enable = 1; + + int vpn = *vpn_iter++; + std::vector vpn01; + auto groups_in_word = word_info[way.word]; + // Action format is made up of 
multiple groups (groups_in_format) which can be spread + // across multiple words. The match_group_map specifies which groups are within each + // word. For an N pack across M words if N > M, we have one or more words with multiple + // groups. + // Below code assigns VPN for each group(groups_in_word) within a word which are indexed + // separately from groups_in_format. + // E.g. + // format: { + // action(0): 0..0, immediate(0): 2..9, version(0): 112..115, match(0): 18..71, + // action(1): 1..1, immediate(1): 10..17, version(1): 116..119, + // match(1): [ 194..199, 72..111, 120..127 ], + // action(2): 128..128, immediate(2): 129..136, version(2): 240..243, + // match(2): 138..191, + // action(3): 256..256, immediate(3): 257..264, version(3): 368..371, + // match(3): 266..319, + // action(4): 384..384, immediate(4): 385..392, version(4): 496..499, + // match(4): 394..447 } + // match_group_map: [ [ 1, 0 ], [ 1, 2], [ 3 ], [ 4 ] ] + // ^ ^ + // } + // In the above example the "format" specifies the 5 groups packed across 4 RAMs.These + // are the groups_in_format + // The "match_group_map" specifies the groups within each word. + // Group 1 is spread across word 0 & word 1. + // Within word 0 - group 1 is group_in_word 0 and group 0 is group_in_word 1 + // Within word 1 - group 1 is group_in_word 0 and group 2 is group_in_word 1 + // This distinction is used while specifying the config register in setting the subfield + // on match_ram_vpn_lsbs. + for (auto group_in_word = 0; group_in_word < groups_in_word.size(); group_in_word++) { + auto group_in_format = groups_in_word[group_in_word]; + int overhead_word = group_info[group_in_format].overhead_word; + int group_vpn = vpn + group_info[group_in_format].vpn_offset; + bool ok = false; + for (unsigned i = 0; i < vpn01.size(); ++i) { + if (vpn01[i] == group_vpn >> 2) { + ok = true; + group_vpn = (group_vpn & 3) + (i << 2); + break; + } + } + if (!ok) { + if (vpn01.size() >= 2) { + error(mgm_lineno > 0 ? 
mgm_lineno : lineno, + "Too many diverse vpns in table layout for %s", name()); + break; + } + vpn01.push_back(group_vpn >> 2); + group_vpn &= 3; + if (vpn01.size() == 1) { + ram.match_ram_vpn.match_ram_vpn0 = vpn01.back(); + } else { + ram.match_ram_vpn.match_ram_vpn1 = vpn01.back(); + group_vpn |= 4; + } + } + ram.match_ram_vpn.match_ram_vpn_lsbs.set_subfield(group_vpn, group_in_word * 3, 3); + } + + int word_group = 0; + for (int group : word_info[way.word]) { + unsigned mask = group_info[group].tofino_mask[way.word]; + ram.match_bytemask[word_group].mask_bytes_0_to_13 = ~mask & 0x3fff; + ram.match_bytemask[word_group].mask_nibbles_28_to_31 = ~(mask >> 14) & 0xf; + word_group++; + } + for (; word_group < 5; word_group++) { + ram.match_bytemask[word_group].mask_bytes_0_to_13 = 0x3fff; + ram.match_bytemask[word_group].mask_nibbles_28_to_31 = 0xf; + } + if (gress == EGRESS) + regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row.row); + rams_row.emm_ecc_error_uram_ctl[timing_thread(gress)] |= 1U << (col - 2); + } + /* setup input xbars to get data to the right places on the bus(es) */ + bool using_match = false; + // Loop for determining the config to indicate which bytes from the search bus + // are compared to the bytes on the RAM line + if (!row.memunits.empty()) { + auto &byteswizzle_ctl = rams_row.exactmatch_row_vh_xbar_byteswizzle_ctl[search_bus]; + for (unsigned i = 0; format && i < format->groups(); i++) { + if (Format::Field *match = format->field("match", i)) { + unsigned bit = 0; + for (auto &piece : match->bits) { + if (piece.lo / 128U != word) { + bit += piece.size(); + continue; + } + using_match = true; + for (unsigned fmt_bit = piece.lo; fmt_bit <= piece.hi;) { + unsigned byte = (fmt_bit % 128) / 8; + unsigned bits_in_byte = (byte + 1) * 8 - (fmt_bit % 128); + if (fmt_bit + bits_in_byte > piece.hi + 1) + bits_in_byte = piece.hi + 1 - fmt_bit; + auto it = --match_by_bit.upper_bound(bit); + int lo = bit - it->first; + int hi = lo + 
bits_in_byte - 1; + int bus_loc = determine_pre_byteswizzle_loc(it->second, lo, hi, word); + BUG_CHECK(bus_loc >= 0 && bus_loc < 16); + for (unsigned b = 0; b < bits_in_byte; b++, fmt_bit++) + byteswizzle_ctl[byte][fmt_bit % 8U] = 0x10 + bus_loc; + bit += bits_in_byte; + } + } + BUG_CHECK(bit == match->size); + } + if (Format::Field *version = format->field("version", i)) { + if (version->bit(0) / 128U != word) continue; + ///> if no match, but a version/valid is, the vh_xbar needs to be + ///> enabled. This was preventing anything from running + using_match = true; + for (unsigned j = 0; j < version->size; ++j) { + unsigned bit = version->bit(j); + unsigned byte = (bit % 128) / 8; + byteswizzle_ctl[byte][bit % 8U] = 8; + } + } + } + if (using_match) { + auto &vh_xbar_ctl = rams_row.vh_xbar[search_bus].exactmatch_row_vh_xbar_ctl; + if (word_ixbar_group[word] >= 0) { + setup_muxctl(vh_xbar_ctl, word_ixbar_group[word]); + } else { + // Need the bus for version/valid, but don't care what other data is on it. So + // just set the enable without actually selecting an input -- if another table + // is sharing the bus, it will set it, otherwise we'll get ixbar group 0 + vh_xbar_ctl.exactmatch_row_vh_xbar_enable = 1; + } + vh_xbar_ctl.exactmatch_row_vh_xbar_thread = timing_thread(gress); + } + } + /* setup match central config to extract results of the match */ + ssize_t r_bus = -1; + if (row.bus.count(Layout::RESULT_BUS)) r_bus = row.row * 2 + row.bus.at(Layout::RESULT_BUS); + // If the result bus is not to be used, then the registers are not necessary to set up + // for shift/mask/default etc. + /* FIXME -- factor this where possible with ternary match code */ + if (action) { + if (auto adt = action->to()) { + if (r_bus >= 0) { + /* FIXME -- support for multiple sizes of action data? 
*/ + merge.mau_actiondata_adr_mask[0][r_bus] = adt->determine_mask(action); + merge.mau_actiondata_adr_vpn_shiftcount[0][r_bus] = + adt->determine_vpn_shiftcount(action); + } + } + } + + if (format && word < word_info.size()) { + for (unsigned word_group = 0; word_group < word_info[word].size(); word_group++) { + int group = word_info[word][word_group]; + if (group_info[group].result_bus_word == static_cast(word)) { + BUG_CHECK(r_bus >= 0); + if (format->immed) { + BUG_CHECK(format->immed->by_group[group]->bit(0) / 128U == word); + merge.mau_immediate_data_exact_shiftcount[r_bus][word_group] = + format->immed->by_group[group]->bit(0) % 128; + } + if (instruction) { + int shiftcount = 0; + if (auto field = instruction.args[0].field()) { + assert(field->by_group[group]->bit(0) / 128U == word); + shiftcount = field->by_group[group]->bit(0) % 128U; + } else if (auto field = instruction.args[1].field()) { + assert(field->by_group[group]->bit(0) / 128U == word); + shiftcount = field->by_group[group]->bit(0) % 128U; + } + merge.mau_action_instruction_adr_exact_shiftcount[r_bus][word_group] = + shiftcount; + } + } + /* FIXME -- factor this where possible with ternary match code */ + if (action) { + if (group_info[group].result_bus_word == static_cast(word)) { + BUG_CHECK(r_bus >= 0); + merge.mau_actiondata_adr_exact_shiftcount[r_bus][word_group] = + action->determine_shiftcount(action, group, word, 0); + } + } + if (attached.selector) { + if (group_info[group].result_bus_word == static_cast(word)) { + BUG_CHECK(r_bus >= 0); + auto sel = get_selector(); + merge.mau_meter_adr_exact_shiftcount[r_bus][word_group] = + sel->determine_shiftcount(attached.selector, group, word, 0); + merge.mau_selectorlength_shiftcount[0][r_bus] = + sel->determine_length_shiftcount(attached.selector_length, group, word); + merge.mau_selectorlength_mask[0][r_bus] = + sel->determine_length_mask(attached.selector_length); + merge.mau_selectorlength_default[0][r_bus] = + 
sel->determine_length_default(attached.selector_length); + } + } + if (idletime) { + if (group_info[group].result_bus_word == static_cast(word)) { + BUG_CHECK(r_bus >= 0); + merge.mau_idletime_adr_exact_shiftcount[r_bus][word_group] = + idletime->direct_shiftcount(); + } + } + if (r_bus >= 0) write_attached_merge_regs(regs, r_bus, word, word_group); + } + } else if (format) { + // If we have a result bus without any attached memories, program + // the registers on this row because a subset of the registers have been + // programmed elsewhere and it can break things if we have a partial configuration. + // FIXME: avoid programming any registers if we don't actually use the result bus. + if (r_bus >= 0) write_attached_merge_regs(regs, r_bus, 0, 0); + } + for (auto &ram : row.memunits) { + int word_group = 0; + auto &merge_col = merge.col[ram.col]; + for (int group : word_info[word]) { + int result_bus_word = group_info[group].result_bus_word; + if (int(word) == result_bus_word) { + BUG_CHECK(r_bus >= 0); + merge_col.row_action_nxtable_bus_drive[row.row] |= 1 << (r_bus % 2); + } + if (word_group < 2) { + auto &way = way_map.at(ram); + int idx = way.index + word - result_bus_word; + int overhead_row = ways[way.way].rams[idx].row; + auto &hitmap_ixbar = merge_col.hitmap_output_map[2 * row.row + word_group]; + setup_muxctl(hitmap_ixbar, + overhead_row * 2 + group_info[group].result_bus_word_group()); + } + ++word_group; + } + // setup_muxctl(merge.col[ram.col].hitmap_output_map[bus], + // layout[index+word].row*2 + layout[index+word].bus); + } + // if (gress == EGRESS) + // merge.exact_match_delay_config.exact_match_bus_thread |= 1 << bus; + if (r_bus >= 0) { + merge.exact_match_phys_result_en[r_bus / 8U] |= 1U << (r_bus % 8U); + merge.exact_match_phys_result_thread[r_bus / 8U] |= timing_thread(gress) + << (r_bus % 8U); + if (stage->tcam_delay(gress)) + merge.exact_match_phys_result_delay[r_bus / 8U] |= 1U << (r_bus % 8U); + } + } + + merge.exact_match_logical_result_en 
|= 1 << logical_id; + if (stage->tcam_delay(gress) > 0) merge.exact_match_logical_result_delay |= 1 << logical_id; + if (actions) actions->write_regs(regs, this); + if (gateway) gateway->write_regs(regs); + if (idletime) idletime->write_regs(regs); + for (auto &hd : hash_dist) hd.write_regs(regs, this); +} +FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void SRamMatchTable::write_regs, (mau_regs & regs), + { write_regs_vt(regs); }) + +std::string SRamMatchTable::get_match_mode(const Phv::Ref &pref, int offset) const { + return "unused"; +} + +void SRamMatchTable::add_field_to_pack_format(json::vector &field_list, unsigned basebit, + std::string name, const Table::Format::Field &field, + const Table::Actions::Action *act) const { + if (name != "match") { + // FIXME -- tofino always pads out the wordsize so basebit is always 0. + basebit = 0; + Table::add_field_to_pack_format(field_list, basebit, name, field, act); + return; + } + LOG3("Adding fields for " << name << " - " << field << " to pack format for SRAM table " + << this->name() << " in action : " << act); + unsigned bit = 0; + for (auto &piece : field.bits) { + auto mw = --match_by_bit.upper_bound(bit); + int lo = bit - mw->first; + int lsb_mem_word_idx = piece.lo / MEM_WORD_WIDTH; + int msb_mem_word_idx = piece.hi / MEM_WORD_WIDTH; + int offset = piece.lo % MEM_WORD_WIDTH; + while (mw != match_by_bit.end() && mw->first < bit + piece.size()) { + std::string source = ""; + std::string immediate_name = ""; + std::string mw_name = mw->second->name(); + int start_bit = 0; + + get_cjson_source(mw_name, source, start_bit); + if (source == "") + error(lineno, "Cannot determine proper source for field %s", name.c_str()); + std::string field_name, global_name = ""; + std::string match_mode; + if (auto phv_p = dynamic_cast(mw->second)) { + field_name = mw->second->name(); + // If the name has a slice in it, remove it and add the lo bit of + // the slice to field_bit. 
This takes the place of + // canon_field_list(), rather than extracting the slice component + // of the field name, if present, and appending it to the key name. + int slice_offset = remove_name_tail_range(field_name); + start_bit = lo + slice_offset + mw->second->fieldlobit(); + global_name = field_name; + auto p = find_p4_param(field_name, "", start_bit); + if (!p && !p4_params_list.empty()) { + warning(lineno, + "Cannot find field name %s in p4_param_order " + "for table %s", + field_name.c_str(), this->name()); + } else if (p && !p->key_name.empty()) { + field_name = p->key_name; + } + match_mode = get_match_mode(*phv_p, mw->first); + } else if (dynamic_cast(mw->second)) { + field_name = "--proxy_hash--"; + match_mode = "unused"; + start_bit = mw->second->fieldlobit(); + } else { + BUG(); + } + + field_list.push_back(json::map{{"field_name", json::string(field_name)}, + {"global_name", json::string(global_name)}, + {"source", json::string(source)}, + {"lsb_mem_word_offset", json::number(offset)}, + {"start_bit", json::number(start_bit)}, + {"immediate_name", json::string(immediate_name)}, + {"lsb_mem_word_idx", json::number(lsb_mem_word_idx)}, + {"msb_mem_word_idx", json::number(msb_mem_word_idx)}, + // FIXME-JSON + {"match_mode", json::string(match_mode)}, + {"enable_pfe", json::False()}, // FIXME-JSON + {"field_width", json::number(mw->second->size())}}); + LOG5("Adding json field " << field_list.back()); + offset += mw->second->size(); + lo = 0; + ++mw; + } + bit += piece.size(); + } +} + +void SRamMatchTable::add_action_cfgs(json::map &tbl, json::map &stage_tbl) const { + if (actions) { + actions->gen_tbl_cfg(tbl["actions"]); + actions->add_action_format(this, stage_tbl); + } else if (action && action->actions) { + action->actions->gen_tbl_cfg(tbl["actions"]); + action->actions->add_action_format(this, stage_tbl); + } +} + +unsigned SRamMatchTable::get_format_width() const { + return format ? 
(format->size + 127) / 128 : 0; +} + +unsigned SRamMatchTable::get_number_entries() const { + unsigned fmt_width = get_format_width(); + unsigned number_entries = 0; + if (format) number_entries = layout_size() / fmt_width * format->groups() * entry_ram_depth(); + return number_entries; +} + +json::map *SRamMatchTable::add_common_sram_tbl_cfgs(json::map &tbl, std::string match_type, + std::string stage_table_type) const { + common_tbl_cfg(tbl); + json::map &match_attributes = tbl["match_attributes"]; + json::vector &stage_tables = match_attributes["stage_tables"]; + json::map *stage_tbl_ptr = + add_stage_tbl_cfg(match_attributes, stage_table_type.c_str(), get_number_entries()); + json::map &stage_tbl = *stage_tbl_ptr; + // This is a only a glass required field, as it is only required when no default action + // is specified, which is impossible for Brig through p4-16 + stage_tbl["default_next_table"] = Stage::end_of_pipe(); + match_attributes["match_type"] = match_type; + add_hash_functions(stage_tbl); + add_action_cfgs(tbl, stage_tbl); + add_result_physical_buses(stage_tbl); + MatchTable::gen_idletime_tbl_cfg(stage_tbl); + merge_context_json(tbl, stage_tbl); + add_all_reference_tables(tbl); + return stage_tbl_ptr; +} + +int SRamMatchTable::find_problematic_vpn_offset() const { + // Any single word of a match that contains 3 or more groups whose min and max vpn_offset + // differs by more than 5 is going to be a problem. 
We need to permute the offsets so that + // does not happen + if (group_info.size() <= 6) return -1; // can't differ by more than 5 + for (auto &word : word_info) { + if (word.size() <= 2) continue; // can't be a problem + int minvpn = -1, maxvpn = -1, avg = 0; + for (auto group : word) { + int vpn_offset = group_info[group].vpn_offset; + if (minvpn < 0) + minvpn = maxvpn = vpn_offset; + else if (minvpn > vpn_offset) + minvpn = vpn_offset; + else if (maxvpn < vpn_offset) + maxvpn = vpn_offset; + avg += vpn_offset; + } + if (maxvpn - minvpn > 5) { + if (minvpn + maxvpn > (2 * avg) / word.size()) + minvpn = maxvpn; // look for the max to move, instead of min + for (auto group : word) { + if (group_info[group].vpn_offset == minvpn) return group; + } + BUG("failed to find the group vpn we just saw"); + } + } + return -1; // no problem found +} + +void SRamMatchTable::alloc_vpns() { + if (error_count > 0 || no_vpns || layout_size() == 0 || layout[0].vpns.size() > 0) return; + int period, width, depth; + const char *period_name; + vpn_params(width, depth, period, period_name); + std::map vpn_for; + for (auto &row : layout) { + row.vpns.resize(row.memunits.size()); + int i = 0; + for (auto &ram : row.memunits) vpn_for[ram] = &row.vpns[i++]; + } + int vpn = 0, word = 0; + for (auto &way : ways) { + for (auto unit : way.rams) { + *vpn_for[unit] = vpn; + if (++word == width) { + word = 0; + vpn += period; + } + } + } + + int fix = find_problematic_vpn_offset(); + if (fix >= 0) { + // Swap it with the middle one. 
That should fix all the cases we've seen + int middle = group_info.size() / 2; + BUG_CHECK(middle != fix, "vpn_offset fix doesn't work"); + std::swap(group_info[fix].vpn_offset, group_info[middle].vpn_offset); + BUG_CHECK(find_problematic_vpn_offset() < 0, "vpn_offset fix did not work"); + } +} diff --git a/backends/tofino/bf-asm/stage.cpp b/backends/tofino/bf-asm/stage.cpp new file mode 100644 index 00000000000..2e80360f1b7 --- /dev/null +++ b/backends/tofino/bf-asm/stage.cpp @@ -0,0 +1,857 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" + +#include + +#include + +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/target.h" +#include "deparser.h" +#include "input_xbar.h" +#include "lib/range.h" +#include "misc.h" +#include "parser.h" +#include "phv.h" +#include "sections.h" +#include "top_level.h" + +extern std::string asmfile_name; + +unsigned char Stage::action_bus_slot_map[ACTION_DATA_BUS_BYTES]; +unsigned char Stage::action_bus_slot_size[ACTION_DATA_BUS_SLOTS]; + +AsmStage AsmStage::singleton_object; + +#include "jbay/stage.cpp" // NOLINT(build/include) +#include "tofino/stage.cpp" // NOLINT(build/include) + +AsmStage::AsmStage() : Section("stage") { + int slot = 0, byte = 0; + for (int i = 0; i < ACTION_DATA_8B_SLOTS; i++) { + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_size[slot++] = 8; + } + for (int i = 0; i < ACTION_DATA_16B_SLOTS; i++) { + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_size[slot++] = 16; + } + for (int i = 0; i < ACTION_DATA_32B_SLOTS; i++) { + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_map[byte++] = slot; + Stage::action_bus_slot_size[slot++] = 32; + } + BUG_CHECK(byte == ACTION_DATA_BUS_BYTES); + BUG_CHECK(slot == ACTION_DATA_BUS_SLOTS); +} + +void AsmStage::start(int lineno, VECTOR(value_t) args) { + while (int(pipe.size()) < Target::NUM_MAU_STAGES()) pipe.emplace_back(pipe.size(), false); + if (args.size != 2 || args[0].type != tINT || + (args[1] != "ingress" && args[1] != "egress" && + (args[1] != "ghost" || options.target < JBAY))) { + error(lineno, "stage must specify number and ingress%s or egress", + options.target >= JBAY ? 
", ghost" : ""); + } else if (args[0].i < 0) { + error(lineno, "invalid stage number"); + } else if ((unsigned)args[0].i >= pipe.size()) { + while ((unsigned)args[0].i >= pipe.size()) pipe.emplace_back(pipe.size(), false); + } +} + +void AsmStage::input(VECTOR(value_t) args, value_t data) { + if (!CHECKTYPE(data, tMAP)) return; + int stageno = args[0].i; + gress_t gress = + args[1] == "ingress" ? INGRESS + : args[1] == "egress" ? EGRESS + : args[1] == "ghost" && options.target >= JBAY + ? GHOST + : (error(args[1].lineno, "Invalid thread %s", value_desc(args[1])), INGRESS); + auto &stage = stages(gress); + BUG_CHECK(stageno >= 0 && (unsigned)stageno < stage.size()); + if (stages_seen[gress][stageno]) + error(args[0].lineno, "Duplicate stage %d %s", stageno, to_string(gress).c_str()); + stages_seen[gress][stageno] = 1; + for (auto &kv : MapIterChecked(data.map, true)) { + if (kv.key == "dependency") { + if (stageno == 0) warning(kv.key.lineno, "Stage dependency in stage 0 will be ignored"); + if (gress == GHOST) { + error(kv.key.lineno, + "Can't specify dependency in ghost thread; it is " + "locked to ingress"); + } else if (kv.value == "concurrent") { + stage[stageno].stage_dep[gress] = Stage::CONCURRENT; + if (stageno == Target::NUM_MAU_STAGES() / 2 && options.target == TOFINO) + error(kv.value.lineno, "stage %d must be match dependent", stageno); + else if (!Target::SUPPORT_CONCURRENT_STAGE_DEP()) + error(kv.value.lineno, "no concurrent execution on %s", Target::name()); + } else if (kv.value == "action") { + stage[stageno].stage_dep[gress] = Stage::ACTION_DEP; + if (stageno == Target::NUM_MAU_STAGES() / 2 && options.target == TOFINO) + error(kv.value.lineno, "stage %d must be match dependent", stageno); + } else if (kv.value == "match") { + stage[stageno].stage_dep[gress] = Stage::MATCH_DEP; + } else { + error(kv.value.lineno, "Invalid stage dependency %s", value_desc(kv.value)); + } + continue; + + } else if (kv.key == "mpr_stage_id") { + 
stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if CHECKTYPE (kv.value, tINT) { + if (kv.value.i > stageno) + error(kv.value.lineno, + "mpr_stage_id value cannot be greater than current stage."); + stage[stageno].mpr_stage_id[gress] = kv.value.i; + + /* Intermediate stage must carry the mpr glob_exec and long_branch bitmap. + * If they have been left off by the compiler, we need to propagate the bits; + * if the compiler has provided them, we assume it did so correctly + * DANGER -- this assumes the stages appear in the .bfa file in order (at + * least for each gress) + */ + if (kv.value.i != stageno) { + for (int inter_stage = kv.value.i + 1; inter_stage < stageno; inter_stage++) { + if (!stages_seen[gress][inter_stage]) { + stage[inter_stage].mpr_bus_dep_glob_exec[gress] |= + stage[kv.value.i].mpr_bus_dep_glob_exec[gress]; + stage[inter_stage].mpr_bus_dep_long_branch[gress] |= + stage[kv.value.i].mpr_bus_dep_long_branch[gress]; + } + } + } + } + continue; + } else if (kv.key == "mpr_always_run") { + stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if CHECKTYPE (kv.value, tINT) { + stage[stageno].mpr_always_run |= kv.value.i; + } + continue; + } else if (kv.key == "mpr_bus_dep_glob_exec") { + stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if CHECKTYPE (kv.value, tINT) { + stage[stageno].mpr_bus_dep_glob_exec[gress] = kv.value.i; + } + continue; + } else if (kv.key == "mpr_bus_dep_long_brch") { + stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if CHECKTYPE (kv.value, tINT) { + stage[stageno].mpr_bus_dep_long_branch[gress] = kv.value.i; + } + continue; + } else if (kv.key == "mpr_next_table_lut") { + stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &lut : kv.value.map) { + if (!CHECKTYPE(lut.key, tINT) || lut.key.i >= LOGICAL_TABLES_PER_STAGE) + error(lut.key.lineno, "Invalid mpr_next_table_lut key."); + if (!CHECKTYPE(lut.value, tINT) || + lut.value.i >= (1 << 
LOGICAL_TABLES_PER_STAGE)) + error(lut.value.lineno, "Invalid mpr_next_table_lut value."); + stage[stageno].mpr_next_table_lut[gress][lut.key.i] = lut.value.i; + } + } + continue; + } else if (kv.key == "mpr_glob_exec_lut") { + stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &lut : kv.value.map) { + if (!CHECKTYPE(lut.key, tINT) || lut.key.i >= LOGICAL_TABLES_PER_STAGE) + error(lut.key.lineno, "Invalid mpr_glob_exec_lut key."); + if (!CHECKTYPE(lut.value, tINT) || + lut.value.i >= (1 << LOGICAL_TABLES_PER_STAGE)) + error(lut.value.lineno, "Invalid mpr_glob_exec_lut value."); + stage[stageno].mpr_glob_exec_lut[lut.key.i] |= lut.value.i; + } + } + continue; + } else if (kv.key == "mpr_long_brch_lut") { + stage[stageno].verify_have_mpr(kv.key.s, kv.key.lineno); + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &lut : kv.value.map) { + if (!CHECKTYPE(lut.key, tINT) || lut.key.i >= MAX_LONGBRANCH_TAGS) + error(lut.key.lineno, "Invalid mpr_long_brch_lut key."); + if (!CHECKTYPE(lut.value, tINT) || + lut.value.i >= (1 << LOGICAL_TABLES_PER_STAGE)) + error(lut.value.lineno, "Invalid mpr_long_brch_lut value."); + stage[stageno].mpr_long_brch_lut[lut.key.i] |= lut.value.i; + } + } + continue; + } else if (kv.key == "error_mode") { + if (gress == GHOST) + error(kv.key.lineno, "Can't specify error mode in ghost thread"); + else + stage[stageno].error_mode[gress].input(kv.value); + continue; + } else if (Target::SUPPORT_ALWAYS_RUN() && kv.key == "always_run_action") { + if (gress == GHOST) + error(kv.key.lineno, "No always run action for ghost thread, must use ingress"); + else + stage[stageno].tables.push_back(new AlwaysRunTable(gress, &stage[stageno], kv)); + continue; + } + if (!CHECKTYPEM(kv.key, tCMD, "table declaration")) continue; + if (!CHECKTYPE(kv.value, tMAP)) continue; + auto tt = Table::Type::get(kv.key[0].s); + if (!tt) { + error(kv.key[0].lineno, "Unknown table type '%s'", kv.key[0].s); + continue; + } + if 
(kv.key.vec.size < 2) { + error(kv.key.lineno, "Need table name"); + continue; + } + if (!CHECKTYPE(kv.key[1], tSTR)) continue; + if (kv.key.vec.size > 2 && !CHECKTYPE(kv.key[2], tINT)) continue; + if (kv.key.vec.size > 3) warning(kv.key[3].lineno, "Ignoring extra stuff after table"); + if (auto old = ::get(Table::all, kv.key[1].s)) { + error(kv.key[1].lineno, "Table %s already defined", kv.key[1].s); + warning(old->lineno, "previously defined here"); + continue; + } + if (Table *table = tt->create(kv.key.lineno, kv.key[1].s, gress, &stage[stageno], + kv.key.vec.size > 2 ? kv.key[2].i : -1, kv.value.map)) { + stage[stageno].tables.push_back(table); + } + } +} + +void AsmStage::process() { + for (auto &stage : pipe) { + stage.pass1_logical_id = stage.pass1_tcam_id = -1; + for (auto table : stage.tables) table->pass0(); + } + for (auto &stage : pipe) { + for (auto table : stage.tables) table->pass1(); + if (options.target == TOFINO) { + if (&stage - &pipe[0] == Target::NUM_MAU_STAGES() / 2) { + /* to turn the corner, the middle stage must always be match dependent */ + for (gress_t gress : Range(INGRESS, EGRESS)) + stage.stage_dep[gress] = Stage::MATCH_DEP; + } + } + if (options.match_compiler || 1) { + /* FIXME -- do we really want to do this? 
In theory different stages could + * FIXME -- use the same PHV slots differently, but the compiler always uses them + * FIXME -- consistently, so we need this to get bit-identical results + * FIXME -- we also don't correctly determine liveness, so need this */ + for (gress_t gress : Range(INGRESS, GHOST)) { + Phv::setuse(gress, stage.match_use[gress]); + Phv::setuse(gress, stage.action_use[gress]); + Phv::setuse(gress, stage.action_set[gress]); + } + } + } + for (auto &stage : pipe) { + for (auto table : stage.tables) table->pass2(); + std::sort(stage.tables.begin(), stage.tables.end(), + [](Table *a, Table *b) { return a->logical_id < b->logical_id; }); + } + for (auto &stage : pipe) { + for (auto table : stage.tables) table->pass3(); + } +} + +void AsmStage::output(json::map &ctxt_json) { + if (int(pipe.size()) > Target::NUM_MAU_STAGES()) { + auto lineno = pipe.back().tables.empty() ? 0 : pipe.back().tables[0]->lineno; + error(lineno, "%s supports up to %d stages, using %zd", Target::name(), + Target::NUM_MAU_STAGES(), pipe.size()); + } + + // If we encounter errors, no binary is generated, however we still proceed + // to generate the context.json with whatever info is provided in the .bfa. + // This can be inspected in p4i for debugging. 
+ if (error_count > 0) { + options.binary = NO_BINARY; + error(0, "Due to errors, no binary will be generated"); + } + if (pipe.empty()) return; + + /* Allow to set any stage as match dependent based on a pattern - Should never be used for + * normal compilation */ + if (options.target != TOFINO && !options.stage_dependency_pattern.empty()) { + for (gress_t gress : Range(INGRESS, EGRESS)) { + auto &stage = stages(gress); + unsigned i = 0; + for (auto ch : options.stage_dependency_pattern) { + if (ch == '1') { + LOG1("explicitly setting stage " << i << " " << gress + << " as match dependent on previous stage"); + stage[i].stage_dep[gress] = Stage::MATCH_DEP; + } + if (++i >= stage.size()) break; + } + } + } + + for (gress_t gress : Range(INGRESS, EGRESS)) { + auto &stage = stages(gress); + bitvec set_regs = stage[0].action_set[gress]; + for (unsigned i = 1; i < stage.size(); i++) { + if (!stage[i].stage_dep[gress]) { + if (stage[i].match_use[gress].intersects(set_regs)) { + LOG1("stage " << i << " " << gress << " is match dependent on previous stage"); + stage[i].stage_dep[gress] = Stage::MATCH_DEP; + } else if (stage[i].action_use[gress].intersects(set_regs)) { + LOG1("stage " << i << " " << gress << " is action dependent on previous stage"); + stage[i].stage_dep[gress] = Stage::ACTION_DEP; + } else { + LOG1("stage " << i << " " << gress << " is concurrent with previous stage"); + if (!Target::SUPPORT_CONCURRENT_STAGE_DEP()) + stage[i].stage_dep[gress] = Stage::ACTION_DEP; + else + stage[i].stage_dep[gress] = Stage::CONCURRENT; + } + } + if (stage[i].stage_dep[gress] == Stage::MATCH_DEP) + set_regs = stage[i].action_set[gress]; + else + set_regs |= stage[i].action_set[gress]; + } + } + + // Propagate group_table_use so we can estimate latencies. + propagate_group_table_use(); + + // In Tofino, add match-dependent stages if latency is not the minimum + // egress latency. 
There is no such requirement for JBAY - COMPILER-757 + if (options.target == TOFINO) { + // Compute Egress Latency + auto total_cycles = compute_latency(EGRESS); + if (!options.disable_egress_latency_padding) { + // Get non match dependent stages + bitvec non_match_dep; + for (unsigned i = 1; i < pipe.size(); i++) { + auto stage_dep = pipe[i].stage_dep[EGRESS]; + if (stage_dep != Stage::MATCH_DEP) non_match_dep.setbit(i); + } + // Add match-dependent stages and re-evaluate latency + while (total_cycles < Target::Tofino::MINIMUM_REQUIRED_EGRESS_PIPELINE_LATENCY) { + if (non_match_dep == bitvec(0)) break; + auto non_match_dep_stage = non_match_dep.min().index(); + pipe[non_match_dep_stage].stage_dep[EGRESS] = Stage::MATCH_DEP; + LOG3("Converting egress stage " + << non_match_dep_stage + << " to match dependent to meet minimum egress pipeline latency requirement"); + non_match_dep.clrbit(non_match_dep_stage); + total_cycles = compute_latency(EGRESS); + } + } else { + if (total_cycles < Target::Tofino::MINIMUM_REQUIRED_EGRESS_PIPELINE_LATENCY) { + warning(0, + "User disabled adding latency to the egress MAU pipeline " + "to meet its minimum requirements. This may result in under " + "run in certain port speed configurations."); + } + } + } + + // Re-propagate group_table_use to account for any stages that may now be match dependent. 
+ propagate_group_table_use(); + + for (auto &stage : pipe) SWITCH_FOREACH_TARGET(options.target, stage.output(ctxt_json);) + + if (options.log_hashes) { + std::ofstream hash_out; + std::string fname = options.output_dir + "/logs/mau.hashes.log"; + hash_out.open(fname.c_str()); + if (hash_out) { + for (auto &stage : pipe) stage.log_hashes(hash_out); + hash_out.close(); + } + } +} + +void AsmStage::propagate_group_table_use() { + for (gress_t gress : Range(INGRESS, EGRESS)) { + auto &stage = stages(gress); + stage[0].group_table_use[gress] = stage[0].table_use[gress]; + for (unsigned i = 1; i < stage.size(); i++) { + stage[i].group_table_use[gress] = stage[i].table_use[gress]; + if (stage[i].stage_dep[gress] != Stage::MATCH_DEP) + stage[i].group_table_use[gress] |= stage[i - 1].group_table_use[gress]; + } + for (int i = stage.size() - 1; i > 0; i--) + if (stage[i].stage_dep[gress] != Stage::MATCH_DEP) + stage[i - 1].group_table_use[gress] |= stage[i].group_table_use[gress]; + } +} + +unsigned AsmStage::compute_latency(gress_t gress) { + // FIXME -- this is Tofino1 only, so should be in target specific code somewhere + auto total_cycles = 4; // There are 4 extra cycles between stages 5 & 6 of the MAU + for (unsigned i = 1; i < pipe.size(); i++) { + auto stage_dep = pipe[i].stage_dep[gress]; + auto contribute = 0; + if (stage_dep == Stage::MATCH_DEP) { + contribute = pipe[i].pipelength(gress); + } else if (stage_dep == Stage::ACTION_DEP) { + contribute = 2; + } else if (stage_dep == Stage::CONCURRENT) { + contribute = 1; + } + total_cycles += contribute; + } + return total_cycles; +} + +static FakeTable invalid_rams("RAMS NOT PRESENT"); + +std::map> Stage_data::teop = { + {0, {false, INT_MAX}}, {1, {false, INT_MAX}}, {2, {false, INT_MAX}}, {3, {false, INT_MAX}}}; + +Stage::Stage(int stage, bool egress_only) : Stage_data(stage, egress_only) { + static_assert(sizeof(Stage_data) == sizeof(Stage), + "All non-static Stage fields must be in Stage_data"); + table_use[0] = 
table_use[1] = NONE; + stage_dep[0] = stage_dep[1] = NONE; + error_mode[0] = error_mode[1] = DefaultErrorMode::get(); + for (int i = 0; i < Target::SRAM_ROWS(egress_only ? EGRESS : INGRESS); i++) + for (int j = 0; j < Target::SRAM_REMOVED_COLUMNS(); j++) sram_use[i][j] = &invalid_rams; +} + +Stage::~Stage() { + for (auto *ref : all_refs) *ref = nullptr; +} + +int Stage::first_table(gress_t gress) { + for (auto &st : AsmStage::stages(gress)) { + int min_logical_id = INT_MAX; + for (auto tbl : st.tables) { + if (tbl->gress != gress) continue; + if (tbl->logical_id < 0) continue; // ignore phase 0 + if (tbl->logical_id < min_logical_id) min_logical_id = tbl->logical_id; + } + if (min_logical_id != INT_MAX) { + BUG_CHECK((min_logical_id & ~0xf) == 0); + return (st.stageno << 4) + min_logical_id; + } + } + return -1; +} + +Stage *Stage::stage(gress_t gress, int stageno) { + if (stageno < 0 || stageno >= AsmStage::stages(gress).size()) return nullptr; + return &AsmStage::stages(gress).at(stageno); +} + +Stage::Stage(Stage &&a) : Stage_data(std::move(a)) { + for (auto *ref : all_refs) *ref = this; +} + +bitvec Stage::imem_use_all() const { + bitvec rv; + for (auto &u : imem_use) rv |= u; + return rv; +} + +int Stage::tcam_delay(gress_t gress) const { + if (group_table_use[timing_thread(gress)] & Stage::USE_TCAM) return 2; + if (group_table_use[timing_thread(gress)] & Stage::USE_WIDE_SELECTOR) return 2; + return 0; +} + +int Stage::adr_dist_delay(gress_t gress) const { + if (group_table_use[timing_thread(gress)] & Stage::USE_SELECTOR) + return 8; + else if (group_table_use[timing_thread(gress)] & Stage::USE_STATEFUL_DIVIDE) + return 6; + else if (group_table_use[timing_thread(gress)] & Stage::USE_STATEFUL) + return 4; + else if (group_table_use[timing_thread(gress)] & Stage::USE_METER_LPF_RED) + return 4; + else + return 0; +} + +/* Calculate the meter_alu delay for a meter/stateful ALU based on both things + * used globally in the current stage group, and whether this ALU 
uses a divmod + * (in which case it will already have an extra 2-cycle delay */ +int Stage::meter_alu_delay(gress_t gress, bool uses_divmod) const { + if (group_table_use[timing_thread(gress)] & Stage::USE_SELECTOR) + return uses_divmod ? 2 : 4; + else if (group_table_use[timing_thread(gress)] & Stage::USE_STATEFUL_DIVIDE) + return uses_divmod ? 0 : 2; + else + return 0; +} + +int Stage::cycles_contribute_to_latency(gress_t gress) { + if (stage_dep[gress] == MATCH_DEP || stageno == 0) + return pipelength(gress); + else if (stage_dep[gress] == CONCURRENT && options.target == TOFINO) + return 1; + else + return 2; // action dependency +} + +int Stage::pipelength(gress_t gress) const { + return Target::MAU_BASE_DELAY() + tcam_delay(gress) + adr_dist_delay(gress); +} + +int Stage::pred_cycle(gress_t gress) const { + return Target::MAU_BASE_PREDICATION_DELAY() + tcam_delay(gress); +} + +void Stage::verify_have_mpr(std::string key, int line_number) { + if (!Target::HAS_MPR()) + error(line_number, "%s is not available on target %s.", key.c_str(), Target::name()); +} + +template +void Stage::write_common_regs(typename TARGET::mau_regs ®s) { + /* FIXME -- most of the values set here are 'placeholder' constants copied + * from build_pipeline_output_2.py in the compiler */ + auto &merge = regs.rams.match.merge; + auto &adrdist = regs.rams.match.adrdist; + // merge.exact_match_delay_config.exact_match_delay_ingress = tcam_delay(INGRESS); + // merge.exact_match_delay_config.exact_match_delay_egress = tcam_delay(EGRESS); + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (tcam_delay(gress) > 0) { + merge.exact_match_delay_thread[0] |= 1U << gress; + merge.exact_match_delay_thread[1] |= 1U << gress; + merge.exact_match_delay_thread[2] |= 1U << gress; + } + regs.rams.match.adrdist.adr_dist_pipe_delay[gress][0] = + regs.rams.match.adrdist.adr_dist_pipe_delay[gress][1] = adr_dist_delay(gress); + regs.dp.action_output_delay[gress] = pipelength(gress) - 3; + 
regs.dp.pipelength_added_stages[gress] = pipelength(gress) - TARGET::MAU_BASE_DELAY; + if (stageno > 0 && stage_dep[gress] == MATCH_DEP) + regs.dp.match_ie_input_mux_sel |= 1 << gress; + } + + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (stageno == 0) { + /* Credit is set to 2 - Every 512 cycles the credit is reset and every + * bubble request decrements this credit. Acts like a filter to cap bubble + * requests */ + adrdist.bubble_req_ctl[gress].bubble_req_fltr_crd = 0x2; + adrdist.bubble_req_ctl[gress].bubble_req_fltr_en = 0x1; + } + adrdist.bubble_req_ctl[gress].bubble_req_interval = 0x100; + adrdist.bubble_req_ctl[gress].bubble_req_en = 0x1; + adrdist.bubble_req_ctl[gress].bubble_req_interval_eop = 0x100; + adrdist.bubble_req_ctl[gress].bubble_req_en_eop = 0x1; + adrdist.bubble_req_ctl[gress].bubble_req_ext_fltr_en = 0x1; + } + + regs.dp.phv_fifo_enable.phv_fifo_ingress_action_output_enable = + stage_dep[INGRESS] != ACTION_DEP; + regs.dp.phv_fifo_enable.phv_fifo_egress_action_output_enable = stage_dep[EGRESS] != ACTION_DEP; + if (stageno != AsmStage::numstages() - 1) { + regs.dp.phv_fifo_enable.phv_fifo_ingress_final_output_enable = + this[1].stage_dep[INGRESS] == ACTION_DEP; + regs.dp.phv_fifo_enable.phv_fifo_egress_final_output_enable = + this[1].stage_dep[EGRESS] == ACTION_DEP; + } + + /* Error handling related */ + for (gress_t gress : Range(INGRESS, EGRESS)) error_mode[gress].write_regs(regs, this, gress); + + /*-------------------- + * Since a stats ALU enable bit is missing from mau_cfg_stats_alu_lt, need to make sure that for + * unused stats ALUs, they are programmed to point to a logical table that is either unused or + * to one that does not use a stats table. 
*/ + + bool unused_stats_alus = false; + for (auto &salu : regs.cfg_regs.mau_cfg_stats_alu_lt) + if (!salu.modified()) unused_stats_alus = true; + if (unused_stats_alus) { + unsigned avail = 0xffff; + int no_stats = -1; + /* odd pattern of tests to replicate what the old compiler does */ + for (auto tbl : tables) { + avail &= ~(1U << tbl->logical_id); + if (no_stats < 0 && (!tbl->get_attached() || tbl->get_attached()->stats.empty())) + no_stats = tbl->logical_id; + } + if (avail) { + for (int i = 15; i >= 0; --i) + if ((avail >> i) & 1) { + no_stats = i; + break; + } + } + for (auto &salu : regs.cfg_regs.mau_cfg_stats_alu_lt) + if (!salu.modified()) salu = no_stats; + } +} + +void Stage::log_hashes(std::ofstream &out) const { + out << "+-----------------------------------------------------------+" << std::endl; + out << " Stage " << stageno << std::endl; + out << "+-----------------------------------------------------------+" << std::endl; + bool logged = false; + for (auto xbar : ixbar_use) { + if (xbar.first.type == InputXbar::Group::EXACT) { + for (auto use : xbar.second) { + if (use) logged |= use->log_hashes(out); + } + } + } + if (!logged) { + out << " Unused" << std::endl; + } + // Need to use other variables? 
+ out << std::endl; +} + +template +void Stage::gen_gfm_json_info(REGS ®s, std::ostream &out) { + auto &hash = regs.dp.xbar_hash.hash; + auto &gfm = hash.galois_field_matrix; + out << &gfm << "\n"; + out << "Col : "; + for (auto c = 0; c < GALOIS_FIELD_MATRIX_COLUMNS; c++) { + out << std::setw(3) << c; + } + out << " | Row Parity \n"; + for (auto r = 0; r < gfm.size(); r++) { + out << "Row " << std::dec << r << ": \n"; + out << " Byte 0 :"; + unsigned byte0_parity = 0; + unsigned byte1_parity = 0; + for (auto c = 0; c < GALOIS_FIELD_MATRIX_COLUMNS; c++) { + out << std::setw(3) << std::hex << gfm[r][c].byte0; + byte0_parity ^= gfm[r][c].byte0; + } + out << " | " << std::setw(3) << parity(byte0_parity) << "\n"; + out << " Byte 1 :"; + for (auto c = 0; c < GALOIS_FIELD_MATRIX_COLUMNS; c++) { + out << std::setw(3) << std::hex << gfm[r][c].byte1; + byte1_parity ^= gfm[r][c].byte1; + } + out << " | " << std::setw(3) << parity(byte1_parity) << "\n"; + } + + out << "\n"; + auto &grp_enable = regs.dp.hashout_ctl.hash_parity_check_enable; + for (int grp = 0; grp < 8; grp++) { + out << "Hash Group : " << grp << "\n"; + out << "Hash Seed : "; + int seed_parity = 0; + bitvec hash_seed; + for (int bit = 51; bit >= 0; bit--) { + auto seed_bit = (hash.hash_seed[bit] >> grp) & 0x1; + hash_seed[bit] = seed_bit; + out << seed_bit; + seed_parity ^= seed_bit; + } + out << " (" << hash_seed << ")"; + out << "\n"; + auto seed_parity_enable = ((grp_enable >> grp) & 0x1) ? "True" : "False"; + out << "Hash Seed Parity Enable : " << seed_parity_enable; + out << "\n"; + out << "Hash Seed Parity : " << (seed_parity ? 
"Odd" : "Even"); + out << "\n"; + out << "\n"; + } +} + +template +void Stage::fixup_regs(REGS ®s) { + if (options.condense_json) { + // if any part of the gf matrix is enabled, we can't elide any part of it when + // generating .cfg.json, as otherwise walle will generate an invalid block write + if (options.gen_json && !regs.dp.xbar_hash.hash.galois_field_matrix.disabled()) + regs.dp.xbar_hash.hash.galois_field_matrix.enable(); + } + // Enable mapram_config and imem regs - + // These are cached by the driver, so if they are disabled they wont go + // into tofino.bin as dma block writes and driver will complain + // The driver needs the regs to do parity error correction at runtime and it + // checks for the base address of the register blocks to do a block DMA + // during tofino.bin download + regs.dp.imem.enable(); + for (int row = 0; row < SRAM_ROWS; row++) + for (int col = 0; col < MAPRAM_UNITS_PER_ROW; col++) + regs.rams.map_alu.row[row].adrmux.mapram_config[col].enable(); +} + +template +void Stage::output(json::map &ctxt_json, bool egress_only) { + auto *regs = new typename TARGET::mau_regs(); + declare_registers(regs, egress_only, stageno); + json::vector &ctxt_tables = ctxt_json["tables"]; + for (auto table : tables) { + table->write_regs(*regs); + table->gen_tbl_cfg(ctxt_tables); + if (auto gw = table->get_gateway()) gw->gen_tbl_cfg(ctxt_tables); + } + write_regs(*regs, egress_only); + + // Output GFM + if (gfm_out) gen_gfm_json_info(*regs, *gfm_out); + + if (options.condense_json) regs->disable_if_reset_value(); + + fixup_regs(*regs); + char buf[64]; + snprintf(buf, sizeof(buf), "regs.match_action_stage%s.%02x", egress_only ? ".egress" : "", + stageno); + if (error_count == 0 && options.gen_json) + regs->emit_json(*open_output("%s.cfg.json", buf), stageno); + auto NUM_STAGES = egress_only ? 
Target::NUM_EGRESS_STAGES() : Target::NUM_MAU_STAGES(); + if (stageno < NUM_STAGES) TopLevel::all->set_mau_stage(stageno, buf, regs, egress_only); + gen_mau_stage_characteristics(*regs, ctxt_json["mau_stage_characteristics"]); + gen_configuration_cache(*regs, ctxt_json["configuration_cache"]); + if (stageno == NUM_STAGES - 1 && Target::OUTPUT_STAGE_EXTENSION()) + gen_mau_stage_extension(*regs, ctxt_json["mau_stage_extension"]); +} + +template +void Stage::gen_mau_stage_characteristics(REGS ®s, json::vector &stg_characteristics) { + for (gress_t gress : Range(INGRESS, EGRESS)) { + json::map anon; + anon["stage"] = stageno; + anon["gress"] = P4Table::direction_name(gress); + anon["match_dependent"] = (regs.dp.cur_stage_dependency_on_prev[gress] == 0) ? true : false; + anon["clock_cycles"] = pipelength(gress); + anon["predication_cycle"] = pred_cycle(gress); + anon["cycles_contribute_to_latency"] = cycles_contribute_to_latency(gress); + stg_characteristics.push_back(std::move(anon)); + } +} + +template +void Stage::gen_configuration_cache(REGS ®s, json::vector &cfg_cache) { + BUG(); // Must be specialized for target -- no generic implementation +} + +template +void Stage::gen_configuration_cache_common(REGS ®s, json::vector &cfg_cache) { + std::string reg_fqname; + std::string reg_name; + unsigned reg_value; + std::string reg_value_str; + unsigned reg_width = 8; // this means number of hex characters + + // meter_sweep_ctl + auto &meter_sweep_ctl = regs.rams.match.adrdist.meter_sweep_ctl; + for (int i = 0; i < 4; i++) { + reg_fqname = "mau[" + std::to_string(stageno) + "].rams.match.adrdist.meter_sweep_ctl[" + + std::to_string(i) + "]"; + if (options.match_compiler) { // FIXME: Temp fix to match glass typo + reg_fqname = "mau[" + std::to_string(stageno) + + "].rams.match.adrdist.meter_sweep_ctl.meter_sweep_ctl[" + + std::to_string(i) + "]"; + } + reg_name = "stage_" + std::to_string(stageno) + "_meter_sweep_ctl_" + std::to_string(i); + reg_value = 
(meter_sweep_ctl[i].meter_sweep_en & 0x00000001) | + ((meter_sweep_ctl[i].meter_sweep_offset & 0x0000003F) << 1) | + ((meter_sweep_ctl[i].meter_sweep_size & 0x0000003F) << 7) | + ((meter_sweep_ctl[i].meter_sweep_remove_hole_pos & 0x00000003) << 13) | + ((meter_sweep_ctl[i].meter_sweep_remove_hole_en & 0x00000001) << 16) | + ((meter_sweep_ctl[i].meter_sweep_interval & 0x0000001F) << 17); + if ((reg_value != 0) || (options.match_compiler)) { + reg_value_str = int_to_hex_string(reg_value, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + } + + // meter_ctl is different for Tofino and Tofino2, so it is added in + // specialized functions. + + // statistics_ctl + auto &statistics_ctl = regs.rams.map_alu.stats_wrap; + for (int i = 0; i < 4; i++) { + reg_fqname = "mau[" + std::to_string(stageno) + "].rams.map_alu.stats_wrap[" + + std::to_string(i) + "]" + ".stats.statistics_ctl"; + reg_name = "stage_" + std::to_string(stageno) + "_statistics_ctl_" + std::to_string(i); + reg_value = + (statistics_ctl[i].stats.statistics_ctl.stats_entries_per_word & 0x00000007) | + ((statistics_ctl[i].stats.statistics_ctl.stats_process_bytes & 0x00000001) << 3) | + ((statistics_ctl[i].stats.statistics_ctl.stats_process_packets & 0x00000001) << 4) | + ((statistics_ctl[i].stats.statistics_ctl.lrt_enable & 0x00000001) << 5) | + ((statistics_ctl[i].stats.statistics_ctl.stats_alu_egress & 0x00000001) << 6) | + ((statistics_ctl[i].stats.statistics_ctl.stats_bytecount_adjust & 0x00003FFF) << 7) | + ((statistics_ctl[i].stats.statistics_ctl.stats_alu_error_enable & 0x00000001) << 21); + if ((reg_value != 0) || (options.match_compiler)) { + reg_value_str = int_to_hex_string(reg_value, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + } + + // match_input_xbar_din_power_ctl + auto &mixdpctl = regs.dp.match_input_xbar_din_power_ctl; + reg_value_str = ""; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 16; j++) { + reg_value = 
mixdpctl[i][j]; + reg_value_str = reg_value_str + int_to_hex_string(reg_value, reg_width); + } + } + if (!check_zero_string(reg_value_str) || options.match_compiler) { + reg_fqname = "mau[" + std::to_string(stageno) + "].dp.match_input_xbar_din_power_ctl"; + reg_name = "stage_" + std::to_string(stageno) + "_match_input_xbar_din_power_ctl"; + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + + // hash_seed + auto &hash_seed = regs.dp.xbar_hash.hash.hash_seed; + reg_value_str = ""; + for (int i = 0; i < 52; i++) { + reg_value = hash_seed[i]; + reg_value_str = reg_value_str + int_to_hex_string(reg_value, reg_width); + } + if (!check_zero_string(reg_value_str) || options.match_compiler) { + reg_fqname = "mau[" + std::to_string(stageno) + "].dp.xbar_hash.hash.hash_seed"; + reg_name = "stage_" + std::to_string(stageno) + "_hash_seed"; + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + + // parity_group_mask + auto &parity_group_mask = regs.dp.xbar_hash.hash.parity_group_mask; + reg_value_str = ""; + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 2; j++) { + reg_value = parity_group_mask[i][j]; + reg_value_str = reg_value_str + int_to_hex_string(reg_value, reg_width); + } + } + if (!check_zero_string(reg_value_str) || options.match_compiler) { + reg_fqname = "mau[" + std::to_string(stageno) + "].dp.xbar_hash.hash.parity_group_mask"; + reg_name = "stage_" + std::to_string(stageno) + "_parity_group_mask"; + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } +} + +template +void Stage::write_teop_regs(REGS ®s) { + BUG_CHECK(Target::SUPPORT_TRUE_EOP(), "teop not supported on target"); + // Set teop bus delay regs on current stage if previous stage is driving teop + for (auto t : teop) { + if (t.second.first && t.second.second < stageno) { + auto delay_en = (stage_dep[EGRESS] != Stage::ACTION_DEP); + if (delay_en) { + auto delay = pipelength(EGRESS) - 4; + auto &adrdist = regs.rams.match.adrdist; + 
adrdist.teop_bus_ctl[t.first].teop_bus_ctl_delay = delay; + adrdist.teop_bus_ctl[t.first].teop_bus_ctl_delay_en = delay_en; + } + } + } +} diff --git a/backends/tofino/bf-asm/stage.h b/backends/tofino/bf-asm/stage.h new file mode 100644 index 00000000000..f5f2cdae2c8 --- /dev/null +++ b/backends/tofino/bf-asm/stage.h @@ -0,0 +1,227 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_STAGE_H_ +#define BACKENDS_TOFINO_BF_ASM_STAGE_H_ + +#include +#include + +#include "alloc.h" +#include "backends/tofino/bf-asm/tables.h" +#include "error_mode.h" +#include "input_xbar.h" +#include "lib/bitvec.h" + +class Stage_data { + /* we encapsulate all the Stage non-static fields in a base class to automate the + * generation of the move construtor properly */ + public: + int stageno; + std::vector
tables; + std::set all_refs; + BFN::Alloc2Dbase
sram_use; + BFN::Alloc2D
sram_search_bus_use; + BFN::Alloc3Dbase
stm_hbus_use; + BFN::Alloc2D
match_result_bus_use; + BFN::Alloc2D
mapram_use; + BFN::Alloc2Dbase
tcam_use; + BFN::Alloc2Dbase
tcam_match_bus_use; + BFN::Alloc2D, TCAM_ROWS, 2> tcam_byte_group_use; + BFN::Alloc1Dbase
local_tind_use; + BFN::Alloc2D
tcam_indirect_bus_use; + BFN::Alloc2D gw_unit_use; + BFN::Alloc2D gw_payload_use; + BFN::Alloc1D
logical_id_use; + BFN::Alloc1D
physical_id_use; + BFN::Alloc1D
tcam_id_use; + ordered_map> ixbar_use; + BFN::Alloc1D
tcam_ixbar_input; + BFN::Alloc1Dbase> hash_table_use; + BFN::Alloc1Dbase> hash_group_use; + BFN::Alloc1D, 6> hash_dist_use; + BFN::Alloc1Dbase action_unit_use; + BFN::Alloc1Dbase dp_unit_use; + BFN::Alloc1D
action_bus_use; + BFN::Alloc1D
action_data_use, meter_bus_use, stats_bus_use, + selector_adr_bus_use, overflow_bus_use; + BFN::Alloc1D
idletime_bus_use; + bitvec action_bus_use_bit_mask; + BFN::Alloc2D imem_addr_use; + bitvec imem_use[ACTION_IMEM_SLOTS]; + BFN::Alloc1D long_branch_use; + unsigned long_branch_thread[3] = {0}; + unsigned long_branch_terminate = 0; + + // for timing, ghost thread is tied to ingress, so we track ghost as ingress here + enum { + USE_TCAM = 1, + USE_STATEFUL = 4, + USE_METER = 8, + USE_METER_LPF_RED = 16, + USE_SELECTOR = 32, + USE_WIDE_SELECTOR = 64, + USE_STATEFUL_DIVIDE = 128 + }; + int /* enum */ table_use[2], group_table_use[2]; + + enum { NONE = 0, CONCURRENT = 1, ACTION_DEP = 2, MATCH_DEP = 3 } stage_dep[2]; + bitvec match_use[3], action_use[3], action_set[3]; + + // there's no error mode registers for ghost thread, so we don't allow it to be set + ErrorMode error_mode[2]; + + // MPR stage config + int mpr_stage_id[3] = {0}; // per-gress + int mpr_always_run = 0; + int mpr_bus_dep_glob_exec[3] = {0}; + int mpr_bus_dep_long_branch[3] = {0}; + // per gress, per logical table + BFN::Alloc2D mpr_next_table_lut; + // per global execute bit + BFN::Alloc1D mpr_glob_exec_lut; + // per long branch tag + BFN::Alloc1D mpr_long_brch_lut; + + int pass1_logical_id = -1, pass1_tcam_id = -1; + + // True egress accounting (4 buses) Tofino2 + static std::map> teop; + + protected: + Stage_data(int stage, bool egress_only) + : stageno(stage), + sram_use(Target::SRAM_ROWS(egress_only ? EGRESS : INGRESS), Target::SRAM_UNITS_PER_ROW()), + stm_hbus_use(Target::SRAM_ROWS(egress_only ? 
EGRESS : INGRESS), + Target::SRAM_HBUS_SECTIONS_PER_STAGE(), Target::SRAM_HBUSSES_PER_ROW()), + tcam_use(Target::TCAM_ROWS(), Target::TCAM_UNITS_PER_ROW()), + tcam_match_bus_use(Target::TCAM_ROWS(), Target::TCAM_MATCH_BUSSES()), + local_tind_use(Target::LOCAL_TIND_UNITS()), + hash_table_use(Target::EXACT_HASH_TABLES()), + hash_group_use(Target::EXACT_HASH_GROUPS()), + action_unit_use(Target::ARAM_UNITS_PER_STAGE()), + dp_unit_use(Target::DP_UNITS_PER_STAGE()) {} + Stage_data(const Stage_data &) = delete; + Stage_data(Stage_data &&) = default; + ~Stage_data() {} +}; + +class Stage : public Stage_data { + public: + static unsigned char action_bus_slot_map[ACTION_DATA_BUS_BYTES]; + static unsigned char action_bus_slot_size[ACTION_DATA_BUS_SLOTS]; // size in bits + + explicit Stage(int stageno, bool egress_only); + Stage(const Stage &) = delete; + Stage(Stage &&); + ~Stage(); + template + void output(json::map &ctxt_json, bool egress_only = false); + template + void fixup_regs(REGS ®s); + template + void gen_configuration_cache_common(REGS ®s, json::vector &cfg_cache); + template + void gen_configuration_cache(REGS ®s, json::vector &cfg_cache); + template + void gen_gfm_json_info(REGS ®s, std::ostream &out); + template + void gen_mau_stage_characteristics(REGS ®s, json::vector &stg_characteristics); + template + void gen_mau_stage_extension(REGS ®s, json::map &extend); + template + void write_regs(REGS ®s, bool egress_only); + template + void write_common_regs(typename TARGET::mau_regs ®s); + template + void write_teop_regs(REGS ®s); + int adr_dist_delay(gress_t gress) const; + int meter_alu_delay(gress_t gress, bool uses_divmod) const; + int pipelength(gress_t gress) const; + int pred_cycle(gress_t gress) const; + int tcam_delay(gress_t gress) const; + int cycles_contribute_to_latency(gress_t gress); + void verify_have_mpr(std::string key, int line_number); + static int first_table(gress_t gress); + static unsigned end_of_pipe() { return Target::END_OF_PIPE(); } + 
static Stage *stage(gress_t gress, int stageno); + void log_hashes(std::ofstream &out) const; + bitvec imem_use_all() const; +}; + +class AsmStage : public Section { + void start(int lineno, VECTOR(value_t) args); + void input(VECTOR(value_t) args, value_t data); + void output(json::map &); + + /// Propagates group_table_use to adjacent stages that are not match-dependent. + void propagate_group_table_use(); + + unsigned compute_latency(gress_t gress); + AsmStage(); + ~AsmStage() {} + std::vector pipe; + static AsmStage singleton_object; + bitvec stages_seen[NUM_GRESS_T]; + + public: + void process(); + static int numstages() { return singleton_object.pipe.size(); } + static std::vector &stages(gress_t gress) { return singleton_object.pipe; } + + // for gtest + void reset_stage(Stage &stage) { + for (auto &tbl : stage.tables) tbl->all->clear(); + stage.tables.clear(); + stage.all_refs.clear(); + stage.sram_use.clear(); + stage.sram_search_bus_use.clear(); + stage.stm_hbus_use.clear(); + stage.match_result_bus_use.clear(); + stage.mapram_use.clear(); + stage.tcam_use.clear(); + stage.tcam_match_bus_use.clear(); + stage.tcam_byte_group_use.clear(); + stage.gw_unit_use.clear(); + stage.gw_payload_use.clear(); + stage.logical_id_use.clear(); + stage.physical_id_use.clear(); + stage.tcam_id_use.clear(); + stage.ixbar_use.clear(); + stage.tcam_ixbar_input.clear(); + stage.hash_table_use.clear(); + stage.hash_group_use.clear(); + stage.hash_dist_use.clear(); + stage.action_bus_use.clear(); + stage.action_data_use.clear(); + stage.meter_bus_use.clear(); + stage.stats_bus_use.clear(); + stage.selector_adr_bus_use.clear(); + stage.overflow_bus_use.clear(); + stage.idletime_bus_use.clear(); + stage.imem_addr_use.clear(); + stage.long_branch_use.clear(); + } + + void reset() { + stages_seen[INGRESS].clear(); + stages_seen[EGRESS].clear(); + for (auto &stage : pipe) reset_stage(stage); + } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_STAGE_H_ */ diff --git 
a/backends/tofino/bf-asm/stateful.cpp b/backends/tofino/bf-asm/stateful.cpp new file mode 100644 index 00000000000..ab125df0abc --- /dev/null +++ b/backends/tofino/bf-asm/stateful.cpp @@ -0,0 +1,676 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "tofino/stateful.h" + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "data_switchbox.h" +#include "input_xbar.h" +#include "instruction.h" +#include "jbay/stateful.h" +#include "lib/algorithm.h" +#include "misc.h" + +void StatefulTable::parse_register_params(int idx, const value_t &val) { + if (idx < 0 || idx > Target::STATEFUL_REGFILE_ROWS()) + error(lineno, + "Index out of range of the number of the register file rows (%d). 
" + "Reduce the number of large constants or RegisterParams.", + Target::STATEFUL_REGFILE_ROWS()); + if (const_vals.size() <= size_t(idx)) const_vals.resize(idx + 1); + if (CHECKTYPE(val, tMAP) && val.map.size == 1) + if (CHECKTYPE(val.map.data->key, tSTR) && CHECKTYPE(val.map.data->value, tINT)) + const_vals[idx] = std::move(const_info_t(val.lineno, val.map.data->value.i, true, + std::string(val.map.data->key.s))); +} + +void StatefulTable::setup(VECTOR(pair_t) & data) { + common_init_setup(data, false, P4Table::Stateful); + if (!format) error(lineno, "No format specified in table %s", name()); + for (auto &kv : MapIterChecked(data, true)) { + if (common_setup(kv, data, P4Table::Stateful)) { + } else if (kv.key == "initial_value") { + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &v : kv.value.map) { + if (v.key == "lo") { + if (CHECKTYPE2(v.value, tINT, tBIGINT)) { + if (v.value.type == tINT) { + initial_value_lo = v.value.i; + } else { + initial_value_lo = v.value.bigi.data[0]; + if (v.value.bigi.size > 1) initial_value_hi = v.value.bigi.data[1]; + } + } + } else if (v.key == "hi") { + if (CHECKTYPE(v.value, tINT)) initial_value_hi = v.value.i; + } + } + } + } else if (kv.key == "input_xbar") { + if (CHECKTYPE(kv.value, tMAP)) + input_xbar.emplace_back(InputXbar::create(this, false, kv.key, kv.value.map)); + } else if (kv.key == "data_bytemask") { + if (CHECKTYPE(kv.value, tINT)) data_bytemask = kv.value.i; + } else if (kv.key == "hash_bytemask") { + if (CHECKTYPE(kv.value, tINT)) hash_bytemask = kv.value.i; + } else if (kv.key == "hash_dist") { + /* parsed in common_init_setup */ + } else if (kv.key == "actions") { + if (CHECKTYPE(kv.value, tMAP)) actions.reset(new Actions(this, kv.value.map)); + } else if (kv.key == "selection_table") { + bound_selector = kv.value; + } else if (kv.key == "register_params") { + if (!CHECKTYPE2(kv.value, tVEC, tMAP)) continue; + if (kv.value.type == tVEC) { + for (auto &v : kv.value.vec) parse_register_params(const_vals.size(), 
v); + } else { + for (auto &v : kv.value.map) + if (CHECKTYPE(v.key, tINT)) parse_register_params(v.key.i, v.value); + } + } else if (kv.key == "math_table") { + if (!CHECKTYPE(kv.value, tMAP)) continue; + math_table.lineno = kv.value.lineno; + for (auto &v : kv.value.map) { + if (v.key == "data") { + if (!CHECKTYPE2(v.value, tVEC, tMAP)) continue; + if (v.value.type == tVEC) { + parse_vector(math_table.data, v.value); + } else { + math_table.data.resize(16); + for (auto &d : v.value.map) + if (CHECKTYPE(d.key, tINT) && CHECKTYPE(d.value, tINT)) { + if (d.key.i < 0 || d.key.i >= 16) + error(v.key.lineno, "invalid index for math_table"); + else + math_table.data[v.key.i] = v.value.i; + } + } + } else if (v.key == "invert") { + math_table.invert = get_bool(v.value); + } else if (v.key == "shift") { + if (CHECKTYPE(v.value, tINT)) math_table.shift = v.value.i; + } else if (v.key == "scale") { + if (CHECKTYPE(v.value, tINT)) math_table.scale = v.value.i; + } else if (v.key.type == tINT && v.key.i >= 0 && v.key.i < 16) { + if (CHECKTYPE(v.value, tINT)) math_table.data[v.key.i] = v.value.i; + } else { + error(v.key.lineno, "Unknow item %s in math_table", value_desc(kv.key)); + } + } + } else if (options.target >= JBAY && setup_jbay(kv)) { + /* jbay specific extensions done in setup_jbay */ + // FIXME -- these should probably be based on individual Target::FEATURE() queries + } else if (kv.key == "log_vpn") { + logvpn_lineno = kv.value.lineno; + if (CHECKTYPE2(kv.value, tINT, tRANGE)) { + if (kv.value.type == tINT) { + logvpn_min = logvpn_max = kv.value.i; + } else { + logvpn_min = kv.value.range.lo; + logvpn_max = kv.value.range.hi; + } + } + } else if (kv.key == "pred_shift") { + if (CHECKTYPE(kv.value, tINT)) + if ((pred_shift = kv.value.i) < 0 || pred_shift >= 32 || (pred_shift & 3) != 0) + error(kv.value.lineno, "Invalid pred_shift value %d: %s", pred_shift, + pred_shift < 0 ? "negative" + : pred_shift >= 32 ? 
"too large" + : "must be a mulitple of 4"); + } else if (kv.key == "pred_comb_shift") { + if (CHECKTYPE(kv.value, tINT)) + if ((pred_comb_shift = kv.value.i) < 0 || pred_comb_shift >= 32) + error(kv.value.lineno, "Invalid pred_comb_shift value %d: %s", pred_comb_shift, + pred_comb_shift < 0 ? "negative" : "too large"); + } else if (kv.key == "busy_value" && Target::SUPPORT_SALU_FAST_CLEAR()) { + if (CHECKTYPE(kv.value, tINT)) busy_value = kv.value.i; + } else if (kv.key == "clear_value" && Target::SUPPORT_SALU_FAST_CLEAR()) { + if (CHECKTYPE2(kv.value, tINT, tBIGINT)) + clear_value = get_bitvec(kv.value, 128, "Value too large for 128 bits"); + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } +} + +bool match_table_layouts(Table *t1, Table *t2) { + if (t1->layout.size() != t2->layout.size()) return false; + auto it = t2->layout.begin(); + for (auto &row : t1->layout) { + if (row.row != it->row) return false; + if (row.memunits != it->memunits) return false; + if (row.maprams.empty()) row.maprams = it->maprams; + if (row.maprams != it->maprams) return false; + ++it; + } + return true; +} + +void StatefulTable::MathTable::check() { + if (data.size() > 16) error(lineno, "math table only has 16 data entries"); + data.resize(16); + for (auto &v : data) + if (v < 0 || v >= 256) error(lineno, "%d out of range for math_table data", v); + if (shift < -1 || shift > 1) error(lineno, "%d out of range for math_table shift", shift); + if (scale < -32 || scale >= 32) error(lineno, "%d out of range for math_table scale", scale); +} + +void StatefulTable::pass1() { + LOG1("### Stateful table " << name() << " pass1 " << loc()); + if (!p4_table) + p4_table = P4Table::alloc(P4Table::Stateful, this); + else + p4_table->check(this); + alloc_vpns(); + if (bound_selector.check()) { + if (layout.empty()) + layout = bound_selector->layout; + else if (!match_table_layouts(this, bound_selector)) + error(layout[0].lineno, "Layout 
in %s does not match selector %s", name(), + bound_selector->name()); + // Add a back reference to this table + if (!bound_selector->get_stateful()) bound_selector->set_stateful(this); + if (logical_id < 0) logical_id = bound_selector->logical_id; + } else { + alloc_maprams(); + if (Target::SRAM_GLOBAL_ACCESS()) + alloc_global_srams(); + else + alloc_rams(true, stage->sram_use); + } + std::sort(layout.begin(), layout.end(), + [](const Layout &a, const Layout &b) -> bool { return a.row > b.row; }); + stage->table_use[timing_thread(gress)] |= Stage::USE_STATEFUL; + for (auto &hd : hash_dist) hd.pass1(this, HashDistribution::OTHER, false); + for (auto &ixb : input_xbar) ixb->pass1(); + int prev_row = -1; + for (auto &row : layout) { + if (prev_row >= 0) + need_bus(lineno, stage->overflow_bus_use, row.row, "Overflow"); + else + need_bus(lineno, stage->meter_bus_use, row.row, "Meter data"); + for (int r = (row.row + 1) | 1; r < prev_row; r += 2) + need_bus(lineno, stage->overflow_bus_use, r, "Overflow"); + prev_row = row.row; + } + unsigned idx = 0, size = 0; + for (auto &fld : *format) { + switch (idx++) { + case 0: + if ((size = fld.second.size) != 1 && size != 8 && size != 16 && size != 32 && + ((size != 64 && size != 128) || options.target == TOFINO)) { + error(format->lineno, "invalid size %d for stateful format field %s", size, + fld.first.c_str()); + break; + } + break; + case 1: + if (size != fld.second.size) + error(format->lineno, "stateful fields must be the same size"); + else if (size == 1) + error(format->lineno, "one bit stateful tables can only have a single field"); + break; + default: + error(format->lineno, "only two fields allowed in a stateful table"); + } + } + if ((idx == 2) && (format->size == 2 * size)) dual_mode = true; + if (actions) { + actions->pass1(this); + bool stop = false; + for (auto &act : *actions) { + for (auto &inst : act.instr) { + if (inst->salu_output()) { + need_bus(layout.at(0).lineno, stage->action_data_use, home_row(), + 
"action data"); + stop = true; + break; + } + } + if (stop) break; + } + } else { + error(lineno, "No actions in stateful table %s", name()); + } + if (math_table) math_table.check(); + for (auto &r : sbus_learn) + if (r.check() && (r->table_type() != STATEFUL || r->stage != stage)) + error(r.lineno, "%s is not a stateful table in the same stage as %s", r->name(), + name()); + for (auto &r : sbus_match) + if (r.check() && (r->table_type() != STATEFUL || r->stage != stage)) + error(r.lineno, "%s is not a stateful table in the same stage as %s", r->name(), + name()); + Synth2Port::pass1(); + if (underflow_action.set() && (!actions || !actions->exists(underflow_action.name))) + error(underflow_action.lineno, "No action %s in table %s", underflow_action.name.c_str(), + name()); + if (overflow_action.set() && (!actions || !actions->exists(overflow_action.name))) + error(overflow_action.lineno, "No action %s in table %s", overflow_action.name.c_str(), + name()); +} + +int StatefulTable::get_const(int lineno, int64_t v) { + size_t rv; + for (rv = 0; rv < const_vals.size(); rv++) { + // Skip constants allocated for RegisterParams as they cannot be shared + // as they are subject to change. + if (const_vals[rv].is_param) continue; + if (const_vals[rv].value == v) break; + } + if (rv == const_vals.size()) { + if (rv >= Target::STATEFUL_REGFILE_ROWS()) + error(lineno, + "Out of the number of register file rows (%d). 
Reduce the number" + " of large constants or RegisterParams.", + Target::STATEFUL_REGFILE_ROWS()); + const_vals.push_back(std::move(const_info_t(lineno, v))); + } + return rv; +} + +void StatefulTable::pass2() { + LOG1("### Stateful table " << name() << " pass2 " << loc()); + for (auto &ixb : input_xbar) ixb->pass2(); + if (actions) actions->stateful_pass2(this); + if (stateful_counter_mode) { + if (logvpn_min < 0) { + layout_vpn_bounds(logvpn_min, logvpn_max, true); + } else if (!offset_vpn) { + int min, max; + layout_vpn_bounds(min, max, true); + if (logvpn_min < min || logvpn_max > max) + error(logvpn_lineno, "log_vpn out of range (%d..%d)", min, max); + } + } + + for (auto &ixb : input_xbar) { + if (!data_bytemask && !hash_bytemask) { + hash_bytemask = bitmask2bytemask(ixb->hash_group_bituse()) & phv_byte_mask; + // should we also mask off bits not set in the ixbar of this table? + // as long as the compiler explicitly zeroes everything in the hash + // that needs to be zero, it should be ok. + data_bytemask = phv_byte_mask & ~hash_bytemask; + } + } + if (input_xbar.empty()) { + if (data_bytemask || hash_bytemask) { + error(lineno, "No input_xbar in %s, but %s is present", name(), + data_bytemask ? "data_bytemask" : "hash_bytemask"); + } else if (phv_byte_mask) { + error(lineno, "No input_xbar in %s, but raw phv_%s use is present", name(), + (phv_byte_mask & 1) ? 
"lo" : "hi"); + } + } + for (auto &hd : hash_dist) hd.pass2(this); +} + +void StatefulTable::pass3() { LOG1("### Stateful table " << name() << " pass3 " << loc()); } + +int StatefulTable::direct_shiftcount() const { + return 64 + METER_ADDRESS_ZERO_PAD - address_shift(); +} + +int StatefulTable::indirect_shiftcount() const { return METER_ADDRESS_ZERO_PAD - address_shift(); } + +int StatefulTable::address_shift() const { return ceil_log2(format->size) - meter_adr_shift; } + +unsigned StatefulTable::per_flow_enable_bit(MatchTable *match) const { + if (!per_flow_enable) + return METER_ADDRESS_BITS - METER_TYPE_BITS - 1; + else + return AttachedTable::per_flow_enable_bit(match); +} + +unsigned StatefulTable::determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) const { + return determine_meter_shiftcount(call, group, word, tcam_shift); +} + +/** Determine which stateful action a given table action invokes (if any) + * In theory, the stateful action to run could be an action data param or even come from + * hash_dist (so the action could run any stateful action), but currently the compiler will + * never geneate such code. If we add that ability, we'll need to revisit this, and need + * to revise the context.json appropriately. Currently, this code will return a nullptr + * for such bfa code (meter_type_arg would be a Field or HashDist) + */ +Table::Actions::Action *StatefulTable::action_for_table_action(const MatchTable *tbl, + const Actions::Action *act) const { + // Check for action args to determine which stateful action is + // called. 
If no args are present skip as the action does not + // invoke stateful + if (indirect) { + for (auto att : act->attached) { + if (att != this) continue; + if (att.args.size() == 0) continue; + auto meter_type_arg = att.args[0]; + if (meter_type_arg.type == Call::Arg::Name) { + // Check if stateful has this called action + return actions->action(meter_type_arg.name()); + } else if (meter_type_arg.type == Call::Arg::Const) { + int index = -1; + switch (meter_type_arg.value()) { + case STATEFUL_INSTRUCTION_0: + index = 0; + break; + case STATEFUL_INSTRUCTION_1: + index = 1; + break; + case STATEFUL_INSTRUCTION_2: + index = 2; + break; + case STATEFUL_INSTRUCTION_3: + index = 3; + break; + } + if (index == -1) continue; + auto it = actions->begin(); + while (it != actions->end() && index > 0) { + --index; + ++it; + } + if (it != actions->end()) return &(*it); + } + } + } else { + // If stateful is direct, all user defined actions will + // invoke stateful except for the miss action. This is + // defined as 'default_only' in p4, if not the compiler + // generates a default_only action and adds it + // FIXME: Brig should add these generated actions as + // default_only in assembly + if (!((act->name == tbl->default_action) && tbl->default_only_action)) { + // Direct has only one action + if (actions) return &*actions->begin(); + } + } + return nullptr; +} + +template +void StatefulTable::write_action_regs_vt(REGS ®s, const Actions::Action *act) { + int meter_alu = layout[0].row / 4U; + auto &stateful_regs = regs.rams.map_alu.meter_group[meter_alu].stateful; + auto &salu_instr_common = stateful_regs.salu_instr_common[act->code]; + if (act->minmax_use) { + salu_instr_common.salu_datasize = 7; + salu_instr_common.salu_op_dual = is_dual_mode(); + } else if (is_dual_mode() || p4c_5192_workaround(act)) { + salu_instr_common.salu_datasize = format->log2size - 1; + salu_instr_common.salu_op_dual = 1; + } else { + salu_instr_common.salu_datasize = format->log2size; + } +} + 
+template +void StatefulTable::write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args) { + auto &merge = regs.rams.match.merge; + unsigned adr_mask = 0U; + unsigned per_entry_en_mux_ctl = 0U; + unsigned adr_default = 0U; + unsigned meter_type_position = 0U; + METER_ACCESS_TYPE default_type = match->default_meter_access_type(true); + AttachedTable::determine_meter_merge_regs(match, type, bus, args, default_type, adr_mask, + per_entry_en_mux_ctl, adr_default, + meter_type_position); + merge.mau_meter_adr_default[type][bus] = adr_default; + merge.mau_meter_adr_mask[type][bus] = adr_mask; + merge.mau_meter_adr_per_entry_en_mux_ctl[type][bus] = per_entry_en_mux_ctl; + merge.mau_meter_adr_type_position[type][bus] = meter_type_position; +} + +template +void StatefulTable::write_regs_vt(REGS ®s) { + LOG1("### Stateful table " << name() << " write_regs " << loc()); + // FIXME -- factor common AttachedTable::write_regs + // FIXME -- factor common Synth2Port::write_regs + // FIXME -- factor common CounterTable::write_regs + // FIXME -- factor common MeterTable::write_regs + for (auto &ixb : input_xbar) ixb->write_regs(regs); + Layout *home = &layout[0]; + bool push_on_overflow = false; + auto &map_alu = regs.rams.map_alu; + auto &merge = regs.rams.match.merge; + auto &adrdist = regs.rams.match.adrdist; + DataSwitchboxSetup swbox(regs, this); + int minvpn, maxvpn; + layout_vpn_bounds(minvpn, maxvpn, true); + if (!bound_selector) { + for (Layout &logical_row : layout) { + unsigned row = logical_row.row / 2U; + unsigned side = logical_row.row & 1; /* 0 == left 1 == right */ + BUG_CHECK(side == 1); /* no map rams or alus on left side anymore */ + auto vpn = logical_row.vpns.begin(); + auto mapram = logical_row.maprams.begin(); + auto &map_alu_row = map_alu.row[row]; + LOG2("# DataSwitchbox.setup(" << row << ") home=" << home->row / 2U); + swbox.setup_row(row); + for (auto &memunit : logical_row.memunits) { + BUG_CHECK(memunit.stage == INT_MIN 
&& memunit.row == logical_row.row, + "bogus %s in logical row %d", memunit.desc(), logical_row.row); + unsigned col = memunit.col + 6 * side; + swbox.setup_row_col(row, col, *vpn); + write_mapram_regs(regs, row, *mapram, *vpn, MapRam::STATEFUL); + if (gress) + regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row); + ++mapram, ++vpn; + } + /* FIXME -- factor with selector/meter? */ + if (&logical_row == home) { + auto &vh_adr_xbar = regs.rams.array.row[row].vh_adr_xbar; + auto &data_ctl = regs.rams.array.row[row].vh_xbar[side].stateful_meter_alu_data_ctl; + for (auto &ixb : input_xbar) { + if (hash_bytemask != 0U) { + vh_adr_xbar.alu_hashdata_bytemask.alu_hashdata_bytemask_right = + hash_bytemask; + setup_muxctl(vh_adr_xbar.exactmatch_row_hashadr_xbar_ctl[2 + side], + ixb->hash_group()); + } + if (data_bytemask != 0) { + data_ctl.stateful_meter_alu_data_bytemask = data_bytemask; + data_ctl.stateful_meter_alu_data_xbar_ctl = 8 | ixb->match_group(); + } + } + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_base = minvpn; + map_alu_row.i2portctl.synth2port_vpn_ctl.synth2port_vpn_limit = maxvpn; + int meter_group_index = row / 2U; + auto &delay_ctl = map_alu.meter_alu_group_data_delay_ctl[meter_group_index]; + delay_ctl.meter_alu_right_group_delay = + Target::METER_ALU_GROUP_DATA_DELAY() + row / 4 + stage->tcam_delay(gress); + delay_ctl.meter_alu_right_group_enable = + meter_alu_fifo_enable_from_mask(regs, phv_byte_mask); + auto &error_ctl = map_alu.meter_alu_group_error_ctl[meter_group_index]; + error_ctl.meter_alu_group_ecc_error_enable = 1; + if (output_used) { + auto &action_ctl = map_alu.meter_alu_group_action_ctl[meter_group_index]; + action_ctl.right_alu_action_enable = 1; + action_ctl.right_alu_action_delay = stage->meter_alu_delay(gress, divmod_used); + auto &switch_ctl = regs.rams.array.switchbox.row[row].ctl; + switch_ctl.r_action_o_mux_select.r_action_o_sel_action_rd_r_i = 1; + // disable action data address huffman decoding, on 
the assumtion we're not + // trying to combine this with an action data table on the same home row. + // Otherwise, the huffman decoding will think this is an 8-bit value and + // replicate it. + regs.rams.array.row[row] + .action_hv_xbar.action_hv_xbar_disable_ram_adr + .action_hv_xbar_disable_ram_adr_right = 1; + } + } else { + auto &adr_ctl = map_alu_row.vh_xbars.adr_dist_oflo_adr_xbar_ctl[side]; + if (home->row >= 8 && logical_row.row < 8) { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = 0; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::OVERFLOW; + push_on_overflow = true; + BUG_CHECK(options.target == TOFINO); + } else { + adr_ctl.adr_dist_oflo_adr_xbar_source_index = home->row % 8; + adr_ctl.adr_dist_oflo_adr_xbar_source_sel = AdrDist::METER; + } + adr_ctl.adr_dist_oflo_adr_xbar_enable = 1; + } + } + } + if (actions) actions->write_regs(regs, this); + unsigned meter_group = home->row / 4U; + for (MatchTable *m : match_tables) { + adrdist.mau_ad_meter_virt_lt[meter_group] |= 1U << m->logical_id; + adrdist.adr_dist_meter_adr_icxbar_ctl[m->logical_id] |= 1 << meter_group; + } + if (!bound_selector) { + bool first_match = true; + for (MatchTable *m : match_tables) { + adrdist.adr_dist_meter_adr_icxbar_ctl[m->logical_id] |= 1 << meter_group; + adrdist.movereg_ad_meter_alu_to_logical_xbar_ctl[m->logical_id / 8U].set_subfield( + 4 | meter_group, 3 * (m->logical_id % 8U), 3); + if (first_match) + adrdist.movereg_meter_ctl[meter_group].movereg_meter_ctl_lt = m->logical_id; + if (direct) { + if (first_match) + adrdist.movereg_meter_ctl[meter_group].movereg_meter_ctl_direct = 1; + adrdist.movereg_ad_direct[MoveReg::METER] |= 1U << m->logical_id; + } + first_match = false; + } + adrdist.movereg_meter_ctl[meter_group].movereg_ad_meter_shift = format->log2size; + if (push_on_overflow) { + adrdist.oflo_adr_user[0] = adrdist.oflo_adr_user[1] = AdrDist::METER; + adrdist.deferred_oflo_ctl = 1 << (home->row - 8) / 2U; + } + 
adrdist.packet_action_at_headertime[1][meter_group] = 1; + } + write_logging_regs(regs); + for (auto &hd : hash_dist) hd.write_regs(regs, this); + if (gress == INGRESS || gress == GHOST) { + merge.meter_alu_thread[0].meter_alu_thread_ingress |= 1U << meter_group; + merge.meter_alu_thread[1].meter_alu_thread_ingress |= 1U << meter_group; + } else if (gress == EGRESS) { + merge.meter_alu_thread[0].meter_alu_thread_egress |= 1U << meter_group; + merge.meter_alu_thread[1].meter_alu_thread_egress |= 1U << meter_group; + } + auto &salu = regs.rams.map_alu.meter_group[meter_group].stateful; + salu.stateful_ctl.salu_enable = 1; + salu.stateful_ctl.salu_output_pred_shift = pred_shift / 4; + salu.stateful_ctl.salu_output_pred_comb_shift = pred_comb_shift; + // The reset value for the CMP opcode is enabled by default -- we want to disable + // any unused cmp units + for (auto &inst : salu.salu_instr_cmp_alu) { + for (auto &alu : inst) { + if (!alu.salu_cmp_opcode.modified()) { + alu.salu_cmp_opcode = 2; + } + } + } + if (gress == EGRESS) { + regs.rams.map_alu.meter_group[meter_group].meter.meter_ctl.meter_alu_egress = 1; + } + if (math_table) { + for (size_t i = 0; i < math_table.data.size(); ++i) + salu.salu_mathtable[i / 4U].set_subfield(math_table.data[i], 8 * (i % 4U), 8); + salu.salu_mathunit_ctl.salu_mathunit_output_scale = math_table.scale & 0x3fU; + salu.salu_mathunit_ctl.salu_mathunit_exponent_invert = math_table.invert; + switch (math_table.shift) { + case -1: + salu.salu_mathunit_ctl.salu_mathunit_exponent_shift = 2; + break; + case 0: + salu.salu_mathunit_ctl.salu_mathunit_exponent_shift = 0; + break; + case 1: + salu.salu_mathunit_ctl.salu_mathunit_exponent_shift = 1; + break; + } + } +} + +void StatefulTable::gen_tbl_cfg(json::vector &out) const { + // FIXME -- factor common Synth2Port stuff + int size = (layout_size() - 1) * 1024 * (128U / format->size); + json::map &tbl = *base_tbl_cfg(out, "stateful", size); + unsigned alu_width = format->size / (dual_mode ? 
2 : 1); + tbl["initial_value_lo"] = initial_value_lo; + tbl["initial_value_hi"] = initial_value_hi; + tbl["alu_width"] = alu_width; + tbl["dual_width_mode"] = dual_mode; + json::vector &act_to_sful_instr_slot = tbl["action_to_stateful_instruction_slot"]; + if (actions) { + for (auto &a : *actions) { + for (auto &i : a.instr) { + if ((i->name() == "set_bit_at") || (i->name() == "set_bitc_at")) + tbl["set_instr_adjust_total"] = a.code; + if ((i->name() == "set_bit") || (i->name() == "set_bitc")) + tbl["set_instr"] = a.code; + if ((i->name() == "clr_bit_at") || (i->name() == "clr_bitc_at")) + tbl["clr_instr_adjust_total"] = a.code; + if ((i->name() == "clr_bit") || (i->name() == "clr_bitc")) + tbl["clr_instr"] = a.code; + } + } + } + // Add action handle and instr slot for action which references stateful + for (auto *m : match_tables) { + if (auto *acts = m->get_actions()) { + for (auto &a : *acts) { + Actions::Action *stful_action = action_for_table_action(m, &a); + if (!stful_action) continue; + bool act_present = false; + // Do not add handle if already present, if stateful spans + // multiple stages this can happen as action handles are unique + // and this code will get called again + for (auto &s : act_to_sful_instr_slot) { + auto s_handle = s->to()["action_handle"]; + if (*s_handle->as_number() == a.handle) { + act_present = true; + break; + } + } + if (act_present) continue; + json::map instr_slot; + instr_slot["action_handle"] = a.handle; + instr_slot["instruction_slot"] = stful_action->code; + act_to_sful_instr_slot.push_back(std::move(instr_slot)); + } + } + } + json::vector ®ister_file = tbl["register_params"]; + for (size_t i = 0; i < const_vals.size(); i++) { + if (!const_vals[i].is_param) continue; + json::map register_file_row; + register_file_row["register_file_index"] = i; + register_file_row["initial_value"] = const_vals[i].value; + register_file_row["name"] = const_vals[i].param_name; + register_file_row["handle"] = const_vals[i].param_handle; + 
register_file.push_back(std::move(register_file_row));
+    }
+    if (bound_selector) tbl["bound_to_selection_table_handle"] = bound_selector->handle();
+    json::map &stage_tbl = *add_stage_tbl_cfg(tbl, "stateful", size);
+    add_alu_index(stage_tbl, "meter_alu_index");
+    gen_tbl_cfg(tbl, stage_tbl);
+    if (context_json) stage_tbl.merge(*context_json);
+}
+
+DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(StatefulTable, TARGET_CLASS)
+FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void StatefulTable::write_action_regs,
+                      (mau_regs & regs, const Actions::Action *act),
+                      { write_action_regs_vt(regs, act); })
+FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void StatefulTable::write_merge_regs,
+                      (mau_regs & regs, MatchTable *match, int type, int bus,
+                       const std::vector<Call::Arg> &args),
+                      { write_merge_regs_vt(regs, match, type, bus, args); })
diff --git a/backends/tofino/bf-asm/synth2port.cpp b/backends/tofino/bf-asm/synth2port.cpp
new file mode 100644
index 00000000000..f86d33e5254
--- /dev/null
+++ b/backends/tofino/bf-asm/synth2port.cpp
@@ -0,0 +1,177 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "data_switchbox.h" +#include "input_xbar.h" +#include "lib/algorithm.h" +#include "misc.h" + +void Synth2Port::common_init_setup(const VECTOR(pair_t) & data, bool, P4Table::type p4type) { + setup_layout(layout, data); + if (auto *fmt = get(data, "format")) { + if (CHECKTYPEPM(*fmt, tMAP, fmt->map.size > 0, "non-empty map")) + format.reset(new Format(this, fmt->map)); + } +} + +bool Synth2Port::common_setup(pair_t &kv, const VECTOR(pair_t) & data, P4Table::type p4type) { + if (kv.key == "vpns") { + if (kv.value == "null") { + no_vpns = true; + } else if (CHECKTYPE(kv.value, tVEC)) { + setup_vpns(layout, &kv.value.vec, true); + } + } else if (kv.key == "maprams") { + setup_maprams(kv.value); + } else if (kv.key == "global_binding") { + global_binding = get_bool(kv.value); + } else if (kv.key == "per_flow_enable") { + if (CHECKTYPE(kv.value, tSTR)) { + per_flow_enable = 1; + per_flow_enable_param = kv.value.s; + } + } else if (kv.key == "p4") { + if (CHECKTYPE(kv.value, tMAP)) p4_table = P4Table::get(p4type, kv.value.map); + } else if (kv.key == "context_json") { + setup_context_json(kv.value); + } else if (kv.key == "format" || kv.key == "row" || kv.key == "logical_row" || + kv.key == "column" || kv.key == "bus") { + /* already done in setup_layout */ + } else if (kv.key == "logical_bus") { + if (CHECKTYPE2(kv.value, tSTR, tVEC)) { + if (kv.value.type == tSTR) { + if (*kv.value.s != 'A' && *kv.value.s != 'O' && *kv.value.s != 'S') + error(kv.value.lineno, "Invalid logical bus %s", kv.value.s); + } else { + for (auto &v : kv.value.vec) { + if (CHECKTYPE(v, tSTR)) { + if (*v.s != 'A' && *v.s != 'O' && *v.s != 'S') + error(v.lineno, "Invalid logical bus %s", v.s); + } + } + } + } + } else if (kv.key == "home_row") { + home_lineno = kv.value.lineno; + if (CHECKTYPE2(kv.value, tINT, tVEC)) { + if (kv.value.type == tINT) { 
+                if (kv.value.i >= 0 && kv.value.i < LOGICAL_SRAM_ROWS)
+                    home_rows.insert(kv.value.i);
+                else
+                    error(kv.value.lineno, "Invalid home row %" PRId64 "", kv.value.i);
+            } else {
+                for (auto &v : kv.value.vec) {
+                    if (CHECKTYPE(v, tINT)) {
+                        if (v.i >= 0 && v.i < LOGICAL_SRAM_ROWS)
+                            home_rows.insert(v.i);
+                        else
+                            error(v.lineno, "Invalid home row %" PRId64 "", v.i);
+                    }
+                }
+            }
+        }
+    } else {
+        return false;
+    }
+    return true;
+}
+
+void Synth2Port::pass1() {
+    LOG1("### Synth2Port table " << name() << " pass1 " << loc());
+    AttachedTable::pass1();
+}
+
+void Synth2Port::alloc_vpns(Target::Tofino) { AttachedTable::alloc_vpns(); }
+
+void Synth2Port::pass2() { LOG1("### Synth2Port table " << name() << " pass2 " << loc()); }
+
+void Synth2Port::pass3() { LOG1("### Synth2Port table " << name() << " pass3 " << loc()); }
+
+json::map *Synth2Port::add_stage_tbl_cfg(json::map &tbl, const char *type, int size) const {
+    json::map &stage_tbl = *AttachedTable::add_stage_tbl_cfg(tbl, type, size);
+    std::string hr = how_referenced();
+    if (hr.empty()) hr = direct ? "direct" : "indirect";
+    tbl["how_referenced"] = hr;
+    int entries = 1;
+    if (format) {
+        BUG_CHECK(format->log2size <= 7);
+        if (format->groups() > 1) {
+            BUG_CHECK(format->log2size == 7);
+            entries = format->groups();
+        } else {
+            entries = 128U >> format->log2size;
+        }
+    }
+    add_pack_format(stage_tbl, 128, 1, entries);
+    stage_tbl["memory_resource_allocation"] =
+        gen_memory_resource_allocation_tbl_cfg("sram", layout, true);
+    return &stage_tbl;
+}
+
+void Synth2Port::add_alu_indexes(json::map &stage_tbl, std::string alu_indexes) const {
+    json::vector home_alu;
+
+    for (auto row : home_rows) home_alu.push_back(row / 4U);
+
+    stage_tbl[alu_indexes] = home_alu.clone();
+}
+
+std::vector<int> Synth2Port::determine_spare_bank_memory_units(Target::Tofino) const {
+    std::vector<int> spare_mem;
+    int vpn_ctr = 0;
+    int minvpn, spare_vpn;
+
+    // Retrieve the Spare VPN
+    layout_vpn_bounds(minvpn, spare_vpn, false);
+    for (auto &row : layout) {
+        auto vpn_itr = row.vpns.begin();
+        for (auto &ram : row.memunits) {
+            BUG_CHECK(ram.stage == INT_MIN && ram.row == row.row, "bogus %s in row %d", ram.desc(),
+                      row.row);
+            if (vpn_itr != row.vpns.end()) vpn_ctr = *vpn_itr++;
+            if (spare_vpn == vpn_ctr) {
+                spare_mem.push_back(json_memunit(ram));
+                if (table_type() == SELECTION || table_type() == COUNTER || table_type() == METER ||
+                    table_type() == STATEFUL)
+                    continue;
+            }
+        }
+    }
+    return spare_mem;
+}
+
+int Synth2Port::get_home_row_for_row(int row) const {
+    for (int home_row : home_rows) {
+        // Tofino1 have an overflow bus in the middle of the SRAM array
+        if (options.target == TOFINO)
+            return home_row;
+        else if (row / 8 == home_row / 8)
+            return home_row;
+    }
+    BUG();
+    return -1;
+}
+
+template <class REGS>
+void Synth2Port::write_regs_vt(REGS &regs) {
+    // FIXME move common Counter/Meter/StatefulTable::write_regs_vt stuff here
+}
+
+REGSETS_IN_CLASS(Tofino, TARGET_OVERLOAD, void Synth2Port::write_regs, (mau_regs & regs),
+                 { write_regs_vt(regs); })
diff --git a/backends/tofino/bf-asm/tables.cpp 
b/backends/tofino/bf-asm/tables.cpp new file mode 100644 index 00000000000..5d915918f05 --- /dev/null +++ b/backends/tofino/bf-asm/tables.cpp @@ -0,0 +1,3357 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tables.h" + +#include +#include + +#include "action_bus.h" +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/stage.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" +#include "misc.h" + +// template specialization declarations + +const char *MemUnit::desc() const { + static char buffer[256], *p = buffer; + char *end = buffer + sizeof(buffer), *rv; + do { + if (end - p < 7) p = buffer; + rv = p; + if (stage != INT_MIN) + p += snprintf(p, end - p, "Mem %d,%d,%d", stage, row, col); + else if (row >= 0) + p += snprintf(p, end - p, "Mem %d,%d", row, col); + else + p += snprintf(p, end - p, "Mem %d", col); + } while (p++ >= end); + return rv; +} + +bool Table::Layout::operator==(const Table::Layout &a) const { + return row == a.row && bus == a.bus && word == a.word && memunits == a.memunits; + // ignoring other fields as if the above are all the same, will use the same resources +} + +unsigned StatefulTable::const_info_t::unique_register_param_handle = REGISTER_PARAM_HANDLE_START; + +std::map *Table::all; +std::vector
*Table::by_uid; +std::map *Table::Type::all; + +Table::Table(int line, std::string &&n, gress_t gr, Stage *s, int lid) + : // NOLINT(whitespace/operators) + name_(n), + stage(s), + gress(gr), + lineno(line), + logical_id(lid) { + if (!all) all = new std::map; + if (!by_uid) by_uid = new std::vector
; + uid = by_uid->size(); + by_uid->push_back(this); + if (all->count(name_)) { + error(lineno, "Duplicate table %s", name()); + error(all->at(name_)->lineno, "previously defined here"); + } + all->emplace(name_, this); + if (stage) stage->all_refs.insert(&stage); +} +Table::~Table() { + BUG_CHECK(by_uid && uid >= 0 && uid < by_uid->size(), "invalid uid %d in table", uid); + all->erase(name_); + (*by_uid)[uid] = nullptr; + if (stage) stage->all_refs.erase(&stage); + if (all->empty()) { + delete all; + delete by_uid; + all = nullptr; + by_uid = nullptr; + } +} + +Table::Type::Type(std::string &&name) { // NOLINT(whitespace/operators) + if (!all) all = new std::map(); + if (get(name)) { + fprintf(stderr, "Duplicate table type %s\n", name.c_str()); + exit(1); + } + self = all->emplace(name, this).first; +} + +Table::Type::~Type() { + all->erase(self); + if (all->empty()) { + delete all; + all = nullptr; + } +} + +Table::NextTables::NextTables(value_t &v) : lineno(v.lineno) { + if (v.type == tVEC && (Target::LONG_BRANCH_TAGS() > 0 || v.vec.size == 0)) { + for (auto &el : v.vec) + if (CHECKTYPE(el, tSTR)) next.emplace(el); + } else if (CHECKTYPE(v, tSTR)) { + if (v != "END") next.emplace(v); + } +} + +bool Table::NextTables::can_use_lb(int stage, const NextTables &lbrch) { + if (options.disable_long_branch) return false; + if (!lbrch.subset_of(*this)) return false; + return true; +} + +void Table::NextTables::resolve_long_branch(const Table *tbl, + const std::map &lbrch) { + if (resolved) return; + resolved = true; + for (auto &lb : lbrch) { + if (can_use_lb(tbl->stage->stageno, lb.second)) { + lb_tags |= 1U << lb.first; + } + } + for (auto &lb : tbl->long_branch) { + if (can_use_lb(tbl->stage->stageno, lb.second)) { + lb_tags |= 1U << lb.first; + } + } + for (auto &n : next) { + if (!n) continue; + if (Target::LONG_BRANCH_TAGS() > 0 && !options.disable_long_branch) { + if (n->stage->stageno <= tbl->stage->stageno + 1) // local or global exec + continue; + auto 
lb_covers = [this, n](const std::pair &lb) -> bool { + return ((lb_tags >> lb.first) & 1) && lb.second.next.count(n); + }; + if (std::any_of(lbrch.begin(), lbrch.end(), lb_covers)) continue; + if (std::any_of(tbl->long_branch.begin(), tbl->long_branch.end(), lb_covers)) continue; + } + if (next_table_) { + error(n.lineno, "Can't have multiple next tables for table %s", tbl->name()); + break; + } + next_table_ = n; + } +} + +unsigned Table::NextTables::next_in_stage(int stage) const { + unsigned rv = 0; + for (auto &n : next) + if (n->stage->stageno == stage) rv |= 1U << n->logical_id; + return rv; +} + +bool Table::NextTables::need_next_map_lut() const { + BUG_CHECK(resolved); + return next.size() > 1 || (next.size() == 1 && !next_table_); +} + +void Table::NextTables::force_single_next_table() { + BUG_CHECK(resolved); // must be resolved already + if (next.size() > 1) + error(lineno, + "Can't support multiple next tables; next is directly in overhead " + "without using 8-entry lut"); + if (next.size() == 1) next_table_ = *next.begin(); +} + +int Table::table_id() const { return (stage->stageno << 4) + logical_id; } + +void Table::Call::setup(const value_t &val, Table *tbl) { + if (!CHECKTYPE2(val, tSTR, tCMD)) return; + if (val.type == tSTR) { + Ref::operator=(val); + return; + } + Ref::operator=(val[0]); + for (int i = 1; i < val.vec.size; i++) { + int mode; + if (val[i].type == tINT) { + args.emplace_back(val[i].i); + } else if (val[i].type == tCMD && val[i] == "hash_dist") { + if (PCHECKTYPE(val[i].vec.size > 1, val[i][1], tINT)) { + if (auto hd = tbl->find_hash_dist(val[i][1].i)) + args.emplace_back(hd); + else + error(val[i].lineno, "hash_dist %" PRId64 " not defined in table %s", + val[i][1].i, tbl->name()); + } + } else if ((mode = StatefulTable::parse_counter_mode(val[i])) >= 0) { + args.emplace_back(Arg::Counter, mode); + } else if (!CHECKTYPE(val[i], tSTR)) { + // syntax error message emit by CHEKCTYPE + } else if (auto arg = tbl->lookup_field(val[i].s)) 
{ + if (arg->bits.size() != 1) error(val[i].lineno, "arg fields can't be split in format"); + args.emplace_back(arg); + } else { + args.emplace_back(val[i].s); + } + } + lineno = val.lineno; +} + +unsigned Table::Call::Arg::size() const { + switch (type) { + case Field: + return fld ? fld->size : 0; + case HashDist: + return hd ? hd->expand >= 0 ? 23 : 16 : 0; + case Counter: + return 23; + case Const: + case Name: + return 0; + default: + BUG(); + } + return -1; +} + +static void add_row(int lineno, std::vector &layout, int row) { + layout.push_back(Table::Layout(lineno, row)); +} + +static int add_rows(std::vector &layout, const value_t &rows) { + if (!CHECKTYPE2(rows, tINT, tRANGE)) return 1; + if (rows.type == tINT) { + add_row(rows.lineno, layout, rows.i); + } else { + int step = rows.range.lo > rows.range.hi ? -1 : 1; + for (int i = rows.range.lo; i != rows.range.hi; i += step) add_row(rows.lineno, layout, i); + add_row(rows.lineno, layout, rows.range.hi); + } + return 0; +} + +static int add_col(int lineno, int stage, Table::Layout &row, int col) { + for (auto &mu : row.memunits) { + if (mu.stage == stage && mu.col == col) { + error(lineno, "column %d duplicated", col); + return 1; + } + } + row.memunits.emplace_back(stage, row.row, col); + return 0; +} + +static int add_cols(int stage, Table::Layout &row, const value_t &cols) { + int rv = 0; + if (cols.type == tVEC) { + if (cols.vec.size == 1) return add_cols(stage, row, cols.vec[0]); + for (auto &col : cols.vec) { + if (col.type == tVEC) { + error(col.lineno, "Column shape doesn't match rows"); + rv |= 1; + } else { + rv |= add_cols(stage, row, col); + } + } + return rv; + } + if (cols.type == tMAP && Target::SRAM_GLOBAL_ACCESS()) { + bitvec stages_seen; + for (auto &kv : cols.map) { + if (kv.key == "stage" && kv.key.type == tCMD && kv.key[1].type == tINT) + stage = kv.key[1].i; + else { + error(kv.key.lineno, "syntax error, expecting a stage number"); + continue; + } + if (stage < 0 || stage > 
Target::NUM_MAU_STAGES()) { + error(kv.key.lineno, "stage %d out of range", stage); + } else if (stages_seen[stage]) { + error(kv.key.lineno, "duplicate stage %d", stage); + } else { + rv |= add_cols(stage, row, kv.value); + } + } + return rv; + } + if (!CHECKTYPE2(cols, tINT, tRANGE)) return 1; + if (cols.type == tINT) return add_col(cols.lineno, stage, row, cols.i); + int step = cols.range.lo > cols.range.hi ? -1 : 1; + for (int i = cols.range.lo; i != cols.range.hi; i += step) + rv |= add_col(cols.lineno, stage, row, i); + rv |= add_col(cols.lineno, stage, row, cols.range.hi); + return rv; +} + +static int add_stages(Table::Layout &row, const value_t &stages) { + int rv = 0; + if (stages.type == tVEC) { + if (stages.vec.size == 1) return add_stages(row, stages.vec[0]); + for (auto &stg : stages.vec) { + if (stg.type == tVEC) { + error(stg.lineno, "Stages shape doesn't match rows"); + rv |= 1; + } else { + rv |= add_stages(row, stg); + } + } + return rv; + } + if (!CHECKTYPE2(stages, tINT, tRANGE)) return 1; + if (stages.type == tINT) return add_col(stages.lineno, stages.i, row, 0); + int step = stages.range.lo > stages.range.hi ? 
-1 : 1; + for (int i = stages.range.lo; i != stages.range.hi; i += step) + rv |= add_col(stages.lineno, i, row, 0); + rv |= add_col(stages.lineno, stages.range.hi, row, 0); + return rv; +} + +std::ostream &operator<<(std::ostream &out, const Table::Layout::bus_type_t type) { + switch (type) { + case Table::Layout::SEARCH_BUS: + return out << "search_bus"; + case Table::Layout::RESULT_BUS: + return out << "result_bus"; + case Table::Layout::TIND_BUS: + return out << "tind_bus"; + case Table::Layout::IDLE_BUS: + return out << "idle_bus"; + case Table::Layout::L2R_BUS: + return out << "l2r bus"; + case Table::Layout::R2L_BUS: + return out << "r2l bus"; + default: + return out << "[bus_t " << static_cast(type) << "]"; + } +} + +std::ostream &operator<<(std::ostream &out, const Table::Layout &l) { + if (l.home_row) out << "home_"; + out << "row=" << l.row; + for (auto [type, idx] : l.bus) out << " " << type << "=" << idx; + if (l.word >= 0) out << " word=" << l.word; + if (!l.memunits.empty()) { + const char *sep = ""; + out << " ["; + for (auto &unit : l.memunits) { + out << sep << unit; + sep = ", "; + } + out << ']'; + } + if (!l.vpns.empty()) { + const char *sep = ""; + out << " vpns=["; + for (auto vpn : l.vpns) { + out << sep << vpn; + sep = ", "; + } + out << ']'; + } + if (!l.maprams.empty()) { + const char *sep = ""; + out << " maprams=["; + for (auto mr : l.maprams) { + out << sep << mr; + sep = ", "; + } + out << ']'; + } + return out; +} + +int Table::setup_layout_attrib(std::vector &layout, const value_t &data, const char *what, + int Layout::*attr) { + if (!CHECKTYPE2(data, tINT, tVEC)) { + return 1; + } else if (data.type == tVEC) { + if (data.vec.size != static_cast(layout.size())) { + error(data.lineno, "%s shape doesn't match rows", what); + return 1; + } else { + for (int i = 0; i < data.vec.size; i++) { + if (CHECKTYPE(data.vec[i], tINT)) + layout[i].*attr = data.vec[i].i; + else + return 1; + } + } + } else { + for (auto &lrow : layout) lrow.*attr = 
data.i; + } + return 0; +} + +int Table::setup_layout_bus_attrib(std::vector &layout, const value_t &data, + const char *what, Layout::bus_type_t type) { + int limit = Target::NUM_BUS_OF_TYPE(type); + int err = 0; + if (limit <= 0) { + error(data.lineno, "No %s on target %s", to_string(type).c_str(), Target::name()); + return 1; + } else if (!CHECKTYPE2(data, tINT, tVEC)) { + return 1; + } else if (data.type == tVEC) { + if (data.vec.size != static_cast(layout.size())) { + error(data.lineno, "%s shape doesn't match rows", what); + return 1; + } else { + for (int i = 0; i < data.vec.size; i++) { + if (!CHECKTYPE(data.vec[i], tINT)) return 1; + if (data.vec[i].i >= limit) { + error(data.vec[i].lineno, "%" PRId64 " to large for %s", data.vec[i].i, + to_string(type).c_str()); + err = 1; + } + if (data.vec[i].i >= 0) layout[i].bus[type] = data.vec[i].i; + } + } + } else if (data.i < 0) { + error(data.lineno, "%s value %" PRId64 " invalid", what, data.i); + err = 1; + } else if (data.i >= limit) { + error(data.lineno, "%" PRId64 " to large for %s", data.i, to_string(type).c_str()); + err = 1; + } else { + for (auto &lrow : layout) lrow.bus[type] = data.i; + } + return err; +} + +void Table::setup_layout(std::vector &layout, const VECTOR(pair_t) & data, + const char *subname) { + auto *row = get(data, "row"); + if (!row && this->to()) row = get(data, "logical_row"); + if (!row) { + if (table_type() != TERNARY && Target::TABLES_REQUIRE_ROW()) + error(lineno, "No 'row' attribute in table %s%s", name(), subname); + return; + } + int err = 0; + if (row->type == tVEC) + for (value_t &r : row->vec) err |= add_rows(layout, r); + else + err |= add_rows(layout, *row); + if (err) return; + bool global_access = + (table_type() == TERNARY) ? 
Target::TCAM_GLOBAL_ACCESS() : Target::SRAM_GLOBAL_ACCESS(); + if (global_access && table_type() == TERNARY && Target::TCAM_UNITS_PER_ROW() == 1) { + if (auto *stg = get(data, "stages")) { + if (stg->type == tVEC && stg->vec.size == static_cast(layout.size())) { + for (int i = 0; i < stg->vec.size; i++) err |= add_stages(layout[i], stg->vec[i]); + } else if (layout.size() == 1) + err |= add_stages(layout[0], *stg); + } else { + for (auto &lrow : layout) err |= add_col(lineno, this->stage->stageno, lrow, 0); + } + } else if (auto *col = get(data, "column")) { + int stage = global_access ? this->stage->stageno : INT_MIN; + if (col->type == tMAP && global_access) { + bitvec stages_seen; + for (auto &kv : col->map) { + if (kv.key.type == tINT) + stage = kv.key.i; + else if (kv.key == "stage" && kv.key.type == tCMD && kv.key[1].type == tINT) + stage = kv.key[1].i; + else { + error(kv.key.lineno, "syntax error, expecting a stage number"); + continue; + } + if (stage < 0 || stage > Target::NUM_STAGES(gress)) { + error(kv.key.lineno, "stage %d out of range", stage); + } else if (stages_seen[stage]) { + error(kv.key.lineno, "duplicate stage %d", stage); + } else { + if (kv.value.type == tVEC && kv.value.vec.size + 0U == layout.size()) { + for (int i = 0; i < kv.value.vec.size; i++) + err |= add_cols(stage, layout[i], kv.value.vec[i]); + } else { + for (auto &lrow : layout) + if ((err |= add_cols(stage, lrow, kv.value))) break; + } + } + } + } else if (col->type == tVEC && col->vec.size == static_cast(layout.size())) { + for (int i = 0; i < col->vec.size; i++) err |= add_cols(stage, layout[i], col->vec[i]); + } else { + for (auto &lrow : layout) + if ((err |= add_cols(stage, lrow, *col))) break; + } + } else if (layout.size() > 1) { + error(lineno, "No 'column' attribute in table %s%s", name(), subname); + return; + } + if (auto *bus = get(data, "bus")) + err |= Table::setup_layout_bus_attrib(layout, *bus, "Bus", default_bus_type()); + else if (auto *bus = get(data, 
"search_bus")) + err |= Table::setup_layout_bus_attrib(layout, *bus, "Bus", Layout::SEARCH_BUS); + if (auto *bus = get(data, "lhbus")) + err |= Table::setup_layout_bus_attrib(layout, *bus, "R2L hbus", Layout::R2L_BUS); + if (auto *bus = get(data, "rhbus")) + err |= Table::setup_layout_bus_attrib(layout, *bus, "L2R hbus", Layout::L2R_BUS); + if (auto *bus = get(data, "result_bus")) + err |= Table::setup_layout_bus_attrib(layout, *bus, "Bus", Layout::RESULT_BUS); + if (auto *word = get(data, "word")) + err |= Table::setup_layout_attrib(layout, *word, "Word", &Layout::word); + if (err) return; + for (auto i = layout.begin(); i != layout.end(); i++) + for (auto j = i + 1; j != layout.end(); j++) + if (*i == *j) { + std::stringstream bus; + if (!i->bus.empty()) + bus << " " << i->bus.begin()->first << " " << i->bus.begin()->second; + error(i->lineno, "row %d%s duplicated in table %s%s", i->row, bus.str().c_str(), + name(), subname); + } +} + +void Table::setup_logical_id() { + if (logical_id >= 0) { + if (Table *old = stage->logical_id_use[logical_id]) { + error(lineno, "table %s wants logical id %d:%d", name(), stage->stageno, logical_id); + error(old->lineno, "already in use by %s", old->name()); + } + stage->logical_id_use[logical_id] = this; + } +} + +void Table::setup_maprams(value_t &v) { + if (!CHECKTYPE2(v, tINT, tVEC)) return; + VECTOR(value_t) *rams = &v.vec, single_ram; + if (v.type == tINT) { + // treat as a vector of length 1 + rams = &single_ram; + single_ram.size = single_ram.capacity = 1; + single_ram.data = &v; + } + auto r = rams->begin(); + for (auto &row : layout) { + if (r == rams->end()) { + error(r->lineno, "Mapram layout doesn't match table layout"); + break; + } + auto &maprow = *r++; + VECTOR(value_t) * maprow_rams, tmp; + if (maprow.type == tINT) { + if (layout.size() == 1) { + maprow_rams = rams; + } else { + // treat as a vector of length 1 + maprow_rams = &tmp; + tmp.size = tmp.capacity = 1; + tmp.data = &maprow; + } + } else if 
(CHECKTYPE(maprow, tVEC)) { + maprow_rams = &maprow.vec; + } else { + continue; + } + if (maprow_rams->size != static_cast(row.memunits.size())) { + error(r->lineno, "Mapram layout doesn't match table layout"); + continue; + } + for (auto mapcol : *maprow_rams) + if (CHECKTYPE(mapcol, tINT)) { + if (mapcol.i < 0 || mapcol.i >= MAPRAM_UNITS_PER_ROW) + error(mapcol.lineno, "Invalid mapram column %" PRId64 "", mapcol.i); + else + row.maprams.push_back(mapcol.i); + } + } +} + +/** + * Guarantees that the instruction call provided to the table has valid entries, and that + * if multiple choices are required, the compiler can make that choices. + * + * The instruction address is a two piece address. The first argument is the address bits + * location. The second argument is a per flow enable bit location. These are both required. + * Additionally, the keyword $DEFAULT means that that particular portion of the address comes + * from the default register. + * + * FIXME -- this code is a messy hack -- various target-specific special cases. Should try + * to figure out a better way to organize this. 
+ */ +bool Table::validate_instruction(Table::Call &call) const { + if (call.args.size() != 2) { + error(call.lineno, "Instruction call has invalid number of arguments"); + return false; + } + + bool field_address = false; + + if (call.args[0].name()) { + if (Target::GATEWAY_INHIBIT_INDEX() && call.args[0] == "$GATEWAY_IDX") { + field_address = true; + } else if (call.args[0] != "$DEFAULT") { + error(call.lineno, "Index %s for %s cannot be found", call.args[0].name(), + call->name()); + return false; + } + } else if (!call.args[0].field()) { + error(call.lineno, "Index for %s cannot be understood", call->name()); + return false; + } else { + field_address = true; + } + + if (call.args[1].name()) { + if (call.args[1] != "$DEFAULT") { + error(call.lineno, "Per flow enable %s for %s cannot be found", call.args[1].name(), + call->name()); + return false; + } + } else if (!call.args[1].field()) { + error(call.lineno, "Per flow enable for %s cannot be understood", call->name()); + return false; + } + + if (actions->hit_actions_count() > 1 && !field_address) + error(lineno, "No field to select between multiple action in table %s format", name()); + + return true; +} + +static bool column_match(const std::vector &a, const std::vector &b) { + auto it = b.begin(); + for (auto &u : a) { + if (it == b.end()) return false; + if (u.col != it->col) return false; + ++it; + } + return it == b.end(); +} + +void Table::setup_vpns(std::vector &layout, VECTOR(value_t) * vpn, bool allow_holes) { + int period, width, depth; + const char *period_name; + vpn_params(width, depth, period, period_name); + int word = width; + Layout *firstrow = 0; + auto vpniter = vpn ? 
vpn->begin() : 0; + int *vpn_ctr = new int[period]; + std::fill_n(vpn_ctr, period, get_start_vpn()); + std::vector used_vpns(period); + bool on_repeat = false; + for (auto &row : layout) { + if (++word < width) { + BUG_CHECK(firstrow); + if (!column_match(row.memunits, firstrow->memunits)) + error(row.lineno, "Columns across wide rows don't match in table %s", name()); + row.vpns = firstrow->vpns; + continue; + } + word = 0; + firstrow = &row; + row.vpns.resize(row.memunits.size()); + value_t *vpncoliter = 0; + for (int &el : row.vpns) { + // If VPN's are provided by the compiler, they need to match each + // element in the specified columns. Below code checks if all + // elements are present and errors out if there is any discrepancy. + if (vpniter) { + if (vpniter == vpn->end()) { + on_repeat = true; + vpniter = vpn->begin(); + } + if (CHECKTYPE2(*vpniter, tVEC, tINT)) { + if (vpniter->type == tVEC) { + if (!vpncoliter) { + if (static_cast(row.vpns.size()) != vpniter->vec.size) { + error(vpniter->lineno, + "Vpn entries for row %d is %d not equal to column " + "entries %d", + row.row, vpniter->vec.size, + static_cast(row.vpns.size())); + continue; + } else { + vpncoliter = vpniter->vec.begin(); + } + } + el = vpncoliter->i; + if (++vpncoliter == &*vpniter->vec.end()) ++vpniter; + continue; + } else if (vpniter->type == tINT) { + el = vpniter->i; + } + ++vpniter; + } + // Error out if VPN's are repeated in a table. For wide words, + // each individual word can have the same vpn + if (!on_repeat && used_vpns[period - 1][el].set(true)) + error(vpniter->lineno, "Vpn %d used twice in table %s", el, name()); + } else { + // If there is no word information provided in assembly (Ternary + // Indirect/Stats) tables, the allocation is always a single + // word. 
+ // For SRamMatchTables, this should be handled by SRamMatchTable::alloc_vpns(), + // so this code will never be hit + // FIXME -- move this to Table::alloc_vpns and only call setup_vpns when + // there's a vpn specified in the bfa? + if (row.word < 0) row.word = word; + el = vpn_ctr[row.word]; + if ((vpn_ctr[row.word] += period) == depth) vpn_ctr[row.word] = 0; + } + } + } + delete[] vpn_ctr; +} + +void Table::common_init_setup(const VECTOR(pair_t) & data, bool, P4Table::type) { + setup_layout(layout, data); + if (auto *fmt = get(data, "format")) { + if (CHECKTYPEPM(*fmt, tMAP, fmt->map.size > 0, "non-empty map")) + format.reset(new Format(this, fmt->map)); + } + if (auto *hd = get(data, "hash_dist")) HashDistribution::parse(hash_dist, *hd); +} + +bool Table::common_setup(pair_t &kv, const VECTOR(pair_t) & data, P4Table::type p4type) { + bool global_access = + (table_type() == TERNARY) ? Target::TCAM_GLOBAL_ACCESS() : Target::SRAM_GLOBAL_ACCESS(); + if (kv.key == "format" || kv.key == "row" || kv.key == "column" || kv.key == "bus") { + /* done in Table::common_init_setup */ + } else if (global_access && (kv.key == "stages" || kv.key == "lhbus" || kv.key == "rhbus")) { + /* done in Table::common_init_setup */ + } else if (kv.key == "action") { + action.setup(kv.value, this); + } else if (kv.key == "instruction") { + instruction.setup(kv.value, this); + } else if (kv.key == "action_enable") { + if (CHECKTYPE(kv.value, tINT)) action_enable = kv.value.i; + if (get(data, "action")) enable_action_data_enable = true; + enable_action_instruction_enable = true; + } else if (kv.key == "enable_action_data_enable") { + enable_action_data_enable = get_bool(kv.value); + } else if (kv.key == "enable_action_instruction_enable") { + enable_action_instruction_enable = get_bool(kv.value); + } else if (kv.key == "actions") { + if (CHECKTYPE(kv.value, tMAP)) actions.reset(new Actions(this, kv.value.map)); + } else if (kv.key == "action_bus") { + if (CHECKTYPE(kv.value, tMAP)) 
action_bus = ActionBus::create(this, kv.value.map); + } else if ((kv.key == "default_action") || (kv.key == "default_only_action")) { + if (kv.key == "default_only_action") default_only_action = true; + default_action_lineno = kv.value.lineno; + if (CHECKTYPE2(kv.value, tSTR, tCMD)) + if (CHECKTYPE(kv.value, tSTR)) default_action = kv.value.s; + } else if (kv.key == "default_action_parameters") { + if (CHECKTYPE(kv.value, tMAP)) + for (auto &v : kv.value.map) + if (CHECKTYPE(v.key, tSTR) && CHECKTYPE(v.value, tSTR)) + default_action_parameters[v.key.s] = v.value.s; + } else if (kv.key == "default_action_handle") { + default_action_handle = kv.value.i; + } else if (kv.key == "hit") { + if (!hit_next.empty()) { + error(kv.key.lineno, "Specifying both 'hit' and 'next' in table %s", name()); + } else if (kv.value.type == tVEC) { + for (auto &v : kv.value.vec) hit_next.emplace_back(v); + } else { + hit_next.emplace_back(kv.value); + } + } else if (kv.key == "miss") { + if (miss_next.set()) { + error(kv.key.lineno, "Specifying both 'miss' and 'next' in table %s", name()); + } else { + miss_next = kv.value; + } + } else if (kv.key == "next") { + if (!hit_next.empty()) { + error(kv.key.lineno, "Specifying both 'hit' and 'next' in table %s", name()); + } else if (miss_next.set()) { + error(kv.key.lineno, "Specifying both 'miss' and 'next' in table %s", name()); + } else { + miss_next = kv.value; + hit_next.emplace_back(miss_next); + } + } else if (kv.key == "long_branch" && Target::LONG_BRANCH_TAGS() > 0) { + if (options.disable_long_branch) error(kv.key.lineno, "long branches disabled"); + if (CHECKTYPE(kv.value, tMAP)) { + for (auto &lb : kv.value.map) { + if (lb.key.type != tINT || lb.key.i < 0 || lb.key.i >= Target::LONG_BRANCH_TAGS()) + error(lb.key.lineno, "Invalid long branch tag %s", value_desc(lb.key)); + else if (long_branch.count(lb.key.i)) + error(lb.key.lineno, "Duplicate long branch tag %" PRId64, lb.key.i); + else + long_branch.emplace(lb.key.i, lb.value); + 
} + } + } else if (kv.key == "vpns") { + if (CHECKTYPESIZE(kv.value, tVEC)) setup_vpns(layout, &kv.value.vec); + } else if (kv.key == "p4") { + if (CHECKTYPE(kv.value, tMAP)) p4_table = P4Table::get(p4type, kv.value.map); + } else if (kv.key == "p4_param_order") { + if (CHECKTYPE(kv.value, tMAP)) { + unsigned position = 0; + for (auto &v : kv.value.map) { + if ((CHECKTYPE(v.key, tSTR)) && (CHECKTYPE(v.value, tMAP))) { + p4_param p(v.key.s); + for (auto &w : v.value.map) { + if (!CHECKTYPE(w.key, tSTR)) continue; + + if (w.key == "type" && CHECKTYPE(w.value, tSTR)) + p.type = w.value.s; + else if (w.key == "size" && CHECKTYPE(w.value, tINT)) + p.bit_width = w.value.i; + else if (w.key == "full_size" && CHECKTYPE(w.value, tINT)) + p.bit_width_full = w.value.i; + else if (w.key == "mask") + p.mask = get_bitvec(w.value); + else if (w.key == "alias" && CHECKTYPE(w.value, tSTR)) + p.alias = w.value.s; + else if (w.key == "key_name" && CHECKTYPE(w.value, tSTR)) + p.key_name = w.value.s; + else if (w.key == "start_bit" && CHECKTYPE(w.value, tINT)) + p.start_bit = w.value.i; + else if (w.key == "context_json" && CHECKTYPE(w.value, tMAP)) + p.context_json = toJson(w.value.map); + else + error(lineno, "Incorrect param type %s in p4_param_order", w.key.s); + } + // Determine position in p4_param_order. Repeated fields get + // the same position which is set on first occurrence. + // Driver relies on position to order fields. The case when + // we have multiple slices of same field based on position + // only one location is assigned for the entire field. + // However if the field has a name annotation (key_name) + // this overrides the position even if the field belongs to + // the same slice. 
+ bool ppFound = false; + for (auto &pp : p4_params_list) { + if ((pp.name == p.name) && (pp.key_name == p.key_name)) { + ppFound = true; + p.position = pp.position; + break; + } + } + if (!ppFound) p.position = position++; + p4_params_list.emplace_back(std::move(p)); + } + } + } + } else if (kv.key == "context_json") { + setup_context_json(kv.value); + } else { + return false; + } + return true; +} + +void Table::setup_context_json(value_t &v) { + if (!CHECKTYPE(v, tMAP)) return; + + auto map = toJson(v.map); + if (context_json) + context_json->merge(*map); + else + context_json = std::move(map); +} + +/** check two tables to see if they can share ram. + * FIXME -- for now we just allow a STATEFUL and a SELECTION to share -- we should + * FIXME -- check to make sure they're mutually exclusive and use the memory in + * FIXME -- a compatible way + */ +bool Table::allow_ram_sharing(const Table *t1, const Table *t2) { + if (t1->table_type() == STATEFUL && t2->table_type() == SELECTION && + t1->to()->bound_selector == t2) + return true; + if (t2->table_type() == STATEFUL && t1->table_type() == SELECTION && + t2->to()->bound_selector == t1) + return true; + return false; +} + +/** check two tables to see if they can share action bus + * Two ATCAM tables or their action tables can occur in the same stage and share + * bytes on the action bus which is valid as they are always mutually exclusive + */ +bool Table::allow_bus_sharing(Table *t1, Table *t2) { + if (!t1 || !t2) return false; + if ((t1->table_type() == ATCAM) && (t2->table_type() == ATCAM) && + (t1->p4_name() == t2->p4_name())) + return true; + if ((t1->table_type() == ACTION) && (t2->table_type() == ACTION) && + (t1->p4_name() == t2->p4_name())) { + // Check if action tables are attached to atcam's + auto *m1 = t1->to()->get_match_table(); + auto *m2 = t2->to()->get_match_table(); + if (m1 && m2) { + if ((m1->table_type() == ATCAM) && (m2->table_type() == ATCAM)) return true; + } + } + return false; +} + +void 
Table::alloc_rams(bool logical, BFN::Alloc2Dbase
&use, + BFN::Alloc2Dbase
*bus_use, Layout::bus_type_t bus_type) { + for (auto &row : layout) { + for (auto &memunit : row.memunits) { + BUG_CHECK(memunit.stage == INT_MIN && memunit.row == row.row, "memunit fail"); + int r = row.row, c = memunit.col; + if (logical) { + c += 6 * (r & 1); + r >>= 1; + } + try { + if (Table *old = use[r][c]) { + if (!allow_ram_sharing(this, old)) { + error(lineno, + "Table %s trying to use (%d,%d) which is already in use " + "by table %s", + name(), r, c, old->name()); + } + } else { + use[r][c] = this; + } + } catch (std::out_of_range & /*e*/) { + error(lineno, "Table %s using out-of-bounds (%d,%d)", name(), r, c); + } + } + if (bus_use && row.bus.count(bus_type)) { + int bus = row.bus.at(bus_type); + if (Table *old = (*bus_use)[row.row][bus]) { + if (old != this && old->p4_name() != p4_name()) + error(lineno, + "Table %s trying to use bus %d on row %d which is already in " + "use by table %s", + name(), bus, row.row, old->name()); + } else { + (*bus_use)[row.row][bus] = this; + } + } + } +} + +void Table::alloc_global_busses() { BUG(); } +void Table::alloc_global_srams() { BUG(); } +void Table::alloc_global_tcams() { BUG(); } + +void Table::alloc_busses(BFN::Alloc2Dbase
&bus_use, Layout::bus_type_t bus_type) { + for (auto &row : layout) { + // If row.memunits is empty, we don't really need a bus here (won't use it + // for anything). + // E.g. An exact match table with 4 or less static entries (JBay) or 1 + // static entry (Tofino) + // In these examples compiler does gateway optimization where static + // entries are encoded in the gateway and no RAM's are used. We skip bus + // allocation in these cases. + if (!row.bus.count(bus_type) && !row.memunits.empty()) { + // FIXME -- iterate over bus_use[row.row] rather than assuming 2 rows + if (bus_use[row.row][0] == this) + row.bus[bus_type] = 0; + else if (bus_use[row.row][1] == this) + row.bus[bus_type] = 1; + else if (!bus_use[row.row][0]) + bus_use[row.row][row.bus[bus_type] = 0] = this; + else if (!bus_use[row.row][1]) + bus_use[row.row][row.bus[bus_type] = 1] = this; + else + error(lineno, "No bus available on row %d for table %s", row.row, name()); + } + } +} + +void Table::alloc_id(const char *idname, int &id, int &next_id, int max_id, bool order, + BFN::Alloc1Dbase
&use) { + if (id >= 0) { + next_id = id; + return; + } + while (++next_id < max_id && use[next_id]) { + } + if (next_id >= max_id && !order) { + next_id = -1; + while (++next_id < max_id && use[next_id]) { + } + } + if (next_id < max_id) + use[id = next_id] = this; + else + error(lineno, "Can't pick %s id for table %s (ran out)", idname, name()); +} + +void Table::alloc_maprams() { + if (!Target::SYNTH2PORT_NEED_MAPRAMS()) return; + for (auto &row : layout) { + int sram_row = row.row / 2; + if ((row.row & 1) == 0) { + error(row.lineno, "Can only use 2-port rams on right side srams (odd logical rows)"); + continue; + } + if (row.maprams.empty()) { + int use = 0; + for (unsigned i = 0; i < row.memunits.size(); i++) { + while (use < MAPRAM_UNITS_PER_ROW && stage->mapram_use[sram_row][use]) use++; + if (use >= MAPRAM_UNITS_PER_ROW) { + error(row.lineno, "Ran out of maprams on row %d in stage %d", sram_row, + stage->stageno); + break; + } + row.maprams.push_back(use); + stage->mapram_use[sram_row][use++] = this; + } + } else { + for (auto mapcol : row.maprams) { + if (auto *old = stage->mapram_use[sram_row][mapcol]) { + if (!allow_ram_sharing(this, old)) + error(lineno, + "Table %s trying to use mapram %d,%d which is use by " + "table %s", + name(), sram_row, mapcol, old->name()); + } else { + stage->mapram_use[sram_row][mapcol] = this; + } + } + } + } +} + +void Table::alloc_vpns() { + if (no_vpns || layout_size() == 0 || layout[0].vpns.size() > 0) return; + setup_vpns(layout, 0); +} + +void Table::check_next(const Table::Ref &n) { + if (n.check()) { + if (logical_id >= 0 && n->logical_id >= 0 ? 
table_id() > n->table_id() + : stage->stageno > n->stage->stageno) + error(n.lineno, "Next table %s comes before %s", n->name(), name()); + if (gress != n->gress) + error(n.lineno, "Next table %s in %s when %s is in %s", n->name(), + P4Table::direction_name(n->gress).c_str(), name(), + P4Table::direction_name(gress).c_str()); + // Need to add to the predication map + Table *tbl = get_match_table(); + if (!tbl) tbl = this; // standalone gateway + if (tbl != n) { + n->pred[tbl]; // ensure that its in the map, even as an empty set + } + } +} + +void Table::for_all_next(std::function fn) { + for (auto &n1 : hit_next) + for (auto &n2 : n1) fn(n2); + for (auto &n : miss_next) fn(n); +} + +void Table::check_next(NextTables &next) { + for (auto &n : next) check_next(n); + Table *tbl = get_match_table(); + if (!tbl) tbl = this; + next.resolve_long_branch(tbl, long_branch); +} + +void Table::check_next() { + for (auto &lb : long_branch) { + for (auto &t : lb.second) { + if (t.check()) { + if (t->stage->stageno <= stage->stageno) + error(t.lineno, "Long branch table %s is not in a later stage than %s", + t->name(), name()); + else if (stage->stageno + 1 == t->stage->stageno) + warning(t.lineno, "Long branch table %s is the next stage after %s", t->name(), + name()); + if (gress != t->gress) + error(t.lineno, "Long branch table %s in %s when %s is in %s", t->name(), + P4Table::direction_name(t->gress).c_str(), name(), + P4Table::direction_name(gress).c_str()); + } + } + } + for (auto &hn : hit_next) check_next(hn); + for (auto &hn : extra_next_lut) check_next(hn); + check_next(miss_next); +} + +void Table::set_pred() { + if (actions == nullptr) return; + for (auto &act : *actions) { + if (!act.default_only) + for (auto &n : act.next_table_ref) n->pred[this].insert(&act); + for (auto &n : act.next_table_miss_ref) n->pred[this].insert(&act); + } +} + +bool Table::choose_logical_id(const slist
*work) { + if (logical_id >= 0) return true; + if (work && find(*work, this) != work->end()) { + error(lineno, "Logical table loop with table %s", name()); + for (auto *tbl : *work) { + if (tbl == this) break; + warning(tbl->lineno, "loop involves table %s", tbl->name()); + } + return false; + } + slist
local(this, work); + for (auto *p : Keys(pred)) + if (!p->choose_logical_id(&local)) return false; + int min_id = 0, max_id = LOGICAL_TABLES_PER_STAGE - 1; + for (auto *p : Keys(pred)) + if (p->stage->stageno == stage->stageno && p->logical_id >= min_id) + min_id = p->logical_id + 1; + for_all_next([&max_id, this](const Ref &n) { + if (n && n->stage->stageno == stage->stageno && n->logical_id >= 0 && + n->logical_id <= max_id) { + max_id = n->logical_id - 1; + } + }); + for (int id = min_id; id <= max_id; ++id) { + if (!stage->logical_id_use[id]) { + logical_id = id; + stage->logical_id_use[id] = this; + return true; + } + } + error(lineno, "Can't find a logcial id for table %s", name()); + return false; +} + +void Table::need_bus(int lineno, BFN::Alloc1Dbase
&use, int idx, const char *busname) { + if (use[idx] && use[idx] != this) { + error(lineno, "%s bus conflict on row %d between tables %s and %s", busname, idx, name(), + use[idx]->name()); + error(use[idx]->lineno, "%s defined here", use[idx]->name()); + } else { + use[idx] = this; + } +} + +bitvec Table::compute_reachable_tables() { + reachable_tables_[uid] = 1; + for_all_next([this](const Ref &t) { + if (t) { + reachable_tables_ |= t->reachable_tables(); + } + }); + return reachable_tables_; +} + +std::string Table::loc() const { + std::stringstream ss; + ss << "(" << gress << ", stage=" << stage->stageno << ")"; + return ss.str(); +} + +void Table::pass1() { + alloc_vpns(); + check_next(); + if (auto att = get_attached()) att->pass1(get_match_table()); + if (action_bus) action_bus->pass1(this); + + if (actions) { + if (instruction) { + validate_instruction(instruction); + } else { + // Phase0 has empty actions which list param order + if (table_type() != PHASE0) { + error(lineno, "No instruction call provided, but actions provided"); + } + } + actions->pass1(this); + } + set_pred(); + + if (action) { + auto reqd_args = 2; + action->validate_call(action, get_match_table(), reqd_args, + HashDistribution::ACTION_DATA_ADDRESS, action); + } + for (auto &lb : long_branch) { + int last_stage = -1; + for (auto &n : lb.second) { + if (!n) continue; // already output error about invalid table + last_stage = std::max(last_stage, n->stage->stageno); + if (n->long_branch_input >= 0 && n->long_branch_input != lb.first) + error(lb.second.lineno, "Conflicting long branch input (%d and %d) for table %s", + lb.first, n->long_branch_input, n->name()); + n->long_branch_input = lb.first; + } + // we track the long branch as being 'live' from the stage it is set until the stage + // before it is terminated; it can still be use to trigger a table in that stage, even + // though it is not 'live' there. It can also be reused (set) in that stage for use in + // later stages. 
This matches the range of stages we need to set timing regs for. + for (int st = stage->stageno; st < last_stage; ++st) { + auto stg = Stage::stage(gress, st); + BUG_CHECK(stg); + auto &prev = stg->long_branch_use[lb.first]; + if (prev && *prev != lb.second) { + error(lb.second.lineno, "Conflicting use of long_branch tag %d", lb.first); + error(prev->lineno, "previous use"); + } else { + prev = &lb.second; + } + stg->long_branch_thread[gress] |= 1U << lb.first; + } + auto last_stg = Stage::stage(gress, last_stage); + BUG_CHECK(last_stg); + last_stg->long_branch_thread[gress] |= 1U << lb.first; + last_stg->long_branch_terminate |= 1U << lb.first; + } +} + +static void overlap_test(int lineno, unsigned a_bit, + ordered_map::iterator a, unsigned b_bit, + ordered_map::iterator b) { + if (b_bit <= a->second.hi(a_bit)) { + if (a->second.group || b->second.group) + error(lineno, "Field %s(%d) overlaps with %s(%d)", a->first.c_str(), a->second.group, + b->first.c_str(), b->second.group); + else + error(lineno, "Field %s overlaps with %s", a->first.c_str(), b->first.c_str()); + } +} + +static void append_bits(std::vector &vec, int lo, int hi) { + /* split any chunks that cross a word (128-bit) boundary */ + while (lo < hi && lo / 128U != hi / 128U) { + vec.emplace_back(lo, lo | 127); + lo = (lo | 127) + 1; + } + vec.emplace_back(lo, hi); +} + +bool Table::Format::equiv(const ordered_map &a, + const ordered_map &b) { + if (a.size() != b.size()) return false; + for (auto &el : a) + if (!b.count(el.first) || b.at(el.first) != el.second) return false; + return true; +} + +Table::Format::Format(Table *t, const VECTOR(pair_t) & data, bool may_overlap) : tbl(t) { + unsigned nextbit = 0; + fmt.resize(1); + for (auto &kv : data) { + if (lineno < 0) lineno = kv.key.lineno; + if (!CHECKTYPE2M(kv.key, tSTR, tCMD, "expecting field desc")) continue; + value_t &name = kv.key.type == tSTR ? 
kv.key : kv.key[0]; + unsigned idx = 0; + if (kv.key.type == tCMD && + (kv.key.vec.size != 2 || !CHECKTYPE(kv.key[1], tINT) || (idx = kv.key[1].i) > 15)) { + error(kv.key.lineno, "Invalid field group"); + continue; + } + if (kv.value.type != tVEC && + !(CHECKTYPE2(kv.value, tINT, tRANGE) && VALIDATE_RANGE(kv.value))) + continue; + if (idx >= fmt.size()) fmt.resize(idx + 1); + if (fmt[idx].count(name.s) > 0) { + if (kv.key.type == tCMD) + error(name.lineno, "Duplicate key %s(%d) in format", name.s, idx); + else + error(name.lineno, "Duplicate key %s in format", name.s); + continue; + } + Field *f = &fmt[idx].emplace(name.s, Field(this)).first->second; + f->group = idx; + if (kv.value.type == tINT) { + if (kv.value.i <= 0) + error(kv.value.lineno, "invalid size %" PRId64 " for format field %s", kv.value.i, + name.s); + f->size = kv.value.i; + append_bits(f->bits, nextbit, nextbit + f->size - 1); + } else if (kv.value.type == tRANGE) { + if (kv.value.range.lo > kv.value.range.hi) + error(kv.value.lineno, "invalid range %d..%d", kv.value.range.lo, + kv.value.range.hi); + append_bits(f->bits, kv.value.range.lo, kv.value.range.hi); + f->size = kv.value.range.hi - kv.value.range.lo + 1; + } else if (kv.value.type == tVEC) { + f->size = 0; + for (auto &c : kv.value.vec) + if (CHECKTYPE(c, tRANGE) && VALIDATE_RANGE(c)) { + append_bits(f->bits, c.range.lo, c.range.hi); + f->size += c.range.hi - c.range.lo + 1; + if ((size_t)c.range.hi + 1 > size) size = c.range.hi + 1; + } + } + nextbit = f->bits.back().hi + 1; + if (nextbit > size) size = nextbit; + } + if (!may_overlap) { + for (auto &grp : fmt) { + for (auto it = grp.begin(); it != grp.end(); ++it) { + for (auto &piece : it->second.bits) { + auto p = byindex.upper_bound(piece.lo); + if (p != byindex.end()) overlap_test(lineno, piece.lo, it, p->first, p->second); + if (p != byindex.begin()) { + --p; + overlap_test(lineno, p->first, p->second, piece.lo, it); + if (p->first == piece.lo && piece.hi <= 
p->second->second.hi(piece.lo)) + continue; + } + byindex[piece.lo] = it; + } + } + } + } + for (size_t i = 1; i < fmt.size(); i++) + if (!equiv(fmt[0], fmt[i])) + error(data[0].key.lineno, "Format group %zu doesn't match group 0", i); + for (log2size = 0; (1U << log2size) < size; log2size++) { + } + if (error_count > 0) return; + for (auto &f : fmt[0]) { + f.second.by_group = new Field *[fmt.size()]; + f.second.by_group[0] = &f.second; + } + for (size_t i = 1; i < fmt.size(); i++) + for (auto &f : fmt[i]) { + Field &f0 = fmt[0].at(f.first); + f.second.by_group = f0.by_group; + f.second.by_group[i] = &f.second; + } +} + +Table::Format::~Format() { + for (auto &f : fmt[0]) delete[] f.second.by_group; +} + +void Table::Format::pass1(Table *tbl) { + std::map immed_fields; + unsigned lo = INT_MAX, hi = 0; + for (auto &f : fmt[0]) { + if (!(f.second.flags & Field::USED_IMMED)) continue; + if (f.second.bits.size() > 1) + error(lineno, "Immmediate action data %s cannot be split", f.first.c_str()); + immed_fields[f.second.bits[0].lo] = &f.second; + if (f.second.bits[0].lo < lo) { + immed = &f.second; + lo = immed->bits[0].lo; + } + if (f.second.bits[0].hi > hi) hi = f.second.bits[0].hi; + } + if (immed_fields.empty()) { + LOG2("table " << tbl->name() << " has no immediate data"); + } else { + LOG2("table " << tbl->name() << " has " << immed_fields.size() + << " immediate data fields " + "over " + << (hi + 1 - lo) << " bits"); + if (hi - lo >= Target::MAX_IMMED_ACTION_DATA()) { + error(lineno, "Immediate data for table %s spread over more than %d bits", tbl->name(), + Target::MAX_IMMED_ACTION_DATA()); + return; + } + immed_size = hi + 1 - lo; + for (unsigned i = 1; i < fmt.size(); i++) { + int delta = static_cast(immed->by_group[i]->bits[0].lo) - + static_cast(immed->bits[0].lo); + for (auto &f : fmt[0]) { + if (!(f.second.flags & Field::USED_IMMED)) continue; + if (delta != static_cast(f.second.by_group[i]->bits[0].lo) - + static_cast(f.second.bits[0].lo)) { + 
error(lineno, + "Immediate data field %s for table %s does not match across " + "ways in a ram", + f.first.c_str(), tbl->name()); + break; + } + } + } + } + lo = INT_MAX, hi = 0; + for (auto &[name, field] : fmt[0]) { + // FIXME -- should use a flag rather than names here? Someone would need to set the flag + if (name == "match" || name == "version" || name == "valid") continue; + lo = std::min(lo, field.bit(0)); + hi = std::max(hi, field.bit(field.size - 1)); + } + overhead_size = hi > lo ? hi - lo + 1 : 0; + overhead_start = hi > lo ? lo : 0; +} + +void Table::Format::pass2(Table *tbl) { + int byte[4] = {-1, -1, -1, -1}; + int half[2] = {-1, -1}; + int word = -1; + bool err = false; + for (auto &f : fmt[0]) { + int byte_slot = tbl->find_on_actionbus(&f.second, 0, 8 * f.second.size - 1, f.second.size); + if (byte_slot < 0) continue; + int slot = Stage::action_bus_slot_map[byte_slot]; + unsigned off = f.second.immed_bit(0); + switch (Stage::action_bus_slot_size[slot]) { + case 8: + for (unsigned b = off / 8; b <= (off + f.second.size - 1) / 8; b++) { + if (b >= 4 || (b & 3) != (slot & 3) || (byte[b] >= 0 && byte[b] != slot) || + (byte[b ^ 1] >= 0 && byte[b ^ 1] != (slot ^ 1)) || + Stage::action_bus_slot_size[slot] != 8) { + err = true; + break; + } + byte[b] = slot++; + } + break; + case 16: + for (unsigned w = off / 16; w <= (off + f.second.size - 1) / 16; w++) { + if (w >= 2 || (w & 1) != (slot & 1) || (half[w] >= 0 && half[w] != slot) || + Stage::action_bus_slot_size[slot] != 16) { + err = true; + break; + } + half[w] = slot++; + } + break; + case 32: + if (word >= 0 && word != slot) err = true; + word = slot; + break; + default: + BUG(); + } + if (err) error(lineno, "Immediate data misaligned for action bus byte %d", byte_slot); + } +} + +std::ostream &operator<<(std::ostream &out, const Table::Format::Field &f) { + out << "(size = " << f.size << " "; + for (auto b : f.bits) out << "[" << b.lo << ".." 
<< b.hi << "]"; + out << ")"; + return out; +} + +bool Table::Actions::Action::equiv(Action *a) { + if (instr.size() != a->instr.size()) return false; + for (unsigned i = 0; i < instr.size(); i++) + if (!instr[i]->equiv(a->instr[i])) return false; + if (attached.size() != a->attached.size()) return false; + for (unsigned i = 0; i < attached.size(); i++) + if (attached[i] != a->attached[i]) return false; + return true; +} + +bool Table::Actions::Action::equivVLIW(Action *a) { + if (instr.size() != a->instr.size()) return false; + for (unsigned i = 0; i < instr.size(); i++) + if (!instr[i]->equiv(a->instr[i])) return false; + return true; +} + +std::map> +Table::Actions::Action::reverse_alias() const { + std::map> rv; + for (auto &a : alias) rv[a.second.name].push_back(&a); + return rv; +} + +std::string Table::Actions::Action::alias_lookup(int lineno, std::string name, int &lo, + int &hi) const { + bool err = false; + bool found = false; + while (alias.count(name) && !found) { + for (auto &a : ValuesForKey(alias, name)) { + // FIXME -- need better handling of multiple aliases... + if (lo >= 0 && a.name != "hash_dist") { + if (a.lo >= 0) { + if (a.hi >= 0 && hi + a.lo > a.hi) { + err = true; + continue; + } + lo += a.lo; + hi += a.lo; + name = a.name; + found = true; + } + } else { + lo = a.lo; + hi = a.hi; + name = (alias.count(a.name)) ? 
alias_lookup(lineno, a.name, lo, hi) : a.name; + } + lineno = a.lineno; + err = false; + break; + } + if (err) { + error(lineno, "invalid bitslice of %s", name.c_str()); + break; + } + } + return name; +} + +Table::Actions::Action::alias_t::alias_t(value_t &data) { + lineno = data.lineno; + if (CHECKTYPE3(data, tSTR, tCMD, tINT)) { + if (data.type == tSTR) { + name = data.s; + lo = 0; + hi = -1; + } else if (data.type == tCMD) { + name = data.vec[0].s; + if (CHECKTYPE2(data.vec[1], tINT, tRANGE)) { + if (data.vec[1].type == tINT) { + lo = hi = data.vec[1].i; + } else { + lo = data.vec[1].range.lo; + hi = data.vec[1].range.hi; + } + } + } else { + is_constant = true; + } + value = data.i; + } +} + +/** + * Builds a map of conditional variable to which bits in the action data format that they + * control. Used for JSON later. + * + * @sa asm_output::EmitAction::mod_cond_value + */ +void Table::Actions::Action::setup_mod_cond_values(value_t &map) { + for (auto &kv : map.map) { + if (CHECKTYPE(kv.key, tSTR) && CHECKTYPE(kv.value, tVEC)) { + mod_cond_values[kv.key.s].resize(2, bitvec()); + for (auto &v : kv.value.vec) { + if (CHECKTYPEPM(v, tCMD, v.vec.size == 2, "action data or immediate slice")) { + int array_index = -1; + if (v[0] == "action_data_table") { + array_index = MC_ADT; + } else if (v[0] == "immediate") { + array_index = MC_IMMED; + } else { + error(map.lineno, + "A non action_data_table or immediate value in the " + "mod_con_value map: %s", + v[0].s); + continue; + } + int lo = -1; + int hi = -1; + if (v[1].type == tINT) { + lo = hi = v[1].i; + } else if (v[1].type == tRANGE) { + lo = v[1].range.lo; + hi = v[1].range.hi; + } + mod_cond_values.at(kv.key.s).at(array_index).setrange(lo, hi - lo + 1); + } + } + } + } +} + +Table::Actions::Action::Action(Table *tbl, Actions *actions, pair_t &kv, int pos) { + lineno = kv.key.lineno; + position_in_assembly = pos; + if (kv.key.type == tCMD) { + name = kv.key[0].s; + if (CHECKTYPE(kv.key[1], tINT)) code = 
kv.key[1].i; + if (kv.key.vec.size > 2 && CHECKTYPE(kv.key[2], tINT)) { + if ((addr = kv.key[2].i) < 0 || addr >= ACTION_IMEM_ADDR_MAX) + error(kv.key[2].lineno, "Invalid instruction address %d", addr); + } + } else if (kv.key.type == tINT) { + name = std::to_string((code = kv.key.i)); + } else { + name = kv.key.s; + } + if (code >= 0) { + if (actions->code_use[code]) { + if (!equivVLIW(actions->by_code[code])) + error(kv.key.lineno, "Duplicate action code %d", code); + } else { + actions->by_code[code] = this; + actions->code_use[code] = true; + } + } + for (auto &i : kv.value.vec) { + if (i.type == tINT && instr.empty()) { + if ((addr = i.i) < 0 || i.i >= ACTION_IMEM_ADDR_MAX) + error(i.lineno, "Invalid instruction address %" PRId64 "", i.i); + } else if (i.type == tMAP) { + for (auto &a : i.map) + if (CHECKTYPE(a.key, tSTR)) { + if (a.key == "p4_param_order") { + if (!CHECKTYPE(a.value, tMAP)) continue; + + unsigned position = 0; + for (auto &v : a.value.map) { + if (!(CHECKTYPE(v.key, tSTR) && CHECKTYPE2(v.value, tINT, tMAP))) + continue; + + if (v.value.type == tINT) { + p4_params_list.emplace_back(v.key.s, position++, v.value.i); + } else { + p4_param p(v.key.s, position++); + for (auto &w : v.value.map) { + if (!CHECKTYPE(w.key, tSTR)) continue; + if (w.key == "width" && CHECKTYPE(w.value, tINT)) + p.bit_width = w.value.i; + else if (w.key == "context_json" && CHECKTYPE(w.value, tMAP)) + p.context_json = toJson(w.value.map); + else + error(lineno, "Incorrect param type %s in p4_param_order", + w.key.s); + } + + p4_params_list.emplace_back(std::move(p)); + } + } + } else if (a.key == "hit_allowed") { + if CHECKTYPE (a.value, tMAP) { + for (auto &p : a.value.map) { + if (CHECKTYPE(p.key, tSTR) && CHECKTYPE(p.value, tSTR)) { + if (p.key == "allowed") + hit_allowed = get_bool(p.value); + else if (p.key == "reason") + hit_disallowed_reason = p.value.s; + } + } + } + } else if (a.key == "default_action" || a.key == "default_only_action") { + if CHECKTYPE (a.value, 
tMAP) { + for (auto &p : a.value.map) { + if (CHECKTYPE(p.key, tSTR) && CHECKTYPE(p.value, tSTR)) { + if (p.key == "allowed") + default_allowed = get_bool(p.value); + else if (p.key == "is_constant") + is_constant = get_bool(p.value); + else if (p.key == "reason") + default_disallowed_reason = p.value.s; + } + } + } + default_only = a.key == "default_only_action"; + } else if (a.key == "handle") { + if CHECKTYPE (a.value, tINT) { + handle = a.value.i; + } + } else if (a.key == "next_table") { + if (a.value.type == tINT) + next_table_encode = a.value.i; + else + next_table_ref = a.value; + } else if (a.key == "next_table_miss") { + next_table_miss_ref = a.value; + } else if (a.key == "mod_cond_value") { + if (CHECKTYPE(a.value, tMAP)) { + setup_mod_cond_values(a.value); + } + } else if (a.key == "context_json") { + if (CHECKTYPE(a.value, tMAP)) { + context_json = toJson(a.value.map); + } + } else if (CHECKTYPE3(a.value, tSTR, tCMD, tINT)) { + if (a.value.type == tINT) { + auto k = alias.find(a.key.s); + if (k == alias.end()) { + alias.emplace(a.key.s, a.value); + } else { + k->second.is_constant = true; + k->second.value = a.value.i; + } + } else if (a.value.type == tSTR) { + auto k = alias.find(a.value.s); + if (k == alias.end()) { + alias.emplace(a.key.s, a.value); + } else { + auto alias_value = k->second; + alias.erase(k); + alias.emplace(a.key.s, alias_value); + } + } else { + alias.emplace(a.key.s, a.value); + } + } + } + + } else if (CHECKTYPE2(i, tSTR, tCMD)) { + VECTOR(value_t) tmp; + if (i.type == tSTR) { + if (!*i.s) continue; // skip blank line + VECTOR_init1(tmp, i); + } else { + VECTOR_initcopy(tmp, i.vec); + } + if (auto *p = Instruction::decode(tbl, this, tmp)) + instr.emplace_back(p); + else if (tbl->to() || tbl->to() || + tbl->to()) + attached.emplace_back(i, tbl); + else + error(i.lineno, "Unknown instruction %s", tmp[0].s); + VECTOR_fini(tmp); + } + } +} + +Table::Actions::Action::Action(const char *n, int l) : name(n), lineno(l) {} 
+Table::Actions::Action::~Action() {} + +Table::Actions::Actions(Table *tbl, VECTOR(pair_t) & data) { + table = tbl; + int pos = 0; + for (auto &kv : data) { + if ((kv.key.type != tINT && !CHECKTYPE2M(kv.key, tSTR, tCMD, "action")) || + !CHECKTYPE(kv.value, tVEC)) + continue; + std::string name = kv.key.type == tINT ? std::to_string(kv.key.i) + : kv.key.type == tSTR ? kv.key.s + : kv.key[0].s; + if (actions.count(name)) { + error(kv.key.lineno, "Duplicate action %s", name.c_str()); + continue; + } + actions.emplace(name, tbl, this, kv, pos++); + } +} + +int Table::Actions::hit_actions_count() const { + int cnt = 0; + for (auto &a : actions) { + if (a.second.hit_allowed) ++cnt; + } + return cnt; +} + +int Table::Actions::default_actions_count() const { + int cnt = 0; + for (auto &a : actions) { + if (a.second.default_allowed) ++cnt; + } + return cnt; +} + +AlwaysRunTable::AlwaysRunTable(gress_t gress, Stage *stage, pair_t &init) + : Table(init.key.lineno, + "always run " + to_string(gress) + " stage " + to_string(stage->stageno), gress, + stage) { + VECTOR(pair_t) tmp = {1, 1, &init}; + actions.reset(new Actions(this, tmp)); + if (actions->count() == 1) { // unless there was an error parsing the action... 
+ auto &act = *actions->begin(); + if (act.addr >= 0) error(act.lineno, "always run action address is fixed"); + act.addr = ACTION_ALWAYS_RUN_IMEM_ADDR; + } +} + +void Table::Actions::Action::check_next_ref(Table *tbl, const Table::Ref &ref) const { + if (ref.check() && ref->table_id() >= 0 && ref->table_id() < tbl->table_id()) { + error(lineno, "Next table %s for action %s before containing table %s", ref->name(), + name.c_str(), tbl->name()); + return; + } + + if (ref->table_id() > (1U << NEXT_TABLE_MAX_RAM_EXTRACT_BITS) - 1 && + tbl->get_hit_next().size() == 0) { + error(lineno, "Next table cannot properly be saved on the RAM line for this action %s", + name.c_str()); + } +} + +/** + * By the end of this function, both next_table and next_table_miss_ref will have been created + * and validated. + * + * Each action must have at least next_table or a next_table_miss from the node. + * - next_table: The next table to run on hit + * - next_table_miss: The next table to run on miss + * + * The next_table_encode is the entry into the next_table_hitmap, if a next_table hit map is + * provided. If the next_table hit map is empty, then the next_table_encode won't have been + * set. If the action can be used on a hit, then either a next_table_ref/next_table_encode + * would be provided. 
+ * + * The next_table_ref could come from the next_table as an int value, which would be on offset + * into the hit_map + */ +void Table::Actions::Action::check_next(Table *tbl) { + if (next_table_encode >= 0) { + int idx = next_table_encode; + if (idx < tbl->get_hit_next().size()) { + next_table_ref = tbl->get_hit_next().at(idx); + } else if ((idx -= tbl->get_hit_next().size()) < tbl->extra_next_lut.size()) { + next_table_ref = tbl->extra_next_lut.at(idx); + } else { + error(lineno, + "The encoding on action %s is outside the range of the hitmap in " + "table %s", + name.c_str(), tbl->name()); + } + } + + if (!next_table_miss_ref.set() && !next_table_ref.set()) { + if (tbl->get_hit_next().size() != 1) { + error(lineno, + "Either next_table or next_table_miss must be required on action %s " + "if the next table cannot be determined", + name.c_str()); + } else { + next_table_ref = tbl->get_hit_next()[0]; + next_table_miss_ref = next_table_ref; + next_table_encode = 0; + } + } else if (!next_table_ref.set()) { + if (!default_only) { + error(lineno, + "Action %s on table %s that can be programmed on hit must have " + "a next_table encoding", + name.c_str(), tbl->name()); + } + next_table_ref = next_table_miss_ref; + } else if (!next_table_miss_ref.set()) { + next_table_miss_ref = next_table_ref; + } + tbl->check_next(next_table_ref); + tbl->check_next(next_table_miss_ref); + if (next_table_encode < 0 && !default_only) next_table_ref.force_single_next_table(); + for (auto &n : next_table_ref) check_next_ref(tbl, n); + for (auto &n : next_table_miss_ref) check_next_ref(tbl, n); +} + +void Table::Actions::Action::pass1(Table *tbl) { + // The compiler generates all action handles which must be specified in the + // assembly, if not we throw an error. 
+ if ((handle == 0) && tbl->needs_handle()) { + error(lineno, "No action handle specified for table - %s, action - %s", tbl->name(), + name.c_str()); + } + + if (tbl->needs_next()) { + check_next(tbl); + } + + if (tbl->get_default_action() == name) { + if (!tbl->default_action_handle) tbl->default_action_handle = handle; + if (tbl->default_only_action) default_only = true; + } + /* SALU actions always have addr == -1 (so iaddr == -1) */ + int iaddr = -1; + bool shared_VLIW = false; + for (auto &inst : instr) { + inst.reset(inst.release()->pass1(tbl, this)); + if (inst->slot >= 0) { + if (slot_use[inst->slot]) + error(inst->lineno, "instruction slot %d used multiple times in action %s", + inst->slot, name.c_str()); + slot_use[inst->slot] = 1; + } + } + if (addr >= 0) { + if (auto old = tbl->stage->imem_addr_use[imem_thread(tbl->gress)][addr]) { + if (equivVLIW(old)) { + shared_VLIW = true; + } else { + error(lineno, "action instruction addr %d in use elsewhere", addr); + warning(old->lineno, "also defined here"); + } + } + tbl->stage->imem_addr_use[imem_thread(tbl->gress)][addr] = this; + iaddr = addr / ACTION_IMEM_COLORS; + } + if (!shared_VLIW) { + for (auto &inst : instr) { + if (inst->slot >= 0 && iaddr >= 0) { + if (tbl->stage->imem_use[iaddr][inst->slot]) + error(lineno, "action instruction slot %d.%d in use elsewhere", iaddr, + inst->slot); + tbl->stage->imem_use[iaddr][inst->slot] = 1; + } + } + } + for (auto &a : alias) { + while (alias.count(a.second.name) >= 1) { + // the alias refers to something else in the alias list + auto &rec = alias.find(a.second.name)->second; + if (rec.name == a.first) { + error(a.second.lineno, "recursive alias %s", a.first.c_str()); + break; + } + if (rec.lo > 0) { + a.second.lo += rec.lo; + if (a.second.hi >= 0) a.second.hi += rec.lo; + } + if (rec.hi > 0 && a.second.hi < 0) a.second.hi = rec.hi; + if (a.second.lo < rec.lo || (rec.hi >= 0 && a.second.hi > rec.hi)) { + error(a.second.lineno, + "alias for %s:%s(%d:%d) has out of 
range index from allowed %s:%s(%d:%d)", + a.first.c_str(), a.second.name.c_str(), a.second.lo, a.second.hi, + a.second.name.c_str(), rec.name.c_str(), rec.lo, rec.hi); + break; + } + a.second.name = rec.name; + } + if (auto *f = tbl->lookup_field(a.second.name, name)) { + if (a.second.hi < 0) a.second.hi = f->size - 1; + } else if (a.second.name == "hash_dist" && a.second.lo >= 0) { + // nothing to be done for now. lo..hi is the hash dist index rather than + // a bit index, which will cause problems if we want to later slice the alias + // to access only some bits of it. + } else { + error(a.second.lineno, "No field %s in table %s", a.second.to_string().c_str(), + tbl->name()); + } + } + // Update default value for params if default action parameters present + for (auto &p : p4_params_list) { + if (auto def_act_params = tbl->get_default_action_parameters()) { + if (def_act_params->count(p.name) > 0) { + p.default_value = (*def_act_params)[p.name]; + p.defaulted = true; + } + } + } + for (auto &c : attached) { + if (!c) { + error(c.lineno, "Unknown instruction or table %s", c.name.c_str()); + continue; + } + if (c->table_type() != COUNTER && c->table_type() != METER && c->table_type() != STATEFUL) { + error(c.lineno, "%s is not a counter, meter or stateful table", c.name.c_str()); + continue; + } + } +} + +/** + * Determines if the field, which has a particular range of bits in the format, is controlled + * by a conditional variable. 
This is required for context JSON information on parameters in + * the action data table pack format, or in the immediate fields: + * + * -is_mod_field_conditionally_value + * -mod_field_conditionally_mask_field_name + * + * @sa asm_output::EmitAction::mod_cond_value + */ +void Table::Actions::Action::check_conditional(Table::Format::Field &field) const { + bool found = false; + std::string condition; + for (auto kv : mod_cond_values) { + for (auto br : field.bits) { + auto overlap = kv.second[MC_ADT].getslice(br.lo, br.size()); + if (overlap.empty()) { + BUG_CHECK(!found || (found && condition != kv.first)); + } else if (overlap.popcount() == br.size()) { + if (found) { + BUG_CHECK(condition == kv.first); + } else { + found = true; + condition = kv.first; + } + } else { + BUG(); + } + } + } + if (found) { + field.conditional_value = true; + field.condition = condition; + } +} + +/** + * @sa Table::Actions::Action::check_conditional + */ +bool Table::Actions::Action::immediate_conditional(int lo, int sz, std::string &condition) const { + bool found = false; + for (auto kv : mod_cond_values) { + auto overlap = kv.second[MC_IMMED].getslice(lo, sz); + if (overlap.empty()) { + BUG_CHECK(!found || (found && condition != kv.first)); + } else { + if (found) { + BUG_CHECK(condition == kv.first); + } else if (overlap.popcount() == sz) { + found = true; + condition = kv.first; + } else { + BUG(); + } + } + } + return found; +} + +void Table::Actions::pass1(Table *tbl) { + for (auto &act : *this) { + act.pass1(tbl); + slot_use |= act.slot_use; + } +} + +std::map
> Table::find_pred_in_stage( + int stageno, const std::set &acts) { + std::map
> rv; + if (stage->stageno < stageno) return rv; + if (stage->stageno == stageno) { + rv[this].insert(acts.begin(), acts.end()); + } + for (auto &p : pred) { + for (auto &kv : p.first->find_pred_in_stage(stageno, p.second)) { + rv[kv.first].insert(kv.second.begin(), kv.second.end()); + } + } + for (auto *mt : get_match_tables()) { + if (mt != this) { + for (auto &kv : mt->find_pred_in_stage(stageno, acts)) { + rv[kv.first].insert(kv.second.begin(), kv.second.end()); + } + } + } + return rv; +} + +void Table::Actions::pass2(Table *tbl) { + /* We do NOT call this for SALU actions, so we can assume VLIW actions here */ + BUG_CHECK(tbl->table_type() != STATEFUL); + int code = tbl->get_gateway() ? 1 : 0; // if there's a gateway, reserve code 0 for a NOP + // to run when the gateway inhibits the table + + /* figure out how many codes we can encode in the match table(s), and if we need a distinct + * code for every action to handle next_table properly */ + int code_limit = 0x10000; + bool use_code_for_next = false; // true iff a table uses the action code for next table + // selection in addition to using it for the action instruction + + for (auto match : tbl->get_match_tables()) { + // action is currently a default keyword for the instruction address + auto instruction = match->instruction_call(); + auto fld = instruction.args[0].field(); + if (fld) { + code_limit = 1 << fld->size; + if (match->hit_next_size() > 1 && !match->lookup_field("next")) + use_code_for_next = true; + } else { + code_limit = code + 1; + } + } + + /* figure out if we need more codes than can fit in the action_instruction_adr_map. + * use code = -1 to signal that condition. */ + int non_nop_actions = by_code.size(); + // Check if a nop action is defined. The action will be empty (no + // instructions). By default we will use code '0' for nop action, unless + // compiler has assigned a different value. 
+ int nop_code = 0; + for (auto &bc : by_code) { + if (bc.second->instr.empty()) nop_code = bc.first; + } + if (by_code.count(nop_code) && by_code.at(nop_code)->instr.empty()) { + --non_nop_actions; // don't count nop code action + code = 1; + } + for (auto &act : *this) { + if (act.default_only) continue; + if (act.instr.empty() && !use_code_for_next) + code = 1; // nop action -- use code 0 unless it needs to be used as next + else if (act.code < 0) + ++non_nop_actions; + } // FIXME -- should combine identical actions? + if (code + non_nop_actions > ACTION_INSTRUCTION_SUCCESSOR_TABLE_DEPTH) code = -1; + bool code0_is_noop = (code != 0); + + for (auto &act : *this) { + for (auto &inst : act.instr) inst->pass2(tbl, &act); + if (act.addr < 0) { + for (int i = 0; i < ACTION_IMEM_ADDR_MAX; i++) { + if (auto old = tbl->stage->imem_addr_use[imem_thread(tbl->gress)][i]) { + if (act.equivVLIW(old)) { + act.addr = i; + break; + } + continue; + } + if (tbl->stage->imem_use[i / ACTION_IMEM_COLORS].intersects(act.slot_use)) continue; + act.addr = i; + tbl->stage->imem_use[i / ACTION_IMEM_COLORS] |= act.slot_use; + tbl->stage->imem_addr_use[imem_thread(tbl->gress)][i] = &act; + break; + } + } + if (act.addr < 0) error(act.lineno, "Can't find an available instruction address"); + if (act.code < 0 && !act.default_only) { + if (code < 0 && !code_use[act.addr]) { + act.code = act.addr; + } else if (act.instr.empty() && !use_code_for_next && code0_is_noop) { + act.code = 0; + } else { + while (code >= 0 && code_use[code]) code++; + act.code = code; + } + } else if (code < 0 && act.code != act.addr && !act.default_only) { + error(act.lineno, + "Action code must be the same as action instruction address " + "when there are more than %d actions", + ACTION_INSTRUCTION_SUCCESSOR_TABLE_DEPTH); + if (act.code < 0) + warning(act.lineno, "Code %d is already in use by another action", act.addr); + } + if (act.code >= 0) { + by_code[act.code] = &act; + code_use[act.code] = true; + } + if 
(act.code >= code_limit) + error(act.lineno, + "Action code %d for %s too large for action specifier in " + "table %s", + act.code, act.name.c_str(), tbl->name()); + if (act.code > max_code) max_code = act.code; + } + actions.sort([](const value_type &a, const value_type &b) -> bool { + return a.second.code < b.second.code; + }); + if (!tbl->default_action.empty()) { + if (!exists(tbl->default_action)) { + error(tbl->default_action_lineno, "no action %s in table %s", + tbl->default_action.c_str(), tbl->name()); + } else { + auto &defact = actions.at(tbl->default_action); + if (!defact.default_allowed) { + // FIXME -- should be an error, but the compiler currently does this? + // FIXME -- see p4_16_programs_tna_lpm_match + warning(tbl->default_action_lineno, + "default action %s in table %s is not allowed " + "to be default?", + tbl->default_action.c_str(), tbl->name()); + defact.default_allowed = true; + } + } + } + auto pred = tbl->find_pred_in_stage(tbl->stage->stageno); + for (auto &p : pred) { + auto *actions = p.first->get_actions(); + if (!actions || actions == this) continue; + if (!slot_use.intersects(actions->slot_use)) continue; + for (auto &a1 : *this) { + bool first = false; + for (auto a2 : p.second) { + if (a1.slot_use.intersects(a2->slot_use)) { + if (!first) + warning(a1.lineno, + "Conflicting instruction slot usage for non-exlusive " + "table %s action %s", + tbl->name(), a1.name.c_str()); + first = true; + warning(a2->lineno, "and table %s action %s", p.first->name(), + a2->name.c_str()); + } + } + } + } +} + +void Table::Actions::stateful_pass2(Table *tbl) { + BUG_CHECK(tbl->table_type() == STATEFUL); + auto *stbl = tbl->to(); + for (auto &act : *this) { + if (act.code >= 4) { + error(act.lineno, "Only 4 actions in a stateful table"); + } else if (act.code >= 0) { + if (code_use[act.code]) { + error(act.lineno, "duplicate use of code %d in SALU", act.code); + warning(by_code[act.code]->lineno, "previous use here"); + } + by_code[act.code] = &act; 
+ code_use[act.code] = true; + } + if (act.code == 3 && stbl->clear_value) + error(act.lineno, "Can't use SALU action 3 with a non-zero clear value"); + for (const auto &inst : act.instr) inst->pass2(tbl, &act); + } + if (stbl->clear_value) code_use[3] = true; + for (auto &act : *this) { + if (act.code < 0) { + if ((act.code = code_use.ffz(0)) >= 4) { + error(act.lineno, "Only 4 actions in a stateful table"); + break; + } + by_code[act.code] = &act; + code_use[act.code] = true; + } + } +} + +template +void Table::Actions::write_regs(REGS ®s, Table *tbl) { + for (auto &act : *this) { + LOG2("# action " << act.name << " code=" << act.code << " addr=" << act.addr); + tbl->write_action_regs(regs, &act); + for (const auto &inst : act.instr) inst->write_regs(regs, tbl, &act); + if (options.fill_noop_slot) { + for (auto slot : Phv::use(tbl->gress) - tbl->stage->imem_use_all()) { + auto tmp = VLIW::genNoopFill(tbl, &act, options.fill_noop_slot, slot); + tmp->pass1(tbl, &act); + tmp->pass2(tbl, &act); + tmp->write_regs(regs, tbl, &act); + } + } + } +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void Table::Actions::write_regs, mau_regs &, + Table *) + +/** + * Indirect Counters, Meters, and Stateful Alus can be addressed in many different ways, e.g. + * Hash Distribution, Overhead Index, Stateful Counter, Constant, etc. + * + * The indexing can be different per individual action. Say one action always uses an indirect + * address, while another one uses a constant. The driver has to know where to put that + * constant into the RAM line. + * + * Also, say an address is from hash, but can have multiple meter types. By using the override + * address of an action, when that action is programmed, the meter type written in overhead will + * be determined by the overhead address. + * + * override_addr - a boolean of whether to use the override value for these parameters. + * This is enabled if the address does not come from overhead. 
+ * + * Override_addr_pfe - Not actually useful, given the override_full_addr contains the per flow + * enable bit + * + * Override_full_addr - the constant value to be written directly into the corresponding bit + * positions in the RAM line + */ +static void gen_override(json::map &cfg, const Table::Call &att) { + auto type = att->table_type(); + // Direct tables currently don't require overrides + // FIXME: Corner cases where miss actions do not use the stateful object should have + // an override of all 0 + if (att->to()->is_direct()) return; + std::string base; + bool override_addr = false; + bool override_addr_pfe = false; + unsigned override_full_addr = 0; + switch (type) { + case Table::COUNTER: + base = "override_stat"; + break; + case Table::METER: + base = "override_meter"; + break; + case Table::STATEFUL: + base = "override_stateful"; + break; + default: + error(att.lineno, "unsupported table type in action call"); + } + // Always true if the call is provided + override_addr_pfe = true; + override_full_addr |= 1U << (type == Table::COUNTER ? STATISTICS_PER_FLOW_ENABLE_START_BIT + : METER_PER_FLOW_ENABLE_START_BIT); + int idx = -1; + for (auto &arg : att.args) { + ++idx; + if (arg.type == Table::Call::Arg::Name) { + if (strcmp(arg.name(), "$hash_dist") == 0 || + strcmp(arg.name(), "$stful_counter") == 0) { + override_addr = true; + } else if (auto *st = att->to()) { + if (auto *act = st->actions->action(arg.name())) { + override_full_addr |= 1 << METER_TYPE_START_BIT; + override_full_addr |= act->code << (METER_TYPE_START_BIT + 1); + } + } + // FIXME -- else assume its a reference to a format field, so doesn't need to + // FIXME -- be in the override. Should check that somewhere, but need access + // FIXME -- to the match_table to do it here. 
+ } else if (arg.type == Table::Call::Arg::Const) { + if (idx == 0 && att.args.size() > 1) { + // The first argument for meters/stateful is the meter type + override_full_addr |= arg.value() << METER_TYPE_START_BIT; + } else { + override_full_addr |= arg.value() << att->address_shift(); + override_addr = true; + } + } else if (arg.type == Table::Call::Arg::Counter) { + // does not affect context json + } else { + error(att.lineno, "argument not a constant"); + } + } + cfg[base + "_addr"] = override_addr; + cfg[base + "_addr_pfe"] = override_addr ? override_addr_pfe : false; + cfg[base + "_full_addr"] = override_addr ? override_full_addr : 0; +} + +bool Table::Actions::Action::is_color_aware() const { + for (auto &att : attached) { + if (att->table_type() != Table::METER) continue; + if (att.args.size() < 2) continue; + auto type_arg = att.args[0]; + if (type_arg.type == Table::Call::Arg::Const && type_arg.value() == METER_COLOR_AWARE) + return true; + } + return false; +} + +void Table::Actions::Action::check_and_add_resource(json::vector &resources, + json::map &resource) const { + // Check if resource already exists in the json::vector. For tables + // spanning multiple stages, the same resource gets added as an attached + // resource for every stage. 
To avoid duplication only add when not + // present in the resource array + bool found = false; + for (auto &r : resources) { + if (resource == r->to()) { + found = true; + break; + } + } + if (!found) resources.push_back(std::move(resource)); +} + +void Table::Actions::Action::add_direct_resources(json::vector &direct_resources, + const Call &att) const { + json::map direct_resource; + direct_resource["resource_name"] = att->p4_name(); + direct_resource["handle"] = att->handle(); + check_and_add_resource(direct_resources, direct_resource); +} + +void Table::Actions::Action::add_indirect_resources(json::vector &indirect_resources, + const Call &att) const { + auto addr_arg = att.args.back(); + json::map indirect_resource; + if (addr_arg.type == Table::Call::Arg::Name) { + auto *p = has_param(addr_arg.name()); + if (p) { + indirect_resource["access_mode"] = "index"; + indirect_resource["parameter_name"] = p->name; + indirect_resource["parameter_index"] = p->position; + } else { + return; + } + } else if (addr_arg.type == Table::Call::Arg::Const) { + indirect_resource["access_mode"] = "constant"; + indirect_resource["value"] = addr_arg.value(); + } else { + return; + } + indirect_resource["resource_name"] = att->p4_name(); + indirect_resource["handle"] = att->handle(); + check_and_add_resource(indirect_resources, indirect_resource); +} + +void Table::Actions::gen_tbl_cfg(json::vector &actions_cfg) const { + for (auto &act : *this) { + // Use action node if it already exists in json + bool act_json_present = false; + json::map *action_ptr = nullptr; + for (auto &_action_o : actions_cfg) { + auto &_action = _action_o->to(); + if (_action["name"] == act.name) { + action_ptr = &_action; + act_json_present = true; + break; + } + } + if (!act_json_present) action_ptr = new json::map(); + json::map &action_cfg = *action_ptr; + + action_cfg["name"] = act.name; + action_cfg["handle"] = act.handle; // FIXME-JSON + if (act.instr.empty() || action_cfg.count("primitives") == 0) + 
action_cfg["primitives"] = json::vector(); + auto &direct_resources = action_cfg["direct_resources"] = json::vector(); + auto &indirect_resources = action_cfg["indirect_resources"] = json::vector(); + for (auto &att : act.attached) { + if (att.is_direct_call()) + act.add_direct_resources(direct_resources, att); + else + act.add_indirect_resources(indirect_resources, att); + } + if (!act.hit_allowed && !act.default_allowed) + error(act.lineno, "Action %s must be allowed to be hit and/or default action.", + act.name.c_str()); + action_cfg["allowed_as_hit_action"] = act.hit_allowed; + // TODO: allowed_as_default_action info is directly passed through assembly + // This will be 'false' for following conditions: + // 1. Action requires hardware in hit path i.e. hash distribution or + // random number generator + // 2. There is a default action declared constant in program which + // implies all other actions cannot be set to default + action_cfg["allowed_as_default_action"] = act.default_allowed; + // TODO: "disallowed_as_default_action" is not used by driver. + // Keeping it here as debugging info. Will be set to "none", + // "has_const_default", "has_hash_dist". Once rng support is added + // to the compiler this must reflect "has_rng" or similar string. 
+ if (!act.default_allowed) + action_cfg["disallowed_as_default_action_reason"] = act.default_disallowed_reason; + // TODO: Need to be set through assembly + action_cfg["is_compiler_added_action"] = false; + action_cfg["constant_default_action"] = act.is_constant; + + // TODO: These will be set to 'true' & "" for a keyless table to + // allow any action to be set as default by the control plane + // Exception is TernaryIndirectTables which dont have params list as they are on the main + // TernaryMatchTable, hence check for match_table to query params list + if (table->get_match_table()->p4_params_list.empty()) { + action_cfg["allowed_as_default_action"] = true; + action_cfg["disallowed_as_default_action_reason"] = ""; + } + + json::vector &p4_params = action_cfg["p4_parameters"] = json::vector(); + act.add_p4_params(p4_params); + action_cfg["override_meter_addr"] = false; + action_cfg["override_meter_addr_pfe"] = false; + action_cfg["override_meter_full_addr"] = 0; + action_cfg["override_stat_addr"] = false; + action_cfg["override_stat_addr_pfe"] = false; + action_cfg["override_stat_full_addr"] = 0; + action_cfg["override_stateful_addr"] = false; + action_cfg["override_stateful_addr_pfe"] = false; + action_cfg["override_stateful_full_addr"] = 0; + for (auto &att : act.attached) gen_override(action_cfg, att); + action_cfg["is_action_meter_color_aware"] = act.is_color_aware(); + if (act.context_json) action_cfg.merge(*act.context_json.get()); + if (!act_json_present) actions_cfg.push_back(std::move(action_cfg)); + } +} + +/** + * For action data tables, the entirety of the action configuration is not necessary, as the + * information is per match table, not per action data table. 
The only required parameters + * are the name, handle, and p4_parameters + * + * Even at some point, even actions that have the different p4_parameters could even share a + * member, if for example, one of the parameters is not stored in the action data table, + * but rather as an index for a counter/meter etc. The compiler/driver do not have support for + * this yet. + */ +void Table::Actions::Action::gen_simple_tbl_cfg(json::vector &actions_cfg) const { + json::map action_cfg; + action_cfg["name"] = name; + action_cfg["handle"] = handle; + json::vector &p4_params = action_cfg["p4_parameters"] = json::vector(); + add_p4_params(p4_params, false); + actions_cfg.push_back(std::move(action_cfg)); +} + +void Table::Actions::Action::add_p4_params(json::vector &cfg, bool include_default) const { + unsigned start_bit = 0; + for (auto &a : p4_params_list) { + json::map param; + param["name"] = a.name; + param["start_bit"] = start_bit; + param["position"] = a.position; + if (include_default && a.defaulted) param["default_value"] = a.default_value; + param["bit_width"] = a.bit_width; + if (a.context_json) param.merge(*a.context_json.get()); + cfg.push_back(std::move(param)); + start_bit += a.bit_width; + } +} + +void Table::Actions::add_p4_params(const Action &act, json::vector &cfg) const { + int index = 0; + unsigned start_bit = 0; + // Add p4 params if present. This will add params even if the action is + // otherwise empty. 
Driver will always generate an action spec if p4_params + // are present for an action + for (auto &a : act.p4_params_list) { + json::map param; + param["name"] = a.name; + param["start_bit"] = start_bit; + param["position"] = a.position; + if (a.defaulted) param["default_value"] = a.default_value; + param["bit_width"] = a.bit_width; + cfg.push_back(std::move(param)); + start_bit += a.bit_width; + } +} + +void Table::Actions::add_action_format(const Table *table, json::map &tbl) const { + json::vector &action_format = tbl["action_format"] = json::vector(); + for (auto &act : *this) { + json::map action_format_per_action; + unsigned next_table = -1; + + std::string next_table_name = "--END_OF_PIPELINE--"; + if (!act.default_only) { + if (act.next_table_encode >= 0) { + next_table = static_cast(act.next_table_encode); + } else { + // The RAM value is only 8 bits, for JBay must be solved by table placement + next_table = act.next_table_ref.next_table_id() & 0xff; + next_table_name = act.next_table_ref.next_table_name(); + if (next_table_name == "END") next_table_name = "--END_OF_PIPELINE--"; + } + } + unsigned next_table_full = act.next_table_miss_ref.next_table_id(); + + /** + * This following few fields are required on a per stage table action basis. + * The following information is: + * + * - next_table - The value that will be written into the next field RAM line on a hit, + * when the entry is specified with this action. This is either an index into + * the next_table_map_en (if that map is enabled), or the 8 bit next table value. + * + * - next_table_full - The value that will be written into the miss register for next + * table (next_table_format_data.match_next_table_adr_miss_value), if this action + * is set as the default action. This is the full 8 bit (9 bit for JBay) next + * table. + * + * - vliw_instruction - The value that will be written into the action instruction RAM + * entry when the entry is specified with this action. 
This is either an index + * into the 8 entry table mau_action_instruction_adr_map_data, if that is + * enabled, or the full word instruction + * + * - vliw_instruction_full - The value that will be written into the miss register for + * action_instruction (mau_action_instruction_adr_miss_value), when this + * action is specified as the default action. The full address with the PFE + * bit enabled. + */ + action_format_per_action["action_name"] = act.name; + action_format_per_action["action_handle"] = act.handle; + action_format_per_action["table_name"] = next_table_name; + action_format_per_action["next_table"] = next_table; + action_format_per_action["next_table_full"] = next_table_full; + if (Target::LONG_BRANCH_TAGS() > 0 && !options.disable_long_branch) { + if (Target::NEXT_TABLE_EXEC_COMBINED()) { + action_format_per_action["next_table_exec"] = + ((act.next_table_miss_ref.next_in_stage(table->stage->stageno) & 0xfffe) + << 15) + + (act.next_table_miss_ref.next_in_stage(table->stage->stageno + 1) & 0xffff); + } else { + action_format_per_action["next_table_local_exec"] = + act.next_table_miss_ref.next_in_stage(table->stage->stageno) >> 1; + action_format_per_action["next_table_global_exec"] = + act.next_table_miss_ref.next_in_stage(table->stage->stageno + 1); + } + action_format_per_action["next_table_long_brch"] = + act.next_table_miss_ref.long_branch_tags(); + } + action_format_per_action["vliw_instruction"] = act.code; + action_format_per_action["vliw_instruction_full"] = + ACTION_INSTRUCTION_ADR_ENABLE | act.addr; + + json::vector &next_tables = action_format_per_action["next_tables"] = json::vector(); + for (auto n : act.next_table_ref) { + auto nP4Name = n->p4_name(); + // Gateway next tables don't have a p4 Name + if (nP4Name == nullptr) { + nP4Name = n.name.c_str(); + } + next_tables.push_back( + json::map{{"next_table_name", json::string(nP4Name)}, + {"next_table_logical_id", json::number(n->logical_id)}, + {"next_table_stage_no", 
json::number(n->stage->stageno)}}); + } + json::vector &action_format_per_action_imm_fields = + action_format_per_action["immediate_fields"] = json::vector(); + for (auto &a : act.alias) { + json::string name = a.first; + int lo = remove_name_tail_range(name); + json::string immed_name = a.second.name; + if (immed_name != "immediate") continue; // output only immediate fields + if (!(act.has_param(name) || a.second.is_constant)) + continue; // and fields that are parameters or constants + json::map action_format_per_action_imm_field; + action_format_per_action_imm_field["param_name"] = name; + action_format_per_action_imm_field["param_type"] = "parameter"; + if (a.second.is_constant) { + action_format_per_action_imm_field["param_type"] = "constant"; + action_format_per_action_imm_field["const_value"] = a.second.value; + action_format_per_action_imm_field["param_name"] = + "constant_" + std::to_string(a.second.value); + } + action_format_per_action_imm_field["param_shift"] = lo; + action_format_per_action_imm_field["dest_start"] = a.second.lo; + action_format_per_action_imm_field["dest_width"] = a.second.size(); + std::string condition; + if (act.immediate_conditional(a.second.lo, a.second.size(), condition)) { + action_format_per_action_imm_field["is_mod_field_conditionally_value"] = true; + action_format_per_action_imm_field["mod_field_conditionally_mask_field_name"] = + condition; + } + action_format_per_action_imm_fields.push_back( + std::move(action_format_per_action_imm_field)); + } + action_format.push_back(std::move(action_format_per_action)); + } +} + +std::ostream &operator<<(std::ostream &out, const Table::Actions::Action::alias_t &a) { + out << "(" << a.name << ", lineno = " << a.lineno << ", lo = " << a.lo << ", hi = " << a.hi + << ", is_constant = " << a.is_constant << ", value = 0x" << std::hex << a.value << std::dec + << ")"; + return out; +} + +std::ostream &operator<<(std::ostream &out, const Table::Actions::Action &a) { + out << a.name << "("; + 
auto indent = a.name.length() + 10; + for (auto &p : a.p4_params_list) out << p << std::endl << std::setw(indent); + out << ")"; + return out; +} + +std::ostream &operator<<(std::ostream &out, const Table::p4_param &p) { + out << p.name << "[ w =" << p.bit_width << ", w_full =" << p.bit_width_full + << ", start_bit =" << p.start_bit << ", mask = 0x" << p.mask << ", position =" << p.position + << ", default_value =" << p.default_value << ", defaulted =" << p.defaulted + << ", is_valid =" << p.is_valid << ", type =" << p.type << ", alias =" << p.alias + << ", key_name =" << p.key_name << "]"; + return out; +} + +void Table::Actions::add_immediate_mapping(json::map &tbl) { + for (auto &act : *this) { + if (act.alias.empty()) continue; + json::vector &map = tbl["action_to_immediate_mapping"][act.name]; + for (auto &a : act.alias) { + json::string name = a.first; + json::string immed_name = a.second.name; + if (immed_name == "immediate") immed_name = "--immediate--"; + int lo = remove_name_tail_range(name); + map.push_back(json::vector{json::map{ + {"name", std::move(name)}, + {"parameter_least_significant_bit", json::number(lo)}, + {"parameter_most_significant_bit", json::number(lo + a.second.hi - a.second.lo)}, + {"immediate_least_significant_bit", json::number(a.second.lo)}, + {"immediate_most_significant_bit", json::number(a.second.hi)}, + {"field_called", std::move(immed_name)}}}); + } + } +} + +template +void Table::write_mapram_regs(REGS ®s, int row, int col, int vpn, int type) { + auto &mapram_config = regs.rams.map_alu.row[row].adrmux.mapram_config[col]; + // auto &mapram_ctl = map_alu_row.adrmux.mapram_ctl[col]; + mapram_config.mapram_type = type; + mapram_config.mapram_logical_table = logical_id; + mapram_config.mapram_vpn_members = 0; + if (!options.match_compiler) // FIXME -- glass doesn't set this? 
+ mapram_config.mapram_vpn = vpn; + if (gress == INGRESS) + mapram_config.mapram_ingress = 1; + else + mapram_config.mapram_egress = 1; + mapram_config.mapram_enable = 1; + mapram_config.mapram_ecc_check = 1; + mapram_config.mapram_ecc_generate = 1; + if (gress) regs.cfg_regs.mau_cfg_mram_thread[col / 3U] |= 1U << (col % 3U * 8U + row); +} +FOR_ALL_REGISTER_SETS(INSTANTIATE_TARGET_TEMPLATE, void Table::write_mapram_regs, mau_regs &, int, + int, int, int) + +HashDistribution *Table::find_hash_dist(int unit) { + for (auto &hd : hash_dist) + if (hd.id == unit) return &hd; + for (auto t : get_match_tables()) + for (auto &hd : t->hash_dist) + if (hd.id == unit) return &hd; + if (auto *a = get_attached()) + for (auto &call : a->meters) + for (auto &hd : call->hash_dist) + if (hd.id == unit) return &hd; + return nullptr; +} + +int Table::find_on_actionbus(const char *name, TableOutputModifier mod, int lo, int hi, int size, + int *len) { + return action_bus ? action_bus->find(name, mod, lo, hi, size, len) : -1; +} + +void Table::need_on_actionbus(Table *att, TableOutputModifier mod, int lo, int hi, int size) { + if (!action_bus) action_bus = ActionBus::create(); + action_bus->need_alloc(this, att, mod, lo, hi, size); +} + +int Table::find_on_actionbus(const ActionBusSource &src, int lo, int hi, int size, int pos) { + return action_bus ? 
action_bus->find(src, lo, hi, size, pos) : -1; +} + +void Table::need_on_actionbus(const ActionBusSource &src, int lo, int hi, int size) { + if (!action_bus) action_bus = ActionBus::create(); + action_bus->need_alloc(this, src, lo, hi, size); +} + +int Table::find_on_ixbar(Phv::Slice sl, InputXbar::Group group, InputXbar::Group *found) { + for (auto &ixb : input_xbar) { + if (auto *i = ixb->find(sl, group, found)) { + unsigned bit = (i->lo + sl.lo - i->what->lo); + BUG_CHECK(bit < 128); + return bit / 8; + } + } + if (group.index >= 0) { + for (auto *in : stage->ixbar_use[group]) { + if (auto *i = in->find(sl, group)) { + unsigned bit = (i->lo + sl.lo - i->what->lo); + BUG_CHECK(bit < 128); + return bit / 8; + } + } + } else { + for (auto &g : Keys(stage->ixbar_use)) { + if (g.type != group.type) continue; + int t; + if ((t = find_on_ixbar(sl, g)) >= 0) { + if (found) *found = g; + return t; + } + } + } + return -1; +} + +int Table::json_memunit(const MemUnit &r) const { + if (r.stage >= 0) { + return r.stage * Target::SRAM_STRIDE_STAGE() + r.row * Target::SRAM_STRIDE_ROW() + + r.col * Target::SRAM_STRIDE_COLUMN(); + } else if (r.row >= 0) { + // per-stage physical sram + return r.row * Target::SRAM_UNITS_PER_ROW() + r.col; + } else { + // lamb + return r.col; + } +} + +std::unique_ptr Table::gen_memory_resource_allocation_tbl_cfg( + const char *type, const std::vector &layout, bool skip_spare_bank) const { + int width, depth, period; + const char *period_name; + // FIXME -- calling vpn_params here is only valid when layout == this->layout, but we also + // FIXME -- get here for color_maprams. 
It works out as we don't use depth or width, only + // FIXME -- period, which will always be 1 for meter layout or color_maprams + vpn_params(width, depth, period, period_name); + json::map mra; + mra["memory_type"] = type; + std::vector> mem_units; + json::vector &mem_units_and_vpns = mra["memory_units_and_vpns"] = json::vector(); + int vpn_ctr = 0; + bool no_vpns = false; + int spare_vpn; + std::vector spare_mem; + + // Retrieve the Spare banks + // skip_spare_bank is only false on tables don't have spare banks, or when building + // memory_units json for map rams + if (skip_spare_bank) { + BUG_CHECK(&layout == &this->layout, "layout not matching"); + spare_mem = determine_spare_bank_memory_units(); + BUG_CHECK(!spare_mem.empty(), "No spare banks in %s?", name()); + // if all the mems are "spare" this is really a DDP table, so we want to + // put the usits/vpns of the spares in the memory_units json + if (spare_mem.size() == layout_size()) skip_spare_bank = false; + } else if (&layout == &this->layout) { + BUG_CHECK(determine_spare_bank_memory_units().empty(), + "%s has spare banks, but we're not skipping them?", name()); + } + + for (auto &row : layout) { + int word = row.word >= 0 ? row.word : 0; + auto vpn_itr = row.vpns.begin(); + for (auto &ram : row.memunits) { + BUG_CHECK(ram.row == row.row, "bogus %s in row %d", ram.desc(), row.row); + if (vpn_itr == row.vpns.end()) + no_vpns = true; + else + vpn_ctr = *vpn_itr++; + if (size_t(vpn_ctr) >= mem_units.size()) mem_units.resize(vpn_ctr + 1); + // Create a vector indexed by vpn no where each element is a map + // having a RAM entry indexed by word number + // VPN WORD RAM + // 0 -> 0 90 + // 1 91 + // 1 -> 0 92 + // 1 93 + // E.g. 
VPN 0 has Ram 90 with word 0 and Ram 91 with word 1 + int unit = json_memunit(ram); + if (skip_spare_bank && + std::find(spare_mem.begin(), spare_mem.end(), unit) != spare_mem.end()) + continue; + mem_units[vpn_ctr][word] = json_memunit(ram); + } + } + if (mem_units.size() == 0) return nullptr; + int vpn = 0; + for (auto &mem_unit : mem_units) { + json::vector mem; + // Below for loop orders the mem unit as { .., word1, word0 } which is + // assumed to be what driver expects. + for (int word = mem_unit.size() - 1; word >= 0; word--) { + for (auto m : mem_unit) { + if (m.first == word) { + mem.push_back(m.second); + break; + } + } + } + if (mem.size() != 0) { + json::map tmp; + tmp["memory_units"] = std::move(mem); + json::vector vpns; + if (no_vpns) + vpns.push_back(nullptr); + else + vpns.push_back(vpn); + tmp["vpns"] = std::move(vpns); + mem_units_and_vpns.push_back(std::move(tmp)); + } + vpn++; + } + if (skip_spare_bank && spare_mem.size() != 0) { + if (spare_mem.size() == 1) { + mra["spare_bank_memory_unit"] = spare_mem[0]; + } else { + json::vector &spare = mra["spare_bank_memory_unit"]; + for (auto u : spare_mem) spare.push_back(u); + } + } + return json::mkuniq(std::move(mra)); +} + +json::map *Table::base_tbl_cfg(json::vector &out, const char *type, int size) const { + auto tbl = p4_table->base_tbl_cfg(out, size, this); + if (context_json) add_json_node_to_table(*tbl, "user_annotations"); + return tbl; +} + +json::map *Table::add_stage_tbl_cfg(json::map &tbl, const char *type, int size) const { + json::vector &stage_tables = tbl["stage_tables"]; + json::map stage_tbl; + stage_tbl["stage_number"] = stage->stageno; + stage_tbl["size"] = size; + stage_tbl["stage_table_type"] = type; + stage_tbl["logical_table_id"] = logical_id; + if (physical_ids) { + // this is only used by the driver to set miss entry imem/iad/next, so it should + // not matter which physical table it is set on if there are multiple + stage_tbl["physical_table_id"] = *physical_ids.begin(); + 
} + + if (this->to()) { + stage_tbl["has_attached_gateway"] = false; + if (get_gateway()) stage_tbl["has_attached_gateway"] = true; + } + if (!strcmp(type, "selection") && get_stateful()) + tbl["bound_to_stateful_table_handle"] = get_stateful()->handle(); + if (Target::SUPPORT_ALWAYS_RUN() && (this->to() || this->to())) + stage_tbl["always_run"] = is_always_run(); + + stage_tables.push_back(std::move(stage_tbl)); + return &(stage_tables.back()->to()); +} + +/** + * One can no longer use whether the table is directly or indirectly addressed on whether + * a table is referenced that way. This is due to the corner case on hash action tables + * For a hash action table, an attached table that was previously directly addressed is now + * addressed by hash. However, for the driver, the driver must know which tables used to be + * directly addressed vs. an attached table that is addressed by a hash based index. + * + * Thus, for those corner cases, a how_referenced in the p4 tag of the attached table is + * currently provided. Really for an attached table in hardware, it has no sense of how the + * table is addressed, as it only receives an address, so if somehow two tables, where one was + * direct while another was indirect (which is theoretically supportable if a hash action direct + * counter is shared), would break this parameter. + * + * However, for the moment, there are no realistic attached table not either directly or indirectly + * referenced + * + * If we need to change this, this was the delineation for how this was determined in match tables: + * + * In the call for both of these examples, the address field is a hash_dist object, as this is + * necessary for the set up of the address. This call, unlike every other type table, cannot + * be the place where the address is determined. + * + * Instead, the attached calls in the action is how the assembler can delineate whether the + * reference table is direct or indirect. 
If the address argument is $DIRECT, then the direct + * table has been converted to a hash, however if the argument is $hash_dist, then the original + * call was from a hash-based index, and is indirect + */ +void Table::add_reference_table(json::vector &table_refs, const Table::Call &c) const { + if (c) { + auto t_name = c->name(); + if (c->p4_table) { + t_name = c->p4_table->p4_name(); + if (!t_name) { + error(-1, "No p4 table name found for table : %s", c->name()); + return; + } + } + // Dont add ref table if already present in table_refs vector + for (auto &tref : table_refs) { + auto tref_name = tref->to()["name"]; + if (!strcmp(tref_name->as_string()->c_str(), t_name)) return; + } + json::map table_ref; + std::string hr = c->to()->how_referenced(); + if (hr.empty()) hr = c->to()->is_direct() ? "direct" : "indirect"; + table_ref["how_referenced"] = hr; + table_ref["handle"] = c->handle(); + table_ref["name"] = t_name; + auto mtr = c->to(); + if (mtr && mtr->uses_colormaprams()) { + BUG_CHECK(mtr->color_mapram_addr != MeterTable::NO_COLOR_MAP, + "inconsistent color mapram address bus for %s", mtr->name()); + table_ref["color_mapram_addr_type"] = + mtr->color_mapram_addr == MeterTable::IDLE_MAP_ADDR ? 
"idle" : "stats"; + } + + table_refs.push_back(std::move(table_ref)); + } +} + +bool Table::is_directly_referenced(const Table::Call &c) const { + if (c) { + std::string hr = c->to()->how_referenced(); + if (hr.empty()) { + if (c->to()->is_direct()) return true; + } + } + return false; +} + +json::map &Table::add_pack_format(json::map &stage_tbl, int memword, int words, int entries) const { + json::map pack_fmt; + pack_fmt["table_word_width"] = memword * words; + pack_fmt["memory_word_width"] = memword; + if (entries >= 0) pack_fmt["entries_per_table_word"] = entries; + pack_fmt["number_memory_units_per_table_word"] = words; + json::vector &pack_format = stage_tbl["pack_format"]; + pack_format.push_back(std::move(pack_fmt)); + return pack_format.back()->to(); +} + +void Table::canon_field_list(json::vector &field_list) const { + for (auto &field_ : field_list) { + auto &field = field_->to(); + auto &name = field["field_name"]->to(); + if (int lo = remove_name_tail_range(name)) field["start_bit"]->to().val += lo; + } +} + +std::vector Table::get_calls() const { + std::vector rv; + if (action) rv.emplace_back(action); + if (instruction) rv.emplace_back(instruction); + return rv; +} + +/** + * Determines both the start bit and the source name in the context JSON node for a particular + * field. 
Do not like string matching, and this should potentially be determined by looking + * through a list of fields, but this will work in the short term + */ +void Table::get_cjson_source(const std::string &field_name, std::string &source, + int &start_bit) const { + source = "spec"; + if (field_name == "hash_group") { + source = "proxy_hash"; + } else if (field_name == "version") { + source = "version"; + } else if (field_name == "immediate") { + source = "immediate"; + } else if (field_name == "action") { + source = "instr"; + } else if (field_name == "next") { + source = "next_table"; + } else if (field_name == "action_addr") { + source = "adt_ptr"; + if (auto adt = action->to()) start_bit = std::min(5U, adt->get_log2size() - 2); + } else if (field_name == "counter_addr") { + source = "stats_ptr"; + auto a = get_attached(); + if (a && a->stats.size() > 0) { + auto s = a->stats[0]; + start_bit = s->address_shift(); + } + } else if (field_name == "counter_pfe") { + source = "stats_ptr"; + start_bit = STATISTICS_PER_FLOW_ENABLE_START_BIT; + } else if (field_name == "meter_addr") { + if (auto m = get_meter()) { + source = "meter_ptr"; + start_bit = m->address_shift(); + } else if (auto s = get_selector()) { + source = "sel_ptr"; + start_bit = s->address_shift(); + } else if (auto s = get_stateful()) { + source = "stful_ptr"; + start_bit = s->address_shift(); + } else { + error(lineno, "Table %s has a meter_addr but no attached meter", name()); + } + } else if (field_name == "meter_pfe") { + if (get_meter()) { + source = "meter_ptr"; + } else if (get_selector()) { + source = "sel_ptr"; + } else if (get_stateful()) { + source = "stful_ptr"; + } else { + error(lineno, "Table %s has a meter_pfe but no attached meter", name()); + } + start_bit = METER_PER_FLOW_ENABLE_START_BIT; + } else if (field_name == "meter_type") { + if (get_meter()) + source = "meter_ptr"; + else if (get_selector()) + source = "sel_ptr"; + else if (get_stateful()) + source = "stful_ptr"; + else + 
error(lineno, "Table %s has a meter_type but no attached meter", name()); + start_bit = METER_TYPE_START_BIT; + } else if (field_name == "sel_len_mod") { + source = "selection_length"; + } else if (field_name == "sel_len_shift") { + source = "selection_length_shift"; + } else if (field_name == "valid") { + source = "valid"; + } +} + +/** + * Adds a field into the format of either a match or action table. Honestly, this is used + * for both action data tables and match tables, and this should be split up into two + * separate functions, as the corner casing for these different cases can be quite different + * and lead to some significant confusion + */ +void Table::add_field_to_pack_format(json::vector &field_list, unsigned basebit, std::string name, + const Table::Format::Field &field, + const Table::Actions::Action *act) const { + decltype(act->reverse_alias()) aliases; + if (act) aliases = act->reverse_alias(); + auto alias = get(aliases, name); + + // we need to add only those aliases that are parameters, and there can be multiple + // such fields that contain slices of one or more other aliases + // FIXME: why aren't we de-aliasing in setup? + for (auto a : alias) { + json::string param_name = a->first; + int lo = remove_name_tail_range(param_name); + if (act->has_param(param_name) || a->second.is_constant) { + auto newField = field; + if (a->second.hi != -1) { + unsigned fieldSize = a->second.hi - a->second.lo + 1; + if (field.bits.size() > 1) warning(0, "multiple bit ranges for %s", name.c_str()); + newField = + Table::Format::Field(field.fmt, fieldSize, a->second.lo + field.bits[0].lo, + static_cast(field.flags)); + } + act->check_conditional(newField); + + if (a->second.is_constant) + output_field_to_pack_format(field_list, basebit, a->first, "constant", 0, newField, + a->second.value); + else + output_field_to_pack_format(field_list, basebit, a->first, "spec", 0, newField); + } + } + + // Determine the source of the field. 
If called recursively for an alias, + // act will be a nullptr + std::string source = ""; + int start_bit = 0; + if (!act) get_cjson_source(name, source, start_bit); + + if (field.flags == Format::Field::ZERO) source = "zero"; + + if (source != "") + output_field_to_pack_format(field_list, basebit, name, source, start_bit, field); + + // Convert fields with slices embedded in the name, eg. "foo.bar[4:0]", to + // slice-free field names with the start_bit incremented by the low bit of + // the slice. + canon_field_list(field_list); +} + +void Table::output_field_to_pack_format(json::vector &field_list, unsigned basebit, + std::string name, std::string source, unsigned start_bit, + const Table::Format::Field &field, unsigned value) const { + unsigned add_width = 0; + bool pfe_enable = false; + unsigned indirect_addr_start_bit = 0; + int lobit = 0; + for (auto &bits : field.bits) { + json::map field_entry; + field_entry["start_bit"] = lobit + start_bit; + field_entry["field_width"] = bits.size() + add_width; + field_entry["lsb_mem_word_idx"] = bits.lo / MEM_WORD_WIDTH; + field_entry["msb_mem_word_idx"] = bits.hi / MEM_WORD_WIDTH; + field_entry["source"] = json::string(source); + field_entry["enable_pfe"] = false; + if (source == "constant") { + field_entry["const_tuples"] = + json::vector{json::map{{"dest_start", json::number(0)}, + {"value", json::number(value)}, + {"dest_width", json::number(bits.size())}}}; + } + field_entry["lsb_mem_word_offset"] = basebit + (bits.lo % MEM_WORD_WIDTH); + field_entry["field_name"] = json::string(name); + field_entry["global_name"] = json::string(""); + + if (field.conditional_value) { + field_entry["is_mod_field_conditionally_value"] = true; + field_entry["mod_field_conditionally_mask_field_name"] = json::string(field.condition); + } + // field_entry["immediate_name"] = json::string(immediate_name); + // if (this->to()) + if (this->to()) { + // FIXME-JSON : match_mode only matters for ATCAM's not clear if + // 'unused' or 'exact' 
is used by driver + std::string match_mode = "unused"; + // For version bits field match mode is set to "s1q0" (to match + // glass) + if (name == "version") match_mode = "s1q0"; + field_entry["match_mode"] = match_mode; + } + field_list.push_back(std::move(field_entry)); + lobit += bits.size(); + } +} + +void Table::add_zero_padding_fields(Table::Format *format, Table::Actions::Action *act, + unsigned format_width) const { + if (!format) return; + // For an action with no format pad zeros for action table size + unsigned pad_count = 0; + if (format->log2size == 0) { + if (auto at = this->to()) { + format->size = at->get_size(); + BUG_CHECK(format->size); + format->log2size = at->get_log2size(); + // For wide action formats, entries per word is 1, so plug in a + // single pad field of 256 bits + unsigned action_entries_per_word = std::max(1U, 128U / format->size); + // Add a flag type to specify padding? + Format::Field f(format, format->size, 0, Format::Field::ZERO); + for (unsigned i = 0; i < action_entries_per_word; i++) + format->add_field(f, "--padding--"); + } else { + error(lineno, + "Adding zero padding to a non action table " + "which has no action entries in format"); + } + return; + } + decltype(act->reverse_alias()) alias; + if (act) alias = act->reverse_alias(); + + // Determine the zero padding necessary by creating a bitvector that has all + // bits cleared, and then iterate through parameters and immediates and set the + // bits that are used. Create padding for the remaining bit ranges. 
+ bitvec padbits; + padbits.clrrange(0, format_width - 1); + for (int entry = 0; entry < format->groups(); ++entry) { + for (auto &field : format->group(entry)) { + auto aliases = get(alias, field.first); + for (auto a : aliases) { + auto newField = field.second; + json::string param_name = a->first; + int lo = remove_name_tail_range(param_name); + if (act->has_param(param_name) || a->second.is_constant) { + auto newField = Table::Format::Field( + field.second.fmt, a->second.size(), a->second.lo + field.second.bits[0].lo, + static_cast(field.second.flags)); + newField.set_field_bits(padbits); + } + } + if (aliases.size() == 0) field.second.set_field_bits(padbits); + } + } + + int idx_lo = 0; + for (auto p : padbits) { + if (p > idx_lo) { + Format::Field f(format, p - idx_lo, idx_lo, Format::Field::ZERO); + std::string pad_name = + "--padding_" + std::to_string(idx_lo) + "_" + std::to_string(p - 1) + "--"; + format->add_field(f, pad_name); + } + idx_lo = p + 1; + } + if (idx_lo < int(format_width)) { + Format::Field f(format, format_width - idx_lo, idx_lo, Format::Field::ZERO); + std::string pad_name = + "--padding_" + std::to_string(idx_lo) + "_" + std::to_string(format_width - 1) + "--"; + format->add_field(f, pad_name); + } +} + +json::map &Table::add_pack_format(json::map &stage_tbl, Table::Format *format, bool pad_zeros, + bool print_fields, Table::Actions::Action *act) const { + // Add zero padding fields to format + // FIXME: Can this be moved to a format pass? + if (pad_zeros) + add_zero_padding_fields(format, act, format ? format->get_padding_format_width() : -1); + json::map pack_fmt; + auto mem_word_width = ram_word_width(); + pack_fmt["memory_word_width"] = mem_word_width; + auto table_word_width = format ? format->get_table_word_width() : ram_word_width(); + pack_fmt["table_word_width"] = table_word_width; + pack_fmt["entries_per_table_word"] = format ? 
format->get_entries_per_table_word() : 1; + pack_fmt["number_memory_units_per_table_word"] = + format ? format->get_mem_units_per_table_word() : 1; + + /** + * Entry number has to be unique for all tables. However, for ATCAM tables specifically, + * the entry with the highest priority starts at entry number 0. The priority decreases + * as the entry number increases. + * + * This is actually reversed in the hardware. The compiler format entries are in priority + * order in the hardware, and have been validated in validate_format. Thus, the context + * JSON is reversed. + */ + if (print_fields) { + BUG_CHECK(format); + int basebit = std::max(0, mem_word_width - (1 << format->log2size)); + json::vector &entry_list = pack_fmt["entries"]; + if (format->is_wide_format()) { + for (int i = format->groups() - 1; i >= 0; --i) { + int entry_number = i; + if (table_type() == ATCAM) entry_number = format->groups() - 1 - i; + json::vector field_list; + for (auto it = format->begin(i); it != format->end(i); ++it) + add_field_to_pack_format(field_list, basebit, it->first, it->second, act); + entry_list.push_back(json::map{{"entry_number", json::number(entry_number)}, + {"fields", std::move(field_list)}}); + } + } else { + for (int i = format->get_entries_per_table_word() - 1; i >= 0; --i) { + int entry_number = i; + if (table_type() == ATCAM) + entry_number = format->get_entries_per_table_word() - 1 - i; + json::vector field_list; + for (auto &field : *format) + add_field_to_pack_format(field_list, basebit, field.first, field.second, act); + entry_list.push_back(json::map{{"entry_number", json::number(entry_number)}, + {"fields", std::move(field_list)}}); + basebit -= 1 << format->log2size; + } + } + } + if (act) pack_fmt["action_handle"] = act->handle; + json::vector &pack_format = stage_tbl["pack_format"]; + pack_format.push_back(std::move(pack_fmt)); + return pack_format.back()->to(); +} + +// Check if node exists in context_json entry in bfa, add entry to the input +// json 
node and remove the entry from context_json. +// +// Set parameter "append" to true in order to append to existing entries in +// specified section of context_json. Set to false to overwrite. Applies +// only to json::vector containers. +bool Table::add_json_node_to_table(json::map &tbl, const char *name, bool append) const { + if (context_json) { + if (context_json->count(name)) { + std::unique_ptr new_obj = context_json->remove(name); + json::vector *add_vect = nullptr; + if (append && (add_vect = dynamic_cast(new_obj.get()))) { + json::vector &new_vect = tbl[name]; + std::move(add_vect->begin(), add_vect->end(), std::back_inserter(new_vect)); + } else + tbl[name] = std::move(new_obj); + return true; + } + } + return false; +} + +void Table::add_match_key_cfg(json::map &tbl) const { + json::vector ¶ms = tbl["match_key_fields"]; + if ((!p4_params_list.empty()) && this->to()) { + // If a table is splitted to different stages in backend, the + // match_key_fields section will be populated every time the splitted + // tables are emitted. Therefore, we clear the vector before populating + // it again to avoid duplicated keys. + params.clear(); + for (auto &p : p4_params_list) { + json::map param; + std::string name = p.name; + std::string global_name = ""; + if (p.key_name.empty()) { + param["name"] = name; + } else { + // Presence of key name indicates the field has a name + // annotation. If the name annotation is on a field slice, then + // the slice is treated as a field with the key_name as its + // "name". The field output will have the same bit_width and + // bit_width_full indicating its not treated as a slice. We + // also provide the original p4 name as the "global_name" to + // allow driver to use it as a lookup up against the snapshot + // fields published in context.json. These fields will all have + // original p4 field names. 
+ param["name"] = p.key_name; + param["global_name"] = p.name; + } + param["start_bit"] = p.start_bit; + param["bit_width"] = p.bit_width; + param["bit_width_full"] = p.bit_width_full; + if (!p.mask.empty()) { + std::stringstream ss; + ss << "0x" << p.mask; + param["mask"] = ss.str(); + } + param["position"] = p.position; + param["match_type"] = p.type; + param["is_valid"] = p.is_valid; + std::string fieldname, instname; + gen_instfield_name(name, instname, fieldname); + param["instance_name"] = instname; + param["field_name"] = fieldname; + if (!p.alias.empty()) param["alias"] = p.alias; + if (p.context_json) param.merge(*p.context_json.get()); + params.push_back(std::move(param)); + if (p.type == "range") tbl["uses_range"] = true; + } + } +} + +template +void Table::init_json_node(json::map &tbl, const char *name) const { + if (tbl.count(name)) return; + tbl[name] = T(); +} + +void Table::common_tbl_cfg(json::map &tbl) const { + tbl["default_action_handle"] = get_default_action_handle(); + tbl["action_profile"] = action_profile(); + // FIXME -- setting next_table_mask unconditionally only works because we process the + // stage table in stage order (so we'll end up with the value from the last stage table, + // which is what we want.) Should we check in case the ordering ever changes? + tbl["default_next_table_mask"] = next_table_adr_mask; + // FIXME -- the driver currently always assumes this is 0, so we arrange for it to be + // when choosing the action encoding. 
But we should be able to choose something else + tbl["default_next_table_default"] = 0; + // FIXME-JSON: PD related, check glass examples for false (ALPM) + tbl["is_resource_controllable"] = true; + tbl["uses_range"] = false; + if (p4_table && p4_table->disable_atomic_modify) tbl["disable_atomic_modify"] = true; + add_match_key_cfg(tbl); + init_json_node(tbl, "ap_bind_indirect_res_to_match"); + init_json_node(tbl, "static_entries"); + if (context_json) { + add_json_node_to_table(tbl, "ap_bind_indirect_res_to_match"); + } +} + +void Table::add_result_physical_buses(json::map &stage_tbl) const { + json::vector &result_physical_buses = stage_tbl["result_physical_buses"] = json::vector(); + for (auto l : layout) { + if (l.bus.count(Layout::RESULT_BUS)) + result_physical_buses.push_back(l.row * 2 + l.bus.at(Layout::RESULT_BUS)); + } +} + +void Table::merge_context_json(json::map &tbl, json::map &stage_tbl) const { + if (context_json) { + add_json_node_to_table(tbl, "static_entries", true); + stage_tbl.merge(*context_json); + } +} diff --git a/backends/tofino/bf-asm/tables.h b/backends/tofino/bf-asm/tables.h new file mode 100644 index 00000000000..ad66ae2fcea --- /dev/null +++ b/backends/tofino/bf-asm/tables.h @@ -0,0 +1,2246 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TABLES_H_ +#define BACKENDS_TOFINO_BF_ASM_TABLES_H_ + +#include +#include +#include +#include +#include + +#include "backends/tofino/bf-asm/alloc.h" +#include "backends/tofino/bf-asm/asm-types.h" +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/json.h" +#include "backends/tofino/bf-asm/map.h" +#include "backends/tofino/bf-asm/p4_table.h" +#include "backends/tofino/bf-asm/phv.h" +#include "backends/tofino/bf-asm/slist.h" +#include "backends/tofino/bf-asm/target.h" +#include "constants.h" +#include "hash_dist.h" +#include "input_xbar.h" +#include "lib/algorithm.h" +#include "lib/bitops.h" +#include "lib/bitvec.h" +#include "lib/ordered_map.h" + +class ActionBus; +struct ActionBusSource; +class AttachedTable; +struct AttachedTables; +class GatewayTable; +class IdletimeTable; +class ActionTable; +struct Instruction; +class InputXbar; +class MatchTable; +class SelectionTable; +class StatefulTable; +class MeterTable; +class Synth2Port; +class Stage; +struct HashCol; + +struct RandomNumberGen { + int unit; + explicit RandomNumberGen(int u) : unit(u) {} + bool operator==(const RandomNumberGen &a) const { return unit == a.unit; } +}; + +enum class TableOutputModifier { NONE, Color, Address }; +std::ostream &operator<<(std::ostream &, TableOutputModifier); + +/* a memory storage 'unit' somewhere on the chip */ +struct MemUnit { + int stage = INT_MIN; // current stage (only) for tofino1/2 + // can have negative stage numbers for tcams in egress + int row = -1; + int col; // (lamb) unit when row == -1 + MemUnit() = delete; + MemUnit(const MemUnit &) = default; + MemUnit(MemUnit &&) = default; + MemUnit &operator=(const MemUnit &) = default; + MemUnit &operator=(MemUnit &&) = default; + virtual ~MemUnit() {} + explicit MemUnit(int unit) : col(unit) {} + MemUnit(int r, int c) : row(r), col(c) {} + MemUnit(int s, int r, int c) : stage(s), row(r), col(c) {} + bool 
operator==(const MemUnit &a) const { + return std::tie(stage, row, col) == std::tie(a.stage, a.row, a.col); + } + bool operator!=(const MemUnit &a) const { + return std::tie(stage, row, col) != std::tie(a.stage, a.row, a.col); + } + bool operator<(const MemUnit &a) const { + return std::tie(stage, row, col) < std::tie(a.stage, a.row, a.col); + } + virtual const char *desc() const; // Short lived temp for messages + friend std::ostream &operator<<(std::ostream &out, const MemUnit &m) { return out << m.desc(); } +}; + +class Table { + public: + struct Layout { + /* Holds the layout of which rams/tcams/busses are used by the table + * These refer to rows/columns in different spaces: + * ternary match refers to tcams (12x2) + * exact match and ternary indirect refer to physical srams (8x12) + * action (and others?) refer to logical srams (16x6) + * vpns contains the (base)vpn index of each ram in the row + * maprams contain the map ram indexes for synthetic 2-port memories + * vpns/maprams (if not empty) must match up to memunits (same size) */ + int lineno = -1; + int row = -1; + enum bus_type_t { SEARCH_BUS, RESULT_BUS, TIND_BUS, IDLE_BUS, L2R_BUS, R2L_BUS }; + std::map bus; + + int word = -1; // which word for wide tables + bool home_row = false; // is this a home row + std::vector memunits; + std::vector vpns, maprams; + Layout() = default; + Layout(int l, int r) : lineno(l), row(r) {} + friend std::ostream &operator<<(std::ostream &, const Layout &); + + bool word_initialized() const { return word >= 0; } + bool operator==(const Layout &) const; + bool operator!=(const Layout &a) const { return !(*this == a); } + }; + + protected: + Table(int line, std::string &&n, gress_t gr, Stage *s, + int lid = -1); // NOLINT(whitespace/operators) + virtual ~Table(); + Table(const Table &) = delete; + Table(Table &&) = delete; + virtual void setup(VECTOR(pair_t) & data) = 0; + virtual void common_init_setup(const VECTOR(pair_t) &, bool, P4Table::type); + virtual bool 
common_setup(pair_t &, const VECTOR(pair_t) &, P4Table::type); + void setup_context_json(value_t &); + void setup_layout(std::vector &, const VECTOR(pair_t) & data, const char *subname = ""); + int setup_layout_bus_attrib(std::vector &, const value_t &data, const char *what, + Layout::bus_type_t type); + int setup_layout_attrib(std::vector &, const value_t &data, const char *what, + int Layout::*attr); + void setup_logical_id(); + void setup_actions(value_t &); + void setup_maprams(value_t &); + void setup_vpns(std::vector &, VECTOR(value_t) *, bool allow_holes = false); + virtual void vpn_params(int &width, int &depth, int &period, const char *&period_name) const { + BUG(); + } + virtual int get_start_vpn() { return 0; } + void alloc_rams(bool logical, BFN::Alloc2Dbase
&use, + BFN::Alloc2Dbase
*bus_use = 0, + Layout::bus_type_t bus_type = Layout::SEARCH_BUS); + void alloc_global_bus(Layout &, Layout::bus_type_t, int, int, int, int); + virtual void alloc_global_busses(); + void alloc_global_srams(); + void alloc_global_tcams(); + void alloc_busses(BFN::Alloc2Dbase
&bus_use, Layout::bus_type_t bus_type); + void alloc_id(const char *idname, int &id, int &next_id, int max_id, bool order, + BFN::Alloc1Dbase
&use); + void alloc_maprams(); + virtual void alloc_vpns(); + virtual Layout::bus_type_t default_bus_type() const { return Layout::SEARCH_BUS; } + void need_bus(int lineno, BFN::Alloc1Dbase
&use, int idx, const char *name); + static bool allow_ram_sharing(const Table *t1, const Table *t2); + + public: + class Type { + static std::map *all; + std::map::iterator self; + + protected: + explicit Type(std::string &&); // NOLINT(whitespace/operators) + explicit Type(const char *name) : Type(std::string(name)) {} + virtual ~Type(); + + public: + static Type *get(const char *name) { return ::get(all, name); } + static Type *get(const std::string &name) { return ::get(all, name); } + virtual Table *create(int lineno, const char *name, gress_t gress, Stage *stage, int lid, + VECTOR(pair_t) & data) = 0; + }; + + struct Ref { + int lineno; + std::string name; + Ref() : lineno(-1) {} + Ref(const Ref &) = default; + Ref(Ref &&) = default; + Ref &operator=(const Ref &a) & { + name = a.name; + if (lineno < 0) lineno = a.lineno; + return *this; + } + Ref &operator=(Ref &&a) & { + name = a.name; + if (lineno < 0) lineno = a.lineno; + return *this; + } + Ref &operator=(const value_t &a) & { + BUG_CHECK(a.type == tSTR); + name = a.s; + lineno = a.lineno; + return *this; + } + Ref(const std::string &n) : lineno(-1), name(n) {} // NOLINT(runtime/explicit) + Ref(const char *n) : lineno(-1), name(n) {} // NOLINT(runtime/explicit) + Ref(const value_t &a) : lineno(a.lineno) { // NOLINT(runtime/explicit) + if (CHECKTYPE(a, tSTR)) name = a.s; + } + Ref &operator=(const std::string &n) { + name = n; + return *this; + } + operator bool() const { return all && all->count(name) > 0; } + operator Table *() const { return ::get(all, name); } + Table *operator->() const { return ::get(all, name); } + bool set() const { return lineno >= 0; } + bool operator==(const Table *t) const { return name == t->name_; } + bool operator==(const char *t) const { return name == t; } + bool operator==(const std::string &t) const { return name == t; } + bool operator==(const Ref &a) const { return name == a.name; } + bool operator<(const Ref &a) const { return name < a.name; } + bool check() const { + 
if (set() && !*this) error(lineno, "No table named %s", name.c_str()); + return *this; + } + }; + + class NextTables { + std::set next; + unsigned lb_tags = 0; // long branch tags to use (bitmask) + const Table *next_table_ = nullptr; // table to use as next table (if any) + bool resolved = false; + bool can_use_lb(int stage, const NextTables &); + + public: + int lineno = -1; + NextTables() = default; + NextTables(const NextTables &) = default; + NextTables(NextTables &&) = default; + NextTables &operator=(const NextTables &a) = default; + NextTables &operator=(NextTables &&) = default; + NextTables(value_t &v); // NOLINT(runtime/explicit) + + std::set::iterator begin() const { return next.begin(); } + std::set::iterator end() const { return next.end(); } + int size() const { return next.size(); } + bool operator==(const NextTables &a) const { return next == a.next; } + bool subset_of(const NextTables &a) const { + for (auto &n : next) + if (!a.next.count(n)) return false; + return true; + } + void resolve_long_branch(const Table *tbl, const std::map &lbrch); + bool set() const { return lineno >= 0; } + int next_table_id() const { + BUG_CHECK(resolved); + return next_table_ ? 
next_table_->table_id() : Target::END_OF_PIPE(); + } + std::string next_table_name() const { + BUG_CHECK(resolved); + if (next_table_) { + if (auto nxt_p4_name = next_table_->p4_name()) return nxt_p4_name; + } + return "END"; + } + const Table *next_table() const { return next_table_; } + unsigned long_branch_tags() const { return lb_tags; } + unsigned next_in_stage(int stage) const; + bool need_next_map_lut() const; + void force_single_next_table(); + }; + + class Format { + public: + struct bitrange_t { + unsigned lo, hi; + bitrange_t(unsigned l, unsigned h) : lo(l), hi(h) {} + bool operator==(const bitrange_t &a) const { return lo == a.lo && hi == a.hi; } + bool disjoint(const bitrange_t &a) const { return lo > a.hi || a.lo > hi; } + bitrange_t overlap(const bitrange_t &a) const { + // only valid if !disjoint + return bitrange_t(std::max(lo, a.lo), std::min(hi, a.hi)); + } + int size() const { return hi - lo + 1; } + }; + struct Field { + unsigned size = 0, group = 0, flags = 0; + std::vector bits; + Field **by_group = 0; + Format *fmt; // containing format + bool operator==(const Field &a) const { return size == a.size; } + /* return the bit in the format that contains bit i of this field */ + unsigned bit(unsigned i) { + unsigned last = 0; + for (auto &chunk : bits) { + if (i < (unsigned)chunk.size()) return chunk.lo + i; + i -= chunk.size(); + last = chunk.hi + 1; + } + if (i == 0) return last; + BUG(); + return 0; // quiet -Wreturn-type warning + } + /* bit(i), adjusted for the immediate shift of the match group of the field + * returns the bit in the post-extract immediate containing bit i */ + unsigned immed_bit(unsigned i) { + auto rv = bit(i); + if (fmt && fmt->immed) rv -= fmt->immed->by_group[group]->bit(0); + return rv; + } + unsigned hi(unsigned bit) { + for (auto &chunk : bits) + if (bit >= chunk.lo && bit <= chunk.hi) return chunk.hi; + BUG(); + return 0; // quiet -Wreturn-type warning + } + enum flags_t { NONE = 0, USED_IMMED = 1, ZERO = 3 }; + 
bool conditional_value = false; + std::string condition; + explicit Field(Format *f) : fmt(f) {} + Field(Format *f, unsigned size, unsigned lo = 0, enum flags_t fl = NONE) + : size(size), flags(fl), fmt(f) { + if (size) bits.push_back({lo, lo + size - 1}); + } + Field(const Field &f, Format *fmt) + : size(f.size), flags(f.flags), bits(f.bits), fmt(fmt) {} + + /// mark all bits from the field in @param bitset + void set_field_bits(bitvec &bitset) const { + for (auto &b : bits) bitset.setrange(b.lo, b.size()); + } + }; + friend std::ostream &operator<<(std::ostream &, const Field &); + explicit Format(Table *t) : tbl(t) { fmt.resize(1); } + Format(Table *, const VECTOR(pair_t) & data, bool may_overlap = false); + ~Format(); + void pass1(Table *tbl); + void pass2(Table *tbl); + + private: + std::vector> fmt; + std::map::iterator> byindex; + static bool equiv(const ordered_map &, + const ordered_map &); + + public: + int lineno = -1; + Table *tbl; + unsigned size = 0, immed_size = 0; + Field *immed = 0; + unsigned log2size = 0; /* ceil(log2(size)) */ + unsigned overhead_start = 0, overhead_size = 0; // extent of non-match + int overhead_word = -1; + + unsigned groups() const { return fmt.size(); } + const ordered_map &group(int g) const { return fmt.at(g); } + Field *field(const std::string &n, int group = 0) { + BUG_CHECK(group >= 0 && (size_t)group < fmt.size()); + auto it = fmt[group].find(n); + if (it != fmt[group].end()) return &it->second; + return 0; + } + void apply_to_field(const std::string &n, std::function fn) { + for (auto &m : fmt) { + auto it = m.find(n); + if (it != m.end()) fn(&it->second); + } + } + std::string find_field(Field *field) { + for (auto &m : fmt) + for (auto &f : m) + if (field == &f.second) return f.first; + return ""; + } + int find_field_lineno(Field *field) { + for (auto &m : fmt) + for (auto &f : m) + if (field == &f.second) return lineno; + return -1; + } + void add_field(Field &f, std::string name = "dummy", int grp = 0) { + 
fmt[grp].emplace(name, Field(f, this)); + } + decltype(fmt[0].begin()) begin(int grp = 0) { return fmt[grp].begin(); } + decltype(fmt[0].end()) end(int grp = 0) { return fmt[grp].end(); } + decltype(fmt[0].cbegin()) begin(int grp = 0) const { return fmt[grp].begin(); } + decltype(fmt[0].cend()) end(int grp = 0) const { return fmt[grp].end(); } + bool is_wide_format() const { return (log2size >= 7 || groups() > 1) ? true : false; } + int get_entries_per_table_word() const { + // A phase0 table can only have 1 entry + if (tbl->table_type() == PHASE0) return 1; + if (is_wide_format()) return groups(); + return log2size ? (1U << (ceil_log2(tbl->ram_word_width()) - log2size)) : 0; + } + int get_mem_units_per_table_word() const { + return is_wide_format() ? ((size - 1) / tbl->ram_word_width()) + 1 : 1; + } + int get_table_word_width() const { + return is_wide_format() ? tbl->ram_word_width() * get_mem_units_per_table_word() + : tbl->ram_word_width(); + } + int get_padding_format_width() const { + return is_wide_format() ? 
get_mem_units_per_table_word() * tbl->ram_word_width() + : (1U << log2size); + } + }; + + struct Call : Ref { /* a Ref with arguments */ + struct Arg { + enum { Field, HashDist, Counter, Const, Name } type; + + private: + union { + Format::Field *fld; + HashDistribution *hd; + intptr_t val; + char *str; + }; + + void set(const Arg &a) { + type = a.type; + switch (type) { + case Field: + fld = a.fld; + return; + case HashDist: + hd = a.hd; + return; + case Counter: + case Const: + val = a.val; + return; + case Name: + str = a.str; + return; + } + } + + public: + Arg() = delete; + Arg(const Arg &a) { + set(a); + if (type == Name) str = strdup(str); + } + Arg(Arg &&a) { + set(a); + a.type = Const; + } + Arg &operator=(const Arg &a) { + if (&a == this) return *this; + if (a == *this) return *this; + if (type == Name) free(str); + set(a); + if (type == Name) str = strdup(a.str); + return *this; + } + Arg &operator=(Arg &&a) { + std::swap(type, a.type); + std::swap(val, a.val); + return *this; + } + Arg(Format::Field *f) : type(Field) { fld = f; } // NOLINT(runtime/explicit) + Arg(HashDistribution *hdist) : type(HashDist) { // NOLINT(runtime/explicit) + hd = hdist; + } + Arg(int v) : type(Const) { val = v; } // NOLINT(runtime/explicit) + Arg(const char *n) : type(Name) { str = strdup(n); } // NOLINT(runtime/explicit) + Arg(decltype(Counter) ctr, int mode) : type(Counter) { + val = mode; + BUG_CHECK(ctr == Counter); + } + ~Arg() { + if (type == Name) free(str); + } + bool operator==(const Arg &a) const { + if (type != a.type) return false; + switch (type) { + case Field: + return fld == a.fld; + case HashDist: + return hd == a.hd; + case Counter: + case Const: + return val == a.val; + case Name: + return !strcmp(str, a.str); + default: + BUG(); + } + return false; + } + bool operator!=(const Arg &a) const { return !operator==(a); } + Format::Field *field() const { return type == Field ? fld : nullptr; } + HashDistribution *hash_dist() const { return type == HashDist ? 
hd : nullptr; } + const char *name() const { return type == Name ? str : nullptr; } + int count_mode() const { return type == Counter ? val : 0; } + int value() const { return type == Const ? val : 0; } + operator bool() const { return fld != nullptr; } + unsigned size() const; + }; + std::vector args; + void setup(const value_t &v, Table *tbl); + Call() {} + Call(const value_t &v, Table *tbl) { setup(v, tbl); } + bool operator==(const Call &a) const { return Ref::operator==(a) && args == a.args; } + bool operator!=(const Call &a) const { return !(*this == a); } + bool is_direct_call() const { + if (args.size() == 0) return false; + for (auto &a : args) + if (a == "$DIRECT") return true; + return false; + } + }; + + struct p4_param { + std::string name; + std::string alias; + std::string key_name; + unsigned start_bit = 0; + unsigned position = 0; + unsigned bit_width = 0; + unsigned bit_width_full = 0; + bitvec mask; + std::string default_value; // value stored as hex string to accommodate large nos + bool defaulted = false; + bool is_valid = false; + std::string type; + std::unique_ptr context_json; + explicit p4_param(std::string n = "", unsigned p = 0, unsigned bw = 0) + : name(n), position(p), bit_width(bw) {} + }; + friend std::ostream &operator<<(std::ostream &, const p4_param &); + typedef std::vector p4_params; + + class Actions { + public: + struct Action { + struct alias_t { + std::string name; + int lineno = -1, lo = -1, hi = -1; + bool is_constant = false; + unsigned value = 0; + explicit alias_t(value_t &); + unsigned size() const { + if (hi != -1 && lo != -1) + return hi - lo + 1; + else + return 0; + } + std::string to_string() const { + if (hi >= 0 && lo >= 0) + return name + '(' + std::to_string(lo) + ".." 
+ std::to_string(hi) + ')'; + return name; + } + }; + std::string name; + std::string rng_param_name = ""; + int lineno = -1, addr = -1, code = -1; + std::multimap alias; + std::vector> instr; + bitvec slot_use; + unsigned handle = 0; + p4_params p4_params_list; + bool hit_allowed = true; + bool default_allowed = false; + bool default_only = false; + bool is_constant = false; + std::string hit_disallowed_reason = ""; + std::string default_disallowed_reason = ""; + std::vector attached; + int next_table_encode = -1; + NextTables next_table_ref; + NextTables next_table_miss_ref; + std::map> mod_cond_values; + // The hit map points to next tables for actions as ordered in the + // assembly, we use 'position_in_assembly' to map the correct next + // table, as actions can be ordered in the map different from the + // assembly order. + int position_in_assembly = -1; + bool minmax_use = false; // jbay sful min/max + // Predication operand coming into the output ALUs in stateful actions. This attribute + // is used to make sure that all combined predicate outputs from a given stateful action + // have the same form, because the predication operand is always the same in every + // output ALU. 
+ int pred_comb_sel = -1; + std::unique_ptr context_json; + Action(Table *, Actions *, pair_t &, int); + enum mod_cond_loc_t { MC_ADT, MC_IMMED }; + void setup_mod_cond_values(value_t &map); + Action(const char *n, int l); + Action(const Action &) = delete; + Action(Action &&) = delete; + ~Action(); + bool equiv(Action *a); + bool equivVLIW(Action *a); + typedef const decltype(alias)::value_type alias_value_t; + std::map> reverse_alias() const; + std::string alias_lookup(int lineno, std::string name, int &lo, int &hi) const; + bool has_rng() { return !rng_param_name.empty(); } + const p4_param *has_param(std::string param) const { + for (auto &e : p4_params_list) + if (e.name == param) return &e; + return nullptr; + } + void pass1(Table *tbl); + void check_next(Table *tbl); + void check_next_ref(Table *tbl, const Table::Ref &ref) const; + void add_direct_resources(json::vector &direct_resources, const Call &att) const; + void add_indirect_resources(json::vector &indirect_resources, const Call &att) const; + void check_and_add_resource(json::vector &resources, json::map &resource) const; + bool is_color_aware() const; + void gen_simple_tbl_cfg(json::vector &) const; + void add_p4_params(json::vector &, bool include_default = true) const; + void check_conditional(Table::Format::Field &field) const; + bool immediate_conditional(int lo, int sz, std::string &condition) const; + friend std::ostream &operator<<(std::ostream &, const alias_t &); + friend std::ostream &operator<<(std::ostream &, const Action &); + }; + + private: + typedef ordered_map map_t; + map_t actions; + bitvec code_use; + std::map by_code; + bitvec slot_use; + Table *table; + + public: + int max_code = -1; + Actions(Table *tbl, VECTOR(pair_t) &); + typedef map_t::value_type value_type; + typedef IterValues::iterator iterator; + typedef IterValues::iterator const_iterator; + iterator begin() { return iterator(actions.begin()); } + const_iterator begin() const { return const_iterator(actions.begin()); 
} + iterator end() { return iterator(actions.end()); } + const_iterator end() const { return const_iterator(actions.end()); } + int count() { return actions.size(); } + int hit_actions_count() const; + int default_actions_count() const; + Action *action(const std::string &n) { + auto it = actions.find(n); + return it == actions.end() ? nullptr : &it->second; + } + bool exists(const std::string &n) { return actions.count(n) > 0; } + void pass1(Table *); + void pass2(Table *); + void stateful_pass2(Table *); + template + void write_regs(REGS &, Table *); + void add_p4_params(const Action &, json::vector &) const; + void gen_tbl_cfg(json::vector &) const; + void add_immediate_mapping(json::map &); + void add_action_format(const Table *, json::map &) const; + bool has_hash_dist() { return (table->table_type() == HASH_ACTION); } + size_t size() { return actions.size(); } + }; + + public: + const char *name() const { return name_.c_str(); } + const char *p4_name() const { + if (p4_table) { + return p4_table->p4_name(); + } + return nullptr; + } + unsigned p4_size() const { + if (p4_table) { + return p4_table->p4_size(); + } + return 0; + } + unsigned handle() const { + if (p4_table) { + return p4_table->get_handle(); + } + return -1; + } + std::string action_profile() const { + if (p4_table) { + return p4_table->action_profile; + } + return ""; + } + std::string how_referenced() const { + if (p4_table) { + return p4_table->how_referenced; + } + return ""; + } + int table_id() const; + virtual bool is_always_run() const { return false; } + virtual void pass0() {} // only match tables need pass0 + virtual void pass1(); + virtual void pass2() = 0; + virtual void pass3() = 0; + /* C++ does not allow virtual template methods, so we work around it by explicitly + * instantiating overloads for all the virtual template methods we want. 
*/ + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, virtual void write_action_regs, + (mau_regs &, const Actions::Action *), {}) + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, virtual void write_merge_regs, + (mau_regs &, int type, int bus), { assert(0); }) + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, virtual void write_merge_regs, + (mau_regs &, MatchTable *match, int type, int bus, + const std::vector &args), + { assert(0); }) + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, virtual void write_regs, (mau_regs &), = 0) + + virtual void gen_tbl_cfg(json::vector &out) const = 0; + virtual json::map *base_tbl_cfg(json::vector &out, const char *type, int size) const; + virtual json::map *add_stage_tbl_cfg(json::map &tbl, const char *type, int size) const; + virtual std::unique_ptr gen_memory_resource_allocation_tbl_cfg( + const char *type, const std::vector &layout, bool skip_spare_bank = false) const; + virtual std::vector determine_spare_bank_memory_units() const { return {}; } + virtual void common_tbl_cfg(json::map &tbl) const; + void add_match_key_cfg(json::map &tbl) const; + bool add_json_node_to_table(json::map &tbl, const char *name, bool append = false) const; + void allocate_physical_ids(unsigned usable = ~0U); + template + void init_json_node(json::map &tbl, const char *name) const; + enum table_type_t { + OTHER = 0, + TERNARY_INDIRECT, + GATEWAY, + ACTION, + SELECTION, + COUNTER, + METER, + IDLETIME, + STATEFUL, + HASH_ACTION, + EXACT, + TERNARY, + PHASE0, + ATCAM, + PROXY_HASH + }; + virtual table_type_t table_type() const { return OTHER; } + virtual int instruction_set() { return 0; /* VLIW_ALU */ } + virtual table_type_t set_match_table(MatchTable *m, bool indirect) { + assert(0); + return OTHER; + } + virtual const MatchTable *get_match_table() const { + assert(0); + return nullptr; + } + virtual MatchTable *get_match_table() { + assert(0); + return nullptr; + } + virtual std::set get_match_tables() { return std::set(); } + virtual const AttachedTables *get_attached() const { 
return 0; } + virtual AttachedTables *get_attached() { return 0; } + virtual const GatewayTable *get_gateway() const { return 0; } + virtual SelectionTable *get_selector() const { return 0; } + virtual MeterTable *get_meter() const { return 0; } + virtual void set_stateful(StatefulTable *s) { BUG(); } + virtual StatefulTable *get_stateful() const { return 0; } + virtual void set_address_used() { + // FIXME -- could use better error message(s) -- lineno is not accurate/useful + error(lineno, + "Tofino does not support extracting the address used on " + "a non-stateful table %s", + name()); + } + virtual void set_color_used() { + error(lineno, "Cannot extract color on a non-meter table %s", name()); + } + virtual void set_output_used() { + error(lineno, "Cannot extract output on a non-stateful table %s", name()); + } + virtual const Call &get_action() const { return action; } + virtual std::vector get_calls() const; + virtual bool is_attached(const Table *) const { + BUG(); + return false; + } + virtual Format::Field *find_address_field(const AttachedTable *) const { + BUG(); + return 0; + } + virtual Format::Field *get_per_flow_enable_param(MatchTable *) const { + BUG(); + return 0; + } + virtual Format::Field *get_meter_address_param(MatchTable *) const { + BUG(); + return 0; + } + virtual Format::Field *get_meter_type_param(MatchTable *) const { + BUG(); + return 0; + } + virtual int direct_shiftcount() const { + BUG(); + return -1; + } + virtual int indirect_shiftcount() const { + BUG(); + return -1; + } + virtual int address_shift() const { + BUG(); + return -1; + } + virtual int home_row() const { + BUG(); + return -1; + } + /* mem unitno mapping -- unit numbers used in context json */ + virtual int json_memunit(const MemUnit &u) const; + virtual int ram_word_width() const { return MEM_WORD_WIDTH; } + virtual int unitram_type() { + BUG(); + return -1; + } + virtual bool uses_colormaprams() const { return false; } + virtual int color_shiftcount(Table::Call 
&call, int group, int tcam_shift) const { + BUG(); + return -1; + } + virtual bool adr_mux_select_stats() { return false; } + virtual bool run_at_eop() { return false; } + virtual Format *get_format() const { return format.get(); } + virtual unsigned determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) const { + assert(0); + return -1; + } + template + void write_mapram_regs(REGS ®s, int row, int col, int vpn, int type); + template + T *to() { + return dynamic_cast(this); + } + template + const T *to() const { + return dynamic_cast(this); + } + virtual void determine_word_and_result_bus() { BUG(); } + virtual int stm_vbus_column() const { BUG(); } + + std::string name_; + int uid; + P4Table *p4_table = 0; + Stage *stage = 0; + gress_t gress; + int lineno = -1; + int logical_id = -1; + bitvec physical_ids; + std::vector dynamic_config; + std::vector> input_xbar; + std::vector layout; + bool no_vpns = false; // for odd actions with null vpns + // generated by compiler + std::unique_ptr format; + int action_enable = -1; + bool enable_action_data_enable = false; + bool enable_action_instruction_enable = false; + Call action; + Call instruction; + std::unique_ptr actions; + std::unique_ptr action_bus; + std::string default_action; + unsigned default_action_handle = 0; + int default_action_lineno = -1; + typedef std::map default_action_params; + default_action_params default_action_parameters; + bool default_only_action = false; + std::vector hit_next; + std::vector extra_next_lut; // extra entries not in the hit_next from gateway + // currently the assembler will add extra elements to the 8 entry next table lut if they + // are needed for a gateway and not present in the lut already. We add these in a separate + // vector from hit_next so that context.json only reports the original hit_next from the source + // and we don't try to get a next table hit index from the action. 
+ NextTables miss_next; + std::map long_branch; + int long_branch_input = -1; + std::map
> pred; // predecessor tables w the actions in + // that table that call this table + std::vector hash_dist; + p4_params p4_params_list; + std::unique_ptr context_json; + // saved here in to extract into the context json + unsigned next_table_adr_mask = 0U; + bitvec reachable_tables_; + + static std::map *all; + static std::vector
*by_uid; + + unsigned layout_size() const { + unsigned rv = 0; + for (auto &row : layout) rv += row.memunits.size(); + return rv; + } + unsigned layout_get_vpn(const MemUnit &m) const { + for (auto &row : layout) { + if (row.row != m.row) continue; + auto u = find(row.memunits.begin(), row.memunits.end(), m); + if (u == row.memunits.end()) continue; + return row.vpns.at(u - row.memunits.begin()); + } + BUG(); + return 0; + } + void layout_vpn_bounds(int &min, int &max, bool spare = false) const { + min = 1000000; + max = -1; + for (const Layout &row : layout) + for (const auto v : row.vpns) { + if (v < min) min = v; + if (v > max) max = v; + } + if (spare && max > min) --max; + } + virtual Format::Field *lookup_field(const std::string &n, const std::string &act = "") const { + return format ? format->field(n) : 0; + } + virtual std::string find_field(Format::Field *field) { + return format ? format->find_field(field) : ""; + } + virtual int find_field_lineno(Format::Field *field) { + return format ? 
format->find_field_lineno(field) : -1; + } + virtual void apply_to_field(const std::string &n, std::function fn) { + if (format) format->apply_to_field(n, fn); + } + int find_on_ixbar(Phv::Slice sl, InputXbar::Group group, InputXbar::Group *found = nullptr); + int find_on_ixbar(Phv::Slice sl, int group) { + return find_on_ixbar(sl, InputXbar::Group(InputXbar::Group::EXACT, group)); + } + virtual HashDistribution *find_hash_dist(int unit); + virtual int find_on_actionbus(const ActionBusSource &src, int lo, int hi, int size, + int pos = -1); + virtual void need_on_actionbus(const ActionBusSource &src, int lo, int hi, int size); + virtual int find_on_actionbus(const char *n, TableOutputModifier mod, int lo, int hi, int size, + int *len = 0); + int find_on_actionbus(const char *n, int lo, int hi, int size, int *len = 0) { + return find_on_actionbus(n, TableOutputModifier::NONE, lo, hi, size, len); + } + int find_on_actionbus(const std::string &n, TableOutputModifier mod, int lo, int hi, int size, + int *len = 0) { + return find_on_actionbus(n.c_str(), mod, lo, hi, size, len); + } + int find_on_actionbus(const std::string &n, int lo, int hi, int size, int *len = 0) { + return find_on_actionbus(n.c_str(), TableOutputModifier::NONE, lo, hi, size, len); + } + virtual void need_on_actionbus(Table *att, TableOutputModifier mod, int lo, int hi, int size); + static bool allow_bus_sharing(Table *t1, Table *t2); + virtual Call &action_call() { return action; } + virtual Call &instruction_call() { return instruction; } + virtual Actions *get_actions() const { return actions.get(); } + virtual const std::vector &get_hit_next() const { return hit_next; } + virtual const NextTables &get_miss_next() const { return miss_next; } + virtual bool is_directly_referenced(const Table::Call &c) const; + virtual void add_reference_table(json::vector &table_refs, const Table::Call &c) const; + json::map &add_pack_format(json::map &stage_tbl, int memword, int words, + int entries = -1) const; + 
json::map &add_pack_format(json::map &stage_tbl, Table::Format *format, bool pad_zeros = true, + bool print_fields = true, + Table::Actions::Action *act = nullptr) const; + virtual void add_field_to_pack_format(json::vector &field_list, unsigned basebit, + std::string name, const Table::Format::Field &field, + const Table::Actions::Action *act) const; + virtual bool validate_call(Table::Call &call, MatchTable *self, size_t required_args, + int hash_dist_type, Table::Call &first_call) { + BUG(); + return false; + } + bool validate_instruction(Table::Call &call) const; + // const std::vector &); + // Generate the context json for a field into field list. + // Use the bits specified in field, offset by the base bit. + // If the field is a constant, output a const_tuple map, including the specified value. + void output_field_to_pack_format(json::vector &field_list, unsigned basebit, std::string name, + std::string source, unsigned start_bit, + const Table::Format::Field &field, unsigned value = 0) const; + void add_zero_padding_fields(Table::Format *format, Table::Actions::Action *act = nullptr, + unsigned format_width = 64) const; + void get_cjson_source(const std::string &field_name, std::string &source, int &start_bit) const; + // Result physical buses should be setup for + // Exact/Hash/MatchwithNoKey/ATCAM/Ternary tables + virtual void add_result_physical_buses(json::map &stage_tbl) const; + virtual void merge_context_json(json::map &tbl, json::map &stage_tbl) const; + void canon_field_list(json::vector &field_list) const; + void for_all_next(std::function fn); + void check_next(const Ref &next); + void check_next(NextTables &next); + void check_next(); + virtual void set_pred(); + /* find the predecessors in the given stage that must run iff this table runs. + * includes `this` if it is in the stage. The values are the set of actions that + * (lead to) triggering this table, or empty if any action might */ + std::map
> find_pred_in_stage( + int stageno, const std::set &acts = std::set()); + + bool choose_logical_id(const slist
*work = nullptr); + virtual int hit_next_size() const { return hit_next.size(); } + virtual int get_tcam_id() const { BUG("%s not a TCAM table", name()); } + + const std::vector find_p4_params(std::string s, std::string t = "", + unsigned start_bit = -1, + int width = -1) const { + remove_name_tail_range(s); + std::vector params; + if (start_bit <= -1) return params; + if (width <= -1) return params; + int end_bit = start_bit + width; + for (auto &p : p4_params_list) { + if ((p.name == s) || (p.alias == s)) { + int p_end_bit = p.start_bit + p.bit_width; + if (!t.empty() && (p.type != t)) continue; + if (p.start_bit > start_bit) continue; + if (p_end_bit < end_bit) continue; + params.push_back(&p); + } + } + return params; + } + + const p4_param *find_p4_param(std::string s, std::string t = "", unsigned start_bit = -1, + int width = -1) const { + remove_name_tail_range(s); + std::vector params; + for (auto &p : p4_params_list) { + if ((p.name == s) || (p.alias == s)) { + if (!t.empty() && (p.type != t)) continue; + if ((start_bit > -1) && (start_bit < p.start_bit)) continue; + if ((width > -1) && (p.start_bit + p.bit_width < start_bit + width)) continue; + return &p; + } + } + return nullptr; + } + + const p4_param *find_p4_param_type(std::string &s) const { + for (auto &p : p4_params_list) + if (p.type == s) return &p; + return nullptr; + } + virtual std::string get_default_action() { + return (!default_action.empty()) ? default_action : action ? action->default_action : ""; + } + virtual default_action_params *get_default_action_parameters() { + return (!default_action_parameters.empty()) ? &default_action_parameters + : action ? &action->default_action_parameters + : nullptr; + } + virtual unsigned get_default_action_handle() const { + return default_action_handle > 0 ? default_action_handle + : action ? 
action->default_action_handle + : 0; + } + int get_format_field_size(std::string s) const { + if (auto field = lookup_field(s)) return field->size; + return 0; + } + virtual bool needs_handle() const { return false; } + virtual bool needs_next() const { return false; } + virtual bitvec compute_reachable_tables(); + bitvec reachable_tables() { + if (!reachable_tables_) reachable_tables_ = compute_reachable_tables(); + return reachable_tables_; + } + std::string loc() const; +}; + +std::ostream &operator<<(std::ostream &, const Table::Layout &); +std::ostream &operator<<(std::ostream &, const Table::Layout::bus_type_t); + +class FakeTable : public Table { + public: + explicit FakeTable(const char *name) : Table(-1, name, INGRESS, 0, -1) {} + void setup(VECTOR(pair_t) & data) override { assert(0); } + void pass1() override { assert(0); } + void pass2() override { assert(0); } + void pass3() override { assert(0); } + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_regs, (mau_regs &), override { assert(0); }) + void gen_tbl_cfg(json::vector &out) const override { assert(0); } +}; + +class AlwaysRunTable : public Table { + /* a 'table' to hold the always run action in a stage */ + public: + AlwaysRunTable(gress_t gress, Stage *stage, pair_t &init); + void setup(VECTOR(pair_t) & data) override { assert(0); } + void pass1() override { actions->pass1(this); } + void pass2() override { actions->pass2(this); } + void pass3() override {} + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_regs, (mau_regs & regs), override) + void gen_tbl_cfg(json::vector &out) const override {} +}; + +struct AttachedTables { + Table::Call selector; + Table::Call selector_length; + std::vector stats, meters, statefuls; + Table::Call meter_color; + SelectionTable *get_selector() const; + MeterTable *get_meter(std::string name = "") const; + StatefulTable *get_stateful(std::string name = "") const; + Table::Format::Field *find_address_field(const AttachedTable *tbl) const; + const Table::Call 
*get_call(const Table *) const; + bool is_attached(const Table *tbl) const { return get_call(tbl) != nullptr; } + void pass0(MatchTable *self); + void pass1(MatchTable *self); + template + void write_merge_regs(REGS ®s, MatchTable *self, int type, int bus); + template + void write_tcam_merge_regs(REGS ®s, MatchTable *self, int bus, int tcam_shift); + bool run_at_eop(); + bitvec compute_reachable_tables() const; +}; + +#define DECLARE_ABSTRACT_TABLE_TYPE(TYPE, PARENT, ...) \ + class TYPE : public PARENT { \ + protected: \ + TYPE(int l, const char *n, gress_t g, Stage *s, int lid) : PARENT(l, n, g, s, lid) {} \ + __VA_ARGS__ \ + }; + +DECLARE_ABSTRACT_TABLE_TYPE( + MatchTable, Table, GatewayTable *gateway = 0; IdletimeTable *idletime = 0; + AttachedTables attached; bool always_run = false; friend struct AttachedTables; + enum {NONE = 0, TABLE_MISS = 1, TABLE_HIT = 2, DISABLED = 3, GATEWAY_MISS = 4, GATEWAY_HIT = 5, + GATEWAY_INHIBIT = 6} table_counter = NONE; + + using Table::pass1; using Table::write_regs; + template void write_common_regs(typename TARGET::mau_regs &, int, Table *); + template void write_regs(REGS &, int type, Table *result); + template void write_next_table_regs(REGS &, Table *); + void common_init_setup(const VECTOR(pair_t) &, bool, P4Table::type) override; + bool common_setup(pair_t &, const VECTOR(pair_t) &, P4Table::type) override; + int get_address_mau_actiondata_adr_default(unsigned log2size, bool per_flow_enable); public + : bool is_always_run() const override { return always_run; } void pass0() override; + void pass1() override; void pass3() override; bool is_alpm() const { + if (p4_table) { + return p4_table->is_alpm(); + } + return false; + } bool is_attached(const Table *tbl) const override; + const Table::Call *get_call(const Table *tbl) const { + return get_attached()->get_call(tbl); + } const AttachedTables *get_attached() const override { return &attached; } std::vector + get_calls() const override; + AttachedTables * get_attached() 
override { return &attached; } Format * + get_format() const override; + const GatewayTable *get_gateway() + const override { return gateway; } const MatchTable *get_match_table() const override { + return this; + } MatchTable *get_match_table() override { return this; } std::set + get_match_tables() override { + std::set rv; + rv.insert(this); + return rv; + } Format::Field *find_address_field(const AttachedTable *tbl) const override { + return attached.find_address_field(tbl); + } Format::Field *lookup_field(const std::string &n, const std::string &act = "") + const override; + bool run_at_eop() override { return attached.run_at_eop(); } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + virtual bool is_ternary() { return false; } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + void gen_idletime_tbl_cfg(json::map &stage_tbl) const; + int direct_shiftcount() const override { + return 64; + } void gen_hash_bits(const std::map &hash_table, InputXbar::HashTable ht_id, + json::vector &hash_bits, unsigned hash_group_no, bitvec hash_bits_used) + const; + virtual void add_hash_functions(json::map &stage_tbl) const; + void add_all_reference_tables(json::map &tbl, Table *math_table = nullptr) const; + METER_ACCESS_TYPE default_meter_access_type(bool for_stateful); + bool needs_handle() const override { return true; } bool needs_next() + const override { return true; } bitvec compute_reachable_tables() override;) + +#define DECLARE_TABLE_TYPE(TYPE, PARENT, NAME, ...) 
\ + class TYPE : public PARENT { /* NOLINT */ \ + static struct Type : public Table::Type { \ + Type() : Table::Type(NAME) {} \ + TYPE *create(int lineno, const char *name, gress_t gress, Stage *stage, int lid, \ + VECTOR(pair_t) & data); \ + } table_type_singleton; \ + friend struct Type; \ + \ + protected: \ + TYPE(int l, const char *n, gress_t g, Stage *s, int lid) : PARENT(l, n, g, s, lid) {} \ + void setup(VECTOR(pair_t) & data) override; \ + \ + public: \ + void pass1() override; \ + void pass2() override; \ + void pass3() override; \ + /* gcc gets confused by overloading this template with the virtual \ + * functions if we try to specialize the templates, so we mangle \ + * the name with a _vt extension to help it out. */ \ + template \ + void write_regs_vt(REGS ®s); \ + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_regs, (mau_regs & regs), override) \ + void gen_tbl_cfg(json::vector &out) const override; \ + \ + private: \ + __VA_ARGS__ \ + }; + +#define DEFINE_TABLE_TYPE(TYPE) \ + TYPE::Type TYPE::table_type_singleton; \ + TYPE *TYPE::Type::create(int lineno, const char *name, gress_t gress, Stage *stage, int lid, \ + VECTOR(pair_t) & data) { \ + TYPE *rv = new TYPE(lineno, name, gress, stage, lid); \ + rv->setup(data); \ + return rv; \ + } \ + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void TYPE::write_regs, (mau_regs & regs), \ + { write_regs_vt(regs); }) + +/* Used to create a subclass for a table type */ +#define DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(TYPE, KIND) \ + TYPE::Type TYPE::table_type_singleton; \ + TYPE *TYPE::Type::create(int lineno, const char *name, gress_t gress, Stage *stage, int lid, \ + VECTOR(pair_t) & data) { \ + SWITCH_FOREACH_##KIND(options.target, \ + auto *rv = new TARGET::TYPE(lineno, name, gress, stage, lid); \ + rv->setup(data); return rv;) \ + } \ + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void TYPE::write_regs, (mau_regs & regs), \ + { write_regs_vt(regs); }) + +DECLARE_ABSTRACT_TABLE_TYPE(SRamMatchTable, MatchTable, // exact, 
atcam, or proxy_hash + + // NOLINTNEXTLINE (whitespace/indent) + public: + struct Ram : public MemUnit { + using MemUnit::MemUnit; + Ram(const MemUnit &m) : MemUnit(m) {} + Ram(MemUnit &&m) : MemUnit(std::move(m)) {} + bool isLamb() const { return stage == INT_MIN && row == -1; } + const char *desc() const; // Short lived temp for messages + }; + struct Way { + int lineno; + int group_xme; // hash group or xme + int index; // first bit of index + int index_hi = -1; // top bit (if set) for sanity checking + int subword_bits; + bitvec select; + std::vector rams; + bool isLamb() const { + BUG_CHECK(!rams.empty(), "no rams in way"); + return rams.at(0).isLamb(); } + bitvec select_bits() const { + bitvec rv = select; + rv.setrange(index, (isLamb() ? LAMB_DEPTH_BITS : SRAM_DEPTH_BITS) + subword_bits); + return rv; + } + }; + + // NOLINTNEXTLINE (whitespace/indent) + protected: + std::vector ways; + struct WayRam { int way, index, word, bank; }; + std::map way_map; + std::vector match; + std::map match_by_bit; + std::vector> match_in_word; + std::vector word_ixbar_group; + struct GroupInfo { + /* info about which word(s) are used per format group with wide matches */ + int overhead_word; /* which word of wide match contains overhead */ + int overhead_bit; /* lowest bit that contains overhead in that word */ + // The word that is going to contain the result bus. 
Same as the overhead word, if + // the entry actually has overhead + int result_bus_word; + std::map match_group; /* which match group for each word with match */ + std::vector tofino_mask; /* 14-bit tofino byte/nibble mask for each word */ + int vpn_offset; /* which vpn to use for this group */ + GroupInfo() : overhead_word(-1), overhead_bit(-1), result_bus_word(-1), vpn_offset(-1) {} + // important function in order to determine shiftcount for exact match entries + int result_bus_word_group() const { return match_group.at(result_bus_word); } + }; // NOLINT + std::vector group_info; + std::vector> word_info; // which format group corresponds to each + // match group in each word + int mgm_lineno = -1; // match_group_map lineno + friend class GatewayTable; // Gateway needs to examine word group details for compat + friend class Target::Tofino::GatewayTable; + bitvec version_nibble_mask; + // Which hash groups are assigned to the hash_function_number in the hash_function json node + // This is to coordinate with the hash_function_id in the ways + std::map hash_fn_ids; + + // helper function only used/instantiated on tofino1/2 + template + void write_attached_merge_regs(REGS ®s, int bus, int word, int word_group); + + bool parse_ram(const value_t &, std::vector &); + bool parse_way(const value_t &); + void common_sram_setup(pair_t &, const VECTOR(pair_t) &); + void common_sram_checks(); + void alloc_global_busses() override; + void alloc_vpns() override; + int find_problematic_vpn_offset() const; + virtual void setup_ways(); + void setup_hash_function_ids(); + void pass1() override; + template void write_regs_vt(REGS ®s); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + void write_regs, (mau_regs ®s), override ) + virtual std::string get_match_mode(const Phv::Ref &pref, int offset) const; + json::map* add_common_sram_tbl_cfgs(json::map &tbl, + std::string match_type, std::string stage_table_type) const; + void add_action_cfgs(json::map &tbl, json::map &stage_tbl) const; + 
virtual unsigned entry_ram_depth() const { return 1024; } + unsigned get_number_entries() const; + unsigned get_format_width() const; + virtual int determine_pre_byteswizzle_loc(MatchSource *ms, int lo, int hi, int word); + void add_field_to_pack_format(json::vector &field_list, unsigned basebit, std::string name, + const Table::Format::Field &field, + const Table::Actions::Action *act) const override; + std::unique_ptr gen_memory_resource_allocation_tbl_cfg(const Way &) const; + Actions *get_actions() const override { + return actions ? actions.get() : (action ? action->actions.get() : nullptr); + } + void add_hash_functions(json::map &stage_tbl) const override; + virtual void gen_ghost_bits(int hash_function_number, json::vector &ghost_bits_to_hash_bits, + json::vector &ghost_bits_info) const { } + virtual void no_overhead_determine_result_bus_usage(); + + // NOLINTNEXTLINE (whitespace/indent) + public: + Format::Field *lookup_field(const std::string &n, const std::string &act = "") const override; + OVERLOAD_FUNC_FOREACH(TARGET_CLASS, virtual void, setup_word_ixbar_group, (), ()) + OVERLOAD_FUNC_FOREACH(TARGET_CLASS, virtual void, verify_format, (), ()) + OVERLOAD_FUNC_FOREACH(TARGET_CLASS, virtual void, verify_format_pass2, (), ()) + virtual bool verify_match_key(); + void verify_match(unsigned fmt_width); + void vpn_params(int &width, int &depth, int &period, const char *&period_name) const override { + width = (format->size-1)/128 + 1; + period = format->groups(); + depth = period * layout_size() / width; + period_name = "match group size"; } + template void write_merge_regs_vt(REGS ®s, int type, int bus) { + attached.write_merge_regs(regs, this, type, bus); } + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + void write_merge_regs, (mau_regs ®s, int type, int bus), override { + write_merge_regs_vt(regs, type, bus); }) + bool is_match_bit(const std::string name, const int bit) const { + for (auto *m : match) { + std::string m_name = m->name(); + int m_lo = 
remove_name_tail_range(m_name) + m->fieldlobit(); + int m_hi = m_lo + m->size() -1; + if (m_name == name) { + if (m_lo <= bit + && m_hi >= bit) + return true; + } + } + return false; + } + void determine_word_and_result_bus() override; + SelectionTable *get_selector() const override { return attached.get_selector(); } + StatefulTable *get_stateful() const override { return attached.get_stateful(); } + MeterTable* get_meter() const override { return attached.get_meter(); } + const Way *way_for_ram(Ram r) const { + return way_map.count(r) ? &ways[way_map.at(r).way] : nullptr; } + const Way *way_for_xme(int xme) const { + for (auto &way : ways) if (way.group_xme == xme) return &way; + return nullptr; } +) + +DECLARE_TABLE_TYPE( + ExactMatchTable, SRamMatchTable, "exact_match", bool dynamic_key_masks = false; + + // The position of the ghost bits in a single hash function + // The key is name of the field and the field bit, the value is one-hot for all + // bits that this ghost bit has an impact on + using GhostBitPositions = std::map, bitvec>; + std::map ghost_bit_positions; std::unique_ptr stash_format; + std::vector stash_rows; std::vector stash_cols; std::vector stash_units; + std::vector stash_overhead_rows; + + // NOLINTNEXTLINE (whitespace/indent) + public + : int unitram_type() override { return UnitRam::MATCH; } table_type_t table_type() + const override { return EXACT; } bool has_group(int grp) { + for (auto &way : ways) + if (way.group_xme == grp) return true; + return false; + } void determine_ghost_bits(); + void gen_ghost_bits(int hash_function_number, json::vector &ghost_bits_to_hash_bits, + json::vector &ghost_bits_info) const override; + void generate_stash_overhead_rows();) + +DECLARE_TABLE_TYPE( + AlgTcamMatchTable, SRamMatchTable, "atcam_match", + // key is column priority, value is way index + std::map col_priority_way; + int number_partitions = 0; int max_subtrees_per_partition = 0; int bins_per_partition = 0; + int atcam_subset_width = 0; int 
shift_granularity = 0; std::string partition_field_name = ""; + std::vector ixbar_subgroup, ixbar_mask; struct match_element { + Phv::Ref *field; + unsigned offset, width; + }; + bitvec s0q1_nibbles, s1q0_nibbles; std::vector s0q1_prefs, s1q0_prefs; + std::map s0q1, s1q0; table_type_t table_type() + const override { return ATCAM; } void verify_format(Target::Tofino) override; + void verify_entry_priority(); void setup_column_priority(); void find_tcam_match(); + void gen_unit_cfg(json::vector &units, int size) const; + std::unique_ptr gen_memory_resource_allocation_tbl_cfg() const; + void setup_nibble_mask(Table::Format::Field *match, int group, + std::map &elems, bitvec &mask); + std::string get_match_mode(const Phv::Ref &pref, int offset) const override; + void base_alpm_atcam_tbl_cfg(json::map &atcam_tbl, const char *type, int size) const { + if (p4_table) p4_table->base_alpm_tbl_cfg(atcam_tbl, size, this, P4Table::Atcam); + } + // For ATCAM tables, no hash functions are generated for the table, as the current + // interpretation of the table is that the partition index is an identity hash function. + // Potentially this could change at some point + void add_hash_functions(json::map &stage_tbl) + const override {} bool has_directly_attached_synth2port() const; + std::string get_lpm_field_name() const { + std::string lpm = "lpm"; + if (auto *p = find_p4_param_type(lpm)) + return p->key_name.empty() ? 
p->name : p->key_name; + else + error(lineno, "'lpm' type field not found in alpm atcam '%s-%s' p4 param order", name(), + p4_name()); + return ""; + } std::set + get_partition_action_handle() const { + if (p4_table) return p4_table->get_partition_action_handle(); + return {}; + } void no_overhead_determine_result_bus_usage() override; + std::string get_partition_field_name() const { + if (!p4_table) return ""; + auto name = p4_table->get_partition_field_name(); + if (auto *p = find_p4_param(name)) + if (!p->key_name.empty()) return p->key_name; + return name; + } unsigned entry_ram_depth() const override { + return std::min(number_partitions, 1024); + } void gen_alpm_cfg(json::map &) const;) + +DECLARE_TABLE_TYPE( + ProxyHashMatchTable, SRamMatchTable, "proxy_hash", bool dynamic_key_masks = false; + void setup_ways() override; int proxy_hash_group = -1; std::string proxy_hash_alg = ""; + bool verify_match_key() override; table_type_t table_type() + const override { return PROXY_HASH; } void setup_word_ixbar_group() override; + int determine_pre_byteswizzle_loc(MatchSource *ms, int lo, int hi, int word) override; + void add_proxy_hash_function(json::map &stage_tbl) const;) + +DECLARE_TABLE_TYPE(TernaryMatchTable, MatchTable, "ternary_match", + + // NOLINTNEXTLINE (whitespace/indent) + protected: + void vpn_params(int &width, int &depth, int &period, const char *&period_name) const override; + struct Match { + int lineno = -1, word_group = -1, byte_group = -1, byte_config = 0, dirtcam = 0; + Match() {} + explicit Match(const value_t &); + }; + enum range_match_t { TCAM_NORMAL = 0, DIRTCAM_2B = 1, DIRTCAM_4B_LO = 2, + DIRTCAM_4B_HI = 3, NONE = 4 }; + enum byte_config_t { MIDBYTE_NIBBLE_LO = 0, MIDBYTE_NIBBLE_HI = 1 }; + std::vector match; + int match_word(int word_group) const { + for (unsigned i = 0; i < match.size(); i++) + if (match[i].word_group == word_group) + return i; + return -1; } + unsigned chain_rows[TCAM_UNITS_PER_ROW]; /* bitvector per column */ + enum 
{ ALWAYS_ENABLE_ROW = (1<<2) | (1<<5) | (1<<9) }; + friend class TernaryIndirectTable; + + virtual void check_tcam_match_bus(const std::vector &) = 0; + + // NOLINTNEXTLINE (whitespace/indent) + public: + void pass0() override; + int tcam_id = -1; + Table::Ref indirect; + int indirect_bus = -1; /* indirect bus to use if there's no indirect table */ + void alloc_vpns() override; + range_match_t get_dirtcam_mode(int group, int byte) const { + BUG_CHECK(group >= 0); + BUG_CHECK(byte >= 0); + range_match_t dirtcam_mode = NONE; + for (auto &m : match) { + if (m.word_group == group) { + dirtcam_mode = (range_match_t) ((m.dirtcam >> 2*byte) & 0x3); } } + return dirtcam_mode; } + Format::Field *lookup_field(const std::string &name, const std::string &action) const override; + HashDistribution *find_hash_dist(int unit) override { + return indirect ? indirect->find_hash_dist(unit) : Table::find_hash_dist(unit); } + int find_on_actionbus(const ActionBusSource &src, int lo, int hi, int size, + int pos = -1) override { + return indirect ? indirect->find_on_actionbus(src, lo, hi, size, pos) + : Table::find_on_actionbus(src, lo, hi, size, pos); } + void need_on_actionbus(const ActionBusSource &src, int lo, int hi, int size) override { + indirect ? indirect->need_on_actionbus(src, lo, hi, size) + : Table::need_on_actionbus(src, lo, hi, size); } + int find_on_actionbus(const char *n, TableOutputModifier mod, int lo, int hi, + int size, int *len = 0) override { + return indirect ? indirect->find_on_actionbus(n, mod, lo, hi, size, len) + : Table::find_on_actionbus(n, mod, lo, hi, size, len); } + void need_on_actionbus(Table *att, TableOutputModifier mod, int lo, int hi, int size) override { + indirect ? indirect->need_on_actionbus(att, mod, lo, hi, size) + : Table::need_on_actionbus(att, mod, lo, hi, size); } + const Call &get_action() const override { return indirect ? indirect->get_action() : action; } + Actions *get_actions() const override { return actions ? 
actions.get() : + (action ? action->actions.get() : indirect ? indirect->actions ? indirect->actions.get() : + indirect->action ? indirect->action->actions.get() : 0 : 0); } + const AttachedTables *get_attached() const override { + return indirect ? indirect->get_attached() : &attached; } + AttachedTables *get_attached() override { + return indirect ? indirect->get_attached() : &attached; } + SelectionTable *get_selector() const override { + return indirect ? indirect->get_selector() : 0; } + StatefulTable *get_stateful() const override { + return indirect ? indirect->get_stateful() : 0; } + MeterTable* get_meter() const override { + return indirect ? indirect->get_meter() : 0; } + bool is_attached(const Table *tbl) const override { + return indirect ? indirect->is_attached(tbl) : MatchTable::is_attached(tbl); } + Format::Field *find_address_field(const AttachedTable *tbl) const override { + return indirect ? indirect->find_address_field(tbl) : attached.find_address_field(tbl); } + std::unique_ptr gen_memory_resource_allocation_tbl_cfg( + const char *type, const std::vector &layout, + bool skip_spare_bank = false) const override; + json::map &get_tbl_top(json::vector &out) const; + Call &action_call() override { return indirect ? indirect->action : action; } + Call &instruction_call() override { return indirect ? 
indirect->instruction: instruction; } + int json_memunit(const MemUnit &u) const override { + return u.row + u.col*12; } + bool is_ternary() override { return true; } + bool has_indirect() { return indirect; } + int hit_next_size() const override { + if (indirect && indirect->hit_next.size() > 0) + return indirect->hit_next.size(); + return hit_next.size(); } + table_type_t table_type() const override { return TERNARY; } + void gen_entry_cfg(json::vector &out, std::string name, + unsigned lsb_offset, unsigned lsb_idx, unsigned msb_idx, + std::string source, unsigned start_bit, unsigned field_width, + unsigned index, bitvec &tcam_bits, unsigned byte_offset) const; + void gen_entry_cfg2(json::vector &out, std::string field_name, std::string global_name, + unsigned lsb_offset, unsigned lsb_idx, unsigned msb_idx, std::string source, + unsigned start_bit, unsigned field_width, bitvec &tcam_bits) const; + void gen_entry_range_cfg(json::map &entry, bool duplicate, unsigned nibble_offset) const; + void set_partition_action_handle(unsigned handle) { + if (p4_table) p4_table->set_partition_action_handle(handle); } + void set_partition_field_name(std::string name) { + if (p4_table) p4_table->set_partition_field_name(name); } + void base_alpm_pre_classifier_tbl_cfg(json::map &pre_classifier_tbl, + const char *type, int size) const { + if (p4_table) + p4_table->base_alpm_tbl_cfg(pre_classifier_tbl, size, this, P4Table::PreClassifier); + } + virtual void gen_match_fields_pvp(json::vector &match_field_list, unsigned word, + bool uses_versioning, unsigned version_word_group, bitvec &tcam_bits) const; + virtual void gen_match_fields(json::vector &match_field_list, + std::vector &tcam_bits) const; + unsigned get_default_action_handle() const override { + unsigned def_act_handle = Table::get_default_action_handle(); + return def_act_handle > 0 ? def_act_handle : + indirect ? indirect->get_default_action_handle() ? + indirect->get_default_action_handle() : action ? 
+ action->default_action_handle : 0 : 0; + } + std::string get_default_action() override { + std::string def_act = Table::get_default_action(); + return !def_act.empty() ? def_act : indirect ? indirect->default_action : ""; } + Format* get_format() const override { + return indirect ? indirect->get_format() : MatchTable::get_format(); } + template void write_merge_regs_vt(REGS ®s, int type, int bus) { + attached.write_merge_regs(regs, this, type, bus); } + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + void write_merge_regs, (mau_regs ®s, int type, int bus), override { + write_merge_regs_vt(regs, type, bus); }) + void add_result_physical_buses(json::map &stage_tbl) const override; + default_action_params* get_default_action_parameters() override { + if (!default_action_parameters.empty()) return &default_action_parameters; + auto def_action_params = indirect ? indirect->get_default_action_parameters() : nullptr; + return def_action_params; } + bitvec compute_reachable_tables() override; + int get_tcam_id() const override { return tcam_id; } + virtual void setup_indirect(const value_t &v) { + if (CHECKTYPE(v, tSTR)) + indirect = v; } + + // NOLINTNEXTLINE (whitespace/indent) + private: + template void tcam_table_map(REGS ®s, int row, int col); +) + +DECLARE_TABLE_TYPE( + Phase0MatchTable, MatchTable, "phase0_match", int size = MAX_PORTS; int width = 1; + int constant_value = 0; table_type_t table_type() const override { return PHASE0; } + // Phase0 Tables are not actual tables. 
They cannot have action data + // or attached tables and do not need a logical id assignment, hence + // we skip pass0 + void pass0() override {} void set_pred() override { return; } bool needs_next() const override { + return false; + } int ram_word_width() const override { return Target::PHASE0_FORMAT_WIDTH(); }) +DECLARE_TABLE_TYPE( + HashActionTable, MatchTable, "hash_action", public + : + // int row = -1, bus = -1; + table_type_t table_type() const override { return HASH_ACTION; } template + void write_merge_regs_vt(REGS ®s, int type, int bus); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_merge_regs, + (mau_regs & regs, int type, int bus), override) Format::Field * + lookup_field(const std::string &n, const std::string &act = "") const override; + void add_hash_functions(json::map &stage_tbl) const override; + void determine_word_and_result_bus() override; + Layout::bus_type_t default_bus_type() const override { return Layout::RESULT_BUS; }) + +DECLARE_TABLE_TYPE(TernaryIndirectTable, Table, "ternary_indirect", + + // NOLINTNEXTLINE (whitespace/indent) + protected: + TernaryMatchTable *match_table = nullptr; + AttachedTables attached; + table_type_t table_type() const override { return TERNARY_INDIRECT; } + table_type_t set_match_table(MatchTable *m, bool indirect) override; + void vpn_params(int &width, int &depth, int &period, const char *&period_name) const override { + width = (format->size-1)/128 + 1; + depth = layout_size() / width; + period = 1; + period_name = 0; } + Actions *get_actions() const override { + return actions ? actions.get() : (match_table ? 
match_table->actions.get() : nullptr); + } + const AttachedTables *get_attached() const override { return &attached; } + AttachedTables *get_attached() override { return &attached; } + const GatewayTable *get_gateway() const override { return match_table->get_gateway(); } + const MatchTable *get_match_table() const override { return match_table; } + std::set get_match_tables() override { + std::set rv; + if (match_table) rv.insert(match_table); + return rv; } + SelectionTable *get_selector() const override { return attached.get_selector(); } + StatefulTable *get_stateful() const override { return attached.get_stateful(); } + MeterTable* get_meter() const override { return attached.get_meter(); } + bool is_attached(const Table *tbl) const override { return attached.is_attached(tbl); } + Format::Field *find_address_field(const AttachedTable *tbl) const override { + return attached.find_address_field(tbl); } + template void write_merge_regs_vt(REGS ®s, int type, int bus) { + attached.write_merge_regs(regs, match_table, type, bus); } + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + void write_merge_regs, (mau_regs ®s, int type, int bus), override { + write_merge_regs_vt(regs, type, bus); }) + int unitram_type() override { return UnitRam::TERNARY_INDIRECTION; } + + // NOLINTNEXTLINE (whitespace/indent) + public: + Format::Field *lookup_field(const std::string &n, + const std::string &act = "") const override; + MatchTable *get_match_table() override { return match_table; } + const std::vector &get_hit_next() const override { + if (hit_next.empty() && match_table) + return match_table->get_hit_next(); + return Table::get_hit_next(); } + const NextTables &get_miss_next() const override { + if (!miss_next.set() && match_table) + return match_table->get_miss_next(); + return Table::get_miss_next(); } + int address_shift() const override { return std::min(5U, format->log2size - 2); } + unsigned get_default_action_handle() const override { + unsigned def_act_handle = 
Table::get_default_action_handle(); + return def_act_handle ? def_act_handle : action ? action->default_action_handle : 0; } + bool needs_handle() const override { return true; } + bool needs_next() const override { return true; } + void determine_word_and_result_bus() override; + bitvec compute_reachable_tables() override; + int get_tcam_id() const override { return match_table->tcam_id; } + Layout::bus_type_t default_bus_type() const override { return Layout::TIND_BUS; } +) + +DECLARE_ABSTRACT_TABLE_TYPE( + AttachedTable, Table, + /* table that can be attached to multiple match tables to do something */ + std::set match_tables; + bool direct = false, indirect = false; bool per_flow_enable = false; + std::string per_flow_enable_param = ""; + virtual unsigned per_flow_enable_bit(MatchTable *m = nullptr) const; + table_type_t set_match_table(MatchTable * m, bool indirect) override { + if ((indirect && direct) || (!indirect && this->indirect)) + error(lineno, "Table %s is accessed with direct and indirect indices", name()); + this->indirect = indirect; + direct = !indirect; + match_tables.insert(m); + if ((unsigned)m->logical_id < (unsigned)logical_id) logical_id = m->logical_id; + return table_type(); + } const GatewayTable *get_gateway() const override { + return match_tables.size() == 1 ? (*match_tables.begin())->get_gateway() : 0; + } SelectionTable *get_selector() const override; + StatefulTable * get_stateful() const override; MeterTable * get_meter() const override; + Call & + action_call() override { + return match_tables.size() == 1 ? (*match_tables.begin())->action_call() : action; + } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + int json_memunit(const MemUnit &u) const override; + void pass1() override; + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. 
+ virtual unsigned get_alu_index() const { + if (layout.size() > 0) return layout[0].row / 4U; + error(lineno, "Cannot determine ALU Index for table %s", name()); + return 0; + } unsigned determine_meter_shiftcount(Table::Call &call, int group, int word, int tcam_shift) + const; + void determine_meter_merge_regs(MatchTable *match, int type, int bus, + const std::vector &arg, + METER_ACCESS_TYPE default_type, unsigned &adr_mask, + unsigned &per_entry_mux_ctl, unsigned &adr_default, + unsigned &meter_type_position); + + // NOLINTNEXTLINE (whitespace/indent) + protected + : + // Accessed by Meter/Selection/Stateful Tables as "meter_alu_index" + // Accessed by Statistics (Counter) Tables as "stats_alu_index" + void add_alu_index(json::map &stage_tbl, std::string alu_index) const; + + // NOLINTNEXTLINE (whitespace/indent) + public + : + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + const MatchTable *get_match_table() + const override { return match_tables.size() == 1 ? *match_tables.begin() : 0; } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + MatchTable *get_match_table() + override { return match_tables.size() == 1 ? *match_tables.begin() : 0; } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + std::set + get_match_tables() override { return match_tables; } bool has_per_flow_enable() + const { return per_flow_enable; } std::string get_per_flow_enable_param() { + return per_flow_enable_param; + } Format::Field *get_per_flow_enable_param(MatchTable *m) const override { + return per_flow_enable ? 
m->lookup_field(per_flow_enable_param) : nullptr; + } Format::Field *get_meter_address_param(MatchTable *m) const override { + std::string pfe_name = + per_flow_enable_param.substr(0, per_flow_enable_param.find("_pfe")); + return per_flow_enable ? m->lookup_field(pfe_name + "_addr") : nullptr; + } Format::Field *get_meter_type_param(MatchTable *m) const override { + std::string pfe_name = + per_flow_enable_param.substr(0, per_flow_enable_param.find("_pfe")); + return per_flow_enable ? m->lookup_field(pfe_name + "_type") : nullptr; + } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + bool get_per_flow_enable() { return per_flow_enable; } bool is_direct() const { return direct; } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + virtual int default_pfe_adjust() const { return 0; } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + std::string get_default_action() override { + if (!default_action.empty()) return default_action; + for (auto m : match_tables) { + std::string def_action = m->get_default_action(); + if (!def_action.empty()) return def_action; + } + return ""; + } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + default_action_params *get_default_action_parameters() override { + if (!default_action_parameters.empty()) return &default_action_parameters; + for (auto m : match_tables) { + if (auto def_action_params = m->get_default_action_parameters()) + if (!def_action_params->empty()) return def_action_params; + } + return nullptr; + } bool validate_call(Table::Call &call, MatchTable *self, size_t required_args, + int hash_dist_type, Table::Call &first_call) override; + // used by Selection and Stateful tables. 
+ FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, int meter_alu_fifo_enable_from_mask, + (mau_regs &, unsigned bytemask))) + +DECLARE_TABLE_TYPE( + ActionTable, AttachedTable, "action", protected + : int action_id = -1; + std::map home_rows_per_word; int home_lineno = -1; + std::map> action_formats; + std::map pack_actions; + static const std::map> action_data_address_huffman_encoding; + void vpn_params(int &width, int &depth, int &period, const char *&period_name) const override; + int get_start_vpn() override; std::string find_field(Format::Field * field) override; + int find_field_lineno(Format::Field *field) override; + Format::Field * lookup_field(const std::string &name, const std::string &action) const override; + void apply_to_field(const std::string &n, std::function fn) override; + int find_on_actionbus(const char *n, TableOutputModifier mod, int lo, int hi, int size, + int *len) override; + int find_on_actionbus(const ActionBusSource &src, int lo, int hi, int size, int pos = -1) + override; + void need_on_actionbus(const ActionBusSource &src, int lo, int hi, int size) override; + void need_on_actionbus(Table *att, TableOutputModifier mod, int lo, int hi, int size) override; + table_type_t table_type() const override { return ACTION; } int unitram_type() + override { return UnitRam::ACTION; } void pad_format_fields(); + unsigned get_do_care_count(std::string bstring); + unsigned get_lower_huffman_encoding_bits(unsigned width); public + : const std::map> &get_action_formats() + const { return action_formats; } unsigned get_size() const { + unsigned size = 0; + if (format) size = format->size; + for (auto &f : get_action_formats()) { + unsigned fsize = f.second->size; + if (fsize > size) size = fsize; + } + return size; + } unsigned get_log2size() const { + unsigned size = get_size(); + return ceil_log2(size); + } unsigned determine_shiftcount(Table::Call &call, int group, unsigned word, int tcam_shift) + const override; + unsigned determine_default(Table::Call 
&call) const; + unsigned determine_mask(Table::Call &call) const; + unsigned determine_vpn_shiftcount(Table::Call &call) const; bool needs_handle() + const override { return true; } bool needs_next() const override { return true; }) + +DECLARE_TABLE_TYPE(GatewayTable, Table, "gateway", + + // NOLINTNEXTLINE (whitespace/indent) + protected: + MatchTable *match_table = 0; + uint64_t payload = -1; + int have_payload = -1; + std::vector payload_map; + int match_address = -1; + int gw_unit = -1; + int payload_unit = -1; + enum range_match_t { NONE, DC_2BIT, DC_4BIT } + range_match = NONE; + std::string gateway_name; + std::string gateway_cond; + bool always_run = false; // only for standalone + + // NOLINTNEXTLINE (whitespace/indent) + public: + struct MatchKey { + int offset; + Phv::Ref val; + bool valid; /* implicit valid bit for tofino1 only */ + MatchKey(gress_t gr, int stg, value_t &v) : + offset(-1), val(gr, stg, v), valid(false) {} + MatchKey(int off, gress_t gr, int stg, value_t &v) : + offset(off), val(gr, stg, v), valid(false) {} + // tofino1 only: phv has an implicit valid bit that can be matched in + // gateway or ternary table. + MatchKey(int off, gress_t gr, int stg, value_t &v, bool vld) : + offset(off), val(gr, stg, v), valid(vld) {} + bool operator<(const MatchKey &a) const { return offset < a.offset; } + }; + + // NOLINTNEXTLINE (whitespace/indent) + protected: + std::vector match, xor_match; + struct Match { + int lineno = 0; + uint16_t range[6] = { 0, 0, 0, 0, 0, 0 }; + wmatch_t val; + bool run_table = false; + NextTables next; + std::string action; // FIXME -- need arguments? 
+ int next_map_lut = -1; + Match() {} + Match(value_t *v, value_t &data, range_match_t range_match); + } miss, cond_true, cond_false; + std::vector table; + bool need_next_map_lut = false; + template void payload_write_regs(REGS &, int row, int type, int bus); + template void standalone_write_regs(REGS ®s); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + virtual void write_next_table_regs, (mau_regs &), { BUG(); }) + bool gateway_needs_ixbar_group() { + for (auto& m : match) + if (m.offset < 32) + return true; + return !xor_match.empty(); } + + // NOLINTNEXTLINE (whitespace/indent) + public: + table_type_t table_type() const override { return GATEWAY; } + virtual int find_next_lut_entry(Table *tbl, const Match &match); + const MatchTable *get_match_table() const override { return match_table; } + MatchTable *get_match_table() override { return match_table; } + std::set get_match_tables() override { + std::set rv; + if (match_table) rv.insert(match_table); + return rv; } + table_type_t set_match_table(MatchTable *m, bool indirect) override { + match_table = m; + if ((unsigned)m->logical_id < (unsigned)logical_id) logical_id = m->logical_id; + return GATEWAY; } + virtual void setup_map_indexing(Table *tbl) { return; } + static GatewayTable *create(int lineno, const std::string &name, gress_t gress, + Stage *stage, int lid, VECTOR(pair_t) &data) + { return table_type_singleton.create(lineno, name.c_str(), gress, stage, lid, data); } + const GatewayTable *get_gateway() const override { return this; } + AttachedTables *get_attached() const override { + return match_table ? match_table->get_attached() : 0; } + SelectionTable *get_selector() const override { + return match_table ? match_table->get_selector() : 0; } + StatefulTable *get_stateful() const override { + return match_table ? match_table->get_stateful() : 0; } + MeterTable *get_meter() const override { + return match_table ? 
match_table->get_meter() : 0; } + bool empty_match() const { return match.empty() && xor_match.empty(); } + unsigned input_use() const; + bool needs_handle() const override { return true; } + bool needs_next() const override { return true; } + bool is_branch() const; // Tofino2 needs is_a_brnch set to use next_table + void verify_format(); + bool is_always_run() const override { return always_run; } + virtual bool check_match_key(MatchKey &, const std::vector &, bool); + virtual int gw_memory_unit() const = 0; +) + +DECLARE_TABLE_TYPE( + SelectionTable, AttachedTable, "selection", + bool non_linear_hash = false, /* == enable_sps_scrambling */ + resilient_hash = false; /* false is fair hash */ + int mode_lineno = -1, param = -1; std::vector pool_sizes; + int min_words = -1, max_words = -1; int selection_hash = -1; public + : StatefulTable *bound_stateful = nullptr; + table_type_t table_type() + const override { return SELECTION; } void vpn_params(int &width, int &depth, int &period, + const char *&period_name) + const override { + width = period = 1; + depth = layout_size(); + period_name = 0; + } + + template + void write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args); + template void setup_logical_alu_map(REGS ®s, int logical_id, int alu); + template void setup_physical_alu_map(REGS ®s, int type, int bus, int alu); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + override) int address_shift() + const override { return 7; } std::vector + determine_spare_bank_memory_units() const override; + unsigned meter_group() const { return layout.at(0).row / 4U; } int home_row() const override { + return layout.at(0).row | 3; + } int unitram_type() override { return UnitRam::SELECTOR; } StatefulTable *get_stateful() + const override { + return bound_stateful; + } unsigned determine_shiftcount(Table::Call &call, int group, unsigned 
word, int tcam_shift) + const override; + void set_stateful(StatefulTable *s) override { + bound_stateful = s; + } unsigned per_flow_enable_bit(MatchTable *m = nullptr) const override; + int indirect_shiftcount() const override; + unsigned determine_length_shiftcount(const Table::Call &call, int group, int word) const; + unsigned determine_length_mask(const Table::Call &call) const; + unsigned determine_length_default(const Table::Call &call) const; + bool validate_length_call(const Table::Call &call);) + +class IdletimeTable : public Table { + MatchTable *match_table = 0; + int sweep_interval = 7, precision = 3; + bool disable_notification = false; + bool two_way_notification = false; + bool per_flow_enable = false; + + IdletimeTable(int lineno, const char *name, gress_t gress, Stage *stage, int lid) + : Table(lineno, name, gress, stage, lid) {} + void setup(VECTOR(pair_t) & data) override; + + public: + table_type_t table_type() const override { return IDLETIME; } + table_type_t set_match_table(MatchTable *m, bool indirect) override { + match_table = m; + if ((unsigned)m->logical_id < (unsigned)logical_id) logical_id = m->logical_id; + return IDLETIME; + } + void vpn_params(int &width, int &depth, int &period, const char *&period_name) const override { + width = period = 1; + depth = layout_size(); + period_name = 0; + } + int json_memunit(const MemUnit &u) const override; + int precision_shift() const; + int direct_shiftcount() const override; + void pass1() override; + void pass2() override; + void pass3() override; + template + void write_merge_regs_vt(REGS ®s, int type, int bus); + template + void write_regs_vt(REGS ®s); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_regs, (mau_regs & regs), override) + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_merge_regs, + (mau_regs & regs, int type, int bus), override) + void gen_tbl_cfg(json::vector &out) const override { /* nothing at top level */ } + void gen_stage_tbl_cfg(json::map &out) const; + static 
IdletimeTable *create(int lineno, const std::string &name, gress_t gress, Stage *stage, + int lid, VECTOR(pair_t) & data) { + IdletimeTable *rv = new IdletimeTable(lineno, name.c_str(), gress, stage, lid); + rv->setup(data); + return rv; + } + bool needs_handle() const override { return true; } + bool needs_next() const override { return true; } + Layout::bus_type_t default_bus_type() const override { return Layout::IDLE_BUS; } +}; + +DECLARE_ABSTRACT_TABLE_TYPE( + Synth2Port, AttachedTable, + void vpn_params(int &width, int &depth, int &period, const char *&period_name) const override { + width = period = 1; + depth = layout_size(); + period_name = 0; + } bool global_binding = false; + bool output_used = false; int home_lineno = -1; std::set> home_rows; + json::map * add_stage_tbl_cfg(json::map & tbl, const char *type, int size) const override; + public + : int get_home_row_for_row(int row) const; + void add_alu_indexes(json::map &stage_tbl, std::string alu_indexes) const; + OVERLOAD_FUNC_FOREACH(TARGET_CLASS, std::vector, determine_spare_bank_memory_units, + () const, (), override) + OVERLOAD_FUNC_FOREACH(TARGET_CLASS, void, alloc_vpns, (), ()) template + void write_regs_vt(REGS ®s); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_regs, (mau_regs & regs), override) + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + override = 0) + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. 
+ void common_init_setup(const VECTOR(pair_t) &, bool, P4Table::type) override; + bool common_setup(pair_t &, const VECTOR(pair_t) &, P4Table::type) override; + void pass1() override; void pass2() override; void pass3() override;) + +DECLARE_TABLE_TYPE( + CounterTable, Synth2Port, "counter", + enum {NONE = 0, PACKETS = 1, BYTES = 2, BOTH = 3} type = NONE; + int teop = -1; bool teop_initialized = false; int bytecount_adjust = 0; + table_type_t table_type() const override { return COUNTER; } + // FIXME: This comment is necessary to stop cpplint from complaining. The format is off because + // this code is within a macro. + template + void write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + override) + + template + void setup_teop_regs(REGS ®s, int stats_group_index); + template void write_alu_vpn_range(REGS ®s); + template void setup_teop_regs_2(REGS ®s, int stats_group_index); + template void write_alu_vpn_range_2(REGS ®s); + + struct lrt_params { // largest recent with threshold paramters + int lineno; + int64_t threshold; + int interval; + lrt_params(int l, int64_t t, int i) : lineno(l), threshold(t), interval(i) {} + explicit lrt_params(const value_t &); + }; + std::vector lrt; public + : int home_row() const override { return layout.at(0).row; } int direct_shiftcount() + const override; + int indirect_shiftcount() const override; + unsigned determine_shiftcount(Table::Call &call, int group, unsigned word, int tcam_shift) + const override; + int address_shift() const override; + bool run_at_eop() override { return (type & BYTES) != 0; } bool adr_mux_select_stats() + override { return true; } int unitram_type() override { return UnitRam::STATISTICS; }) + +DECLARE_TABLE_TYPE( + MeterTable, Synth2Port, "meter", int red_nodrop_value = -1; int red_drop_value = -1; + int 
green_value = 0; int yellow_value = 1; int red_value = 3; int profile = 0; int teop = -1; + bool teop_initialized = false; int bytecount_adjust = 0; + enum {NONE = 0, STANDARD = 1, LPF = 2, RED = 3} type = NONE; + enum {NONE_ = 0, PACKETS = 1, BYTES = 2} count = NONE_; std::vector color_maprams; + table_type_t table_type() const override { return METER; } template + void write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args); + template void meter_color_logical_to_phys(REGS ®s, int logical_id, int alu); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, void write_merge_regs, + (mau_regs & regs, MatchTable *match, int type, int bus, + const std::vector &args), + override) + + template + void setup_teop_regs(REGS ®s, int meter_group_index); + template void write_alu_vpn_range(REGS ®s); + template void write_regs_home_row(REGS ®s, unsigned row); + template void write_mapram_color_regs(REGS ®s, bool &push_on_overflow); + + template void setup_teop_regs_2(REGS ®s, int stats_group_index); + template void write_alu_vpn_range_2(REGS ®s); + + int sweep_interval = 2; public + : enum {NO_COLOR_MAP, IDLE_MAP_ADDR, STATS_MAP_ADDR} color_mapram_addr = NO_COLOR_MAP; + int direct_shiftcount() const override; int indirect_shiftcount() const override; + int address_shift() const override; bool color_aware = false; + bool color_aware_per_flow_enable = false; bool color_used = false; + int pre_color_hash_dist_unit = -1; int pre_color_bit_lo = -1; + bool run_at_eop() override { return type == STANDARD; } int unitram_type() override { + return UnitRam::METER; + } int home_row() const override { return layout.at(0).row | 3; } unsigned meter_group() + const { return layout.at(0).row / 4U; } bool uses_colormaprams() const override { + return !color_maprams.empty(); + } unsigned determine_shiftcount(Table::Call &call, int group, unsigned word, int tcam_shift) + const override; + void add_cfg_reg(json::vector &cfg_cache, std::string full_name, std::string name, 
unsigned val, + unsigned width); + Layout::bus_type_t default_bus_type() const override; int default_pfe_adjust() const override { + return color_aware ? -METER_TYPE_BITS : 0; + } void set_color_used() override { color_used = true; } void set_output_used() override { + output_used = true; + } int color_shiftcount(Table::Call &call, int group, int tcam_shift) const override; + template + void setup_exact_shift(REGS &merge, int bus, int group, int word, int word_group, + Call &meter_call, Call &color_call); + template + void setup_tcam_shift(REGS &merge, int bus, int tcam_shift, Call &meter_call, Call &color_call); + template void write_color_regs(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args);) + +namespace StatefulAlu { +struct TMatchOP; +struct TMatchInfo { + const Table::Actions::Action *act; + const TMatchOP *op; +}; + +Instruction *genNoop(StatefulTable *tbl, Table::Actions::Action *act); +} // namespace StatefulAlu + +DECLARE_TABLE_TYPE(StatefulTable, Synth2Port, "stateful", + table_type_t table_type() const override { return STATEFUL; } + bool setup_jbay(const pair_t &kv); + template void write_action_regs_vt(REGS ®s, const Actions::Action *); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + void write_action_regs, (mau_regs ®s, const Actions::Action *act), override) + template void write_merge_regs_vt(REGS ®s, MatchTable *match, int type, int bus, + const std::vector &args); + template void write_logging_regs(REGS ®s); + FOR_ALL_REGISTER_SETS(TARGET_OVERLOAD, + void write_merge_regs, (mau_regs ®s, MatchTable *match, int type, + int bus, const std::vector &args), override) + template void write_tofino2_common_regs(REGS ®s); + struct const_info_t { + int lineno; + int64_t value; + bool is_param; + std::string param_name; + unsigned param_handle; + static unsigned unique_register_param_handle; + const_info_t() = default; + const_info_t(int lineno, + int64_t value, + bool is_param = false, + std::string param_name = "", + unsigned 
param_handle = 0) + : lineno(lineno), value(value), is_param(is_param), + param_name(param_name), param_handle(param_handle) { + if (is_param) this->param_handle = unique_register_param_handle++; + } + }; + std::vector const_vals; + struct MathTable { + int lineno = -1; + std::vector data; + bool invert = false; + int shift = 0, scale = 0; + explicit operator bool() { return lineno >= 0; } + void check(); + } math_table; + bool dual_mode = false; + bool offset_vpn = false; + bool address_used = false; + int meter_adr_shift = 0; + int stateful_counter_mode = 0; + int watermark_level = 0; + int watermark_pop_not_push = 0; + uint64_t initial_value_lo = 0; + uint64_t initial_value_hi = 0; + unsigned data_bytemask = 0; + unsigned hash_bytemask = 0; + int logvpn_lineno = -1; + int logvpn_min = -1, logvpn_max = -1; + int pred_shift = 0, pred_comb_shift = 0; + int stage_alu_id = -1; + Ref underflow_action, overflow_action; + + // NOLINTNEXTLINE (whitespace/indent) + public: + Ref bound_selector; + unsigned phv_byte_mask = 0; + std::vector sbus_learn, sbus_match; + enum { SBUS_OR = 0, SBUS_AND = 1 } sbus_comb = SBUS_OR; + int phv_hash_shift = 0; + bitvec phv_hash_mask = bitvec(0, 128); + Instruction *output_lmatch = nullptr; // output instruction using lmatch + bitvec clear_value; + uint32_t busy_value = 0; + bool divmod_used = false; + int instruction_set() override { return 1; /* STATEFUL_ALU */ } + int direct_shiftcount() const override; + int indirect_shiftcount() const override; + int address_shift() const override; + int unitram_type() override { return UnitRam::STATEFUL; } + int get_const(int lineno, int64_t v); + bool is_dual_mode() const { return dual_mode; } + int alu_size() const { return 1 << std::min(5U, format->log2size - is_dual_mode()); } + int home_row() const override { return layout.at(0).row | 3; } + unsigned meter_group() const { return layout.at(0).row/4U; } + unsigned determine_shiftcount(Table::Call &call, int group, unsigned word, + int tcam_shift) 
const override; + unsigned per_flow_enable_bit(MatchTable *m = nullptr) const override; + void set_address_used() override { address_used = true; } + void set_output_used() override { output_used = true; } + void parse_register_params(int idx, const value_t &val); + int64_t get_const_val(int index) const { return const_vals.at(index).value; } + Actions::Action *action_for_table_action(const MatchTable *tbl, const Actions::Action *) const; + OVERLOAD_FUNC_FOREACH(REGISTER_SET, static int, parse_counter_mode, (const value_t &v), (v)) + OVERLOAD_FUNC_FOREACH(REGISTER_SET, void, set_counter_mode, (int mode), (mode)) + OVERLOAD_FUNC_FOREACH(REGISTER_SET, + void, gen_tbl_cfg, (json::map &tbl, json::map &stage_tbl) const, (tbl, stage_tbl)) + BFN::Alloc1D tmatch_use; + + bool p4c_5192_workaround(const Actions::Action *) const; +) + +#endif /* BACKENDS_TOFINO_BF_ASM_TABLES_H_ */ diff --git a/backends/tofino/bf-asm/target.cpp b/backends/tofino/bf-asm/target.cpp new file mode 100644 index 00000000000..41b3b1f7c50 --- /dev/null +++ b/backends/tofino/bf-asm/target.cpp @@ -0,0 +1,321 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/target.h" + +#include "asm-types.h" +#include "backends/tofino/bf-asm/config.h" +#include "backends/tofino/bf-asm/tables.h" +#include "bson.h" +#include "parser.h" +#include "ubits.h" + +void declare_registers(const Target::Tofino::top_level_regs *regs) { + declare_registers(®s->mem_top, sizeof(regs->mem_top), + [=](std::ostream &out, const char *addr, const void *end) { + out << "memories.top"; + regs->mem_top.emit_fieldname(out, addr, end); + }); + declare_registers(®s->mem_pipe, sizeof(regs->mem_pipe), + [=](std::ostream &out, const char *addr, const void *end) { + out << "memories.pipe"; + regs->mem_pipe.emit_fieldname(out, addr, end); + }); + declare_registers(®s->reg_top, sizeof(regs->reg_top), + [=](std::ostream &out, const char *addr, const void *end) { + out << "registers.top"; + regs->reg_top.emit_fieldname(out, addr, end); + }); + declare_registers(®s->reg_pipe, sizeof(regs->reg_pipe), + [=](std::ostream &out, const char *addr, const void *end) { + out << "registers.pipe"; + regs->reg_pipe.emit_fieldname(out, addr, end); + }); +} +void undeclare_registers(const Target::Tofino::top_level_regs *regs) { + undeclare_registers(®s->mem_top); + undeclare_registers(®s->mem_pipe); + undeclare_registers(®s->reg_top); + undeclare_registers(®s->reg_pipe); +} + +void declare_registers(const Target::Tofino::parser_regs *regs) { + declare_registers(®s->memory[INGRESS], sizeof regs->memory[INGRESS], + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.mem[INGRESS]"; + regs->memory[INGRESS].emit_fieldname(out, addr, end); + }); + declare_registers(®s->memory[EGRESS], sizeof regs->memory[EGRESS], + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.mem[EGRESS]"; + regs->memory[EGRESS].emit_fieldname(out, addr, end); + }); + declare_registers(®s->ingress, sizeof regs->ingress, + [=](std::ostream &out, const char *addr, const void 
*end) { + out << "parser.ibp_reg"; + regs->ingress.emit_fieldname(out, addr, end); + }); + declare_registers(®s->egress, sizeof regs->egress, + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.ebp_reg"; + regs->egress.emit_fieldname(out, addr, end); + }); + declare_registers(®s->merge, sizeof regs->merge, + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.merge"; + regs->merge.emit_fieldname(out, addr, end); + }); +} +void undeclare_registers(const Target::Tofino::parser_regs *regs) { + undeclare_registers(®s->memory[INGRESS]); + undeclare_registers(®s->memory[EGRESS]); + undeclare_registers(®s->ingress); + undeclare_registers(®s->egress); + undeclare_registers(®s->merge); +} +void declare_registers(const Target::Tofino::mau_regs *regs, bool, int stage) { + declare_registers(regs, sizeof *regs, + [=](std::ostream &out, const char *addr, const void *end) { + out << "mau[" << stage << "]"; + regs->emit_fieldname(out, addr, end); + }); +} +void declare_registers(const Target::Tofino::deparser_regs *regs) { + declare_registers(®s->input, sizeof(regs->input), + [=](std::ostream &out, const char *addr, const void *end) { + out << "deparser.input_phase"; + regs->input.emit_fieldname(out, addr, end); + }); + declare_registers(®s->header, sizeof(regs->header), + [=](std::ostream &out, const char *addr, const void *end) { + out << "deparser.header_phase"; + regs->header.emit_fieldname(out, addr, end); + }); +} +void undeclare_registers(const Target::Tofino::deparser_regs *regs) { + undeclare_registers(®s->input); + undeclare_registers(®s->header); +} + +void emit_parser_registers(const Target::Tofino::top_level_regs *regs, std::ostream &out) { + std::set emitted_parsers; + // The driver can reprogram parser blocks at runtime. We output parser + // blocks in the binary with the same base address. 
The driver uses the + // parser handle at the start of each block to associate the parser block + // with its respective parser node in context.json. + // In a p4 program, the user can associate multiple parsers to a + // multi-parser configuration but only map a few ports. The unmapped + // parser(s) will be output in context.json node and binary but not have an + // associated port map in context.json. The driver will not initialize any + // parsers with these unmapped parser(s) but use them to reconfigure at + // runtime if required. + uint64_t pipe_mem_base_addr = 0x200000000000; + uint64_t prsr_mem_base_addr = (pipe_mem_base_addr + 0x1C800000000) >> 4; + uint64_t pipe_regs_base_addr = 0x2000000; + uint64_t prsr_regs_base_addr = pipe_regs_base_addr + 0x700000; + for (auto ig : regs->parser_ingress) { + out << binout::tag('P') << binout::byte4(ig.first); + ig.second->emit_binary(out, prsr_regs_base_addr); + } + for (auto ig : regs->parser_memory[INGRESS]) { + out << binout::tag('P') << binout::byte4(ig.first); + ig.second->emit_binary(out, prsr_mem_base_addr); + } + prsr_regs_base_addr = pipe_regs_base_addr + 0x740000; + for (auto eg : regs->parser_egress) { + out << binout::tag('P') << binout::byte4(eg.first); + eg.second->emit_binary(out, prsr_regs_base_addr); + } + prsr_mem_base_addr = (pipe_mem_base_addr + 0x1C800400000) >> 4; + for (auto eg : regs->parser_memory[EGRESS]) { + out << binout::tag('P') << binout::byte4(eg.first); + eg.second->emit_binary(out, prsr_mem_base_addr); + } +} + +void declare_registers(const Target::JBay::top_level_regs *regs) { + declare_registers(®s->mem_top, sizeof(regs->mem_top), + [=](std::ostream &out, const char *addr, const void *end) { + out << "memories.top"; + regs->mem_top.emit_fieldname(out, addr, end); + }); + declare_registers(®s->mem_pipe, sizeof(regs->mem_pipe), + [=](std::ostream &out, const char *addr, const void *end) { + out << "memories.pipe"; + regs->mem_pipe.emit_fieldname(out, addr, end); + }); + 
declare_registers(®s->reg_top, sizeof(regs->reg_top), + [=](std::ostream &out, const char *addr, const void *end) { + out << "registers.top"; + regs->reg_top.emit_fieldname(out, addr, end); + }); + declare_registers(®s->reg_pipe, sizeof(regs->reg_pipe), + [=](std::ostream &out, const char *addr, const void *end) { + out << "registers.pipe"; + regs->reg_pipe.emit_fieldname(out, addr, end); + }); +} +void undeclare_registers(const Target::JBay::top_level_regs *regs) { + undeclare_registers(®s->mem_top); + undeclare_registers(®s->mem_pipe); + undeclare_registers(®s->reg_top); + undeclare_registers(®s->reg_pipe); +} +void declare_registers(const Target::JBay::parser_regs *regs) { + declare_registers(®s->memory[INGRESS], sizeof regs->memory[INGRESS], + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.mem[INGRESS]"; + regs->memory[INGRESS].emit_fieldname(out, addr, end); + }); + declare_registers(®s->memory[EGRESS], sizeof regs->memory[EGRESS], + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.mem[EGRESS]"; + regs->memory[EGRESS].emit_fieldname(out, addr, end); + }); + declare_registers(®s->ingress, sizeof regs->ingress, + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.ipb_reg"; + regs->ingress.emit_fieldname(out, addr, end); + }); + declare_registers(®s->egress, sizeof regs->egress, + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.epb_reg"; + regs->egress.emit_fieldname(out, addr, end); + }); + declare_registers(®s->main[INGRESS], sizeof regs->main[INGRESS], + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.ingress.main"; + regs->main[INGRESS].emit_fieldname(out, addr, end); + }); + declare_registers(®s->main[EGRESS], sizeof regs->main[EGRESS], + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.egress.main"; + regs->main[EGRESS].emit_fieldname(out, addr, end); + }); + declare_registers(®s->merge, 
sizeof regs->merge, + [=](std::ostream &out, const char *addr, const void *end) { + out << "parser.merge"; + regs->merge.emit_fieldname(out, addr, end); + }); +} +void undeclare_registers(const Target::JBay::parser_regs *regs) { + undeclare_registers(®s->memory[INGRESS]); + undeclare_registers(®s->memory[EGRESS]); + undeclare_registers(®s->ingress); + undeclare_registers(®s->egress); + undeclare_registers(®s->main[INGRESS]); + undeclare_registers(®s->main[EGRESS]); + undeclare_registers(®s->merge); +} +void declare_registers(const Target::JBay::mau_regs *regs, bool, int stage) { + declare_registers(regs, sizeof *regs, + [=](std::ostream &out, const char *addr, const void *end) { + out << "mau[" << stage << "]"; + regs->emit_fieldname(out, addr, end); + }); +} +void declare_registers(const Target::JBay::deparser_regs *regs) { + declare_registers(regs, sizeof *regs, + [=](std::ostream &out, const char *addr, const void *end) { + out << "deparser.regs"; + regs->emit_fieldname(out, addr, end); + }); +} + +void emit_parser_registers(const Target::JBay::top_level_regs *regs, std::ostream &out) { + std::set emitted_parsers; + for (auto ig : regs->parser_ingress) { + json::map header; + header["handle"] = ig.first; + out << binout::tag('P') << json::binary(header); + ig.second->emit_binary(out, 0); + } + for (auto eg : regs->parser_egress) { + json::map header; + header["handle"] = eg.first; + out << binout::tag('P') << json::binary(header); + eg.second->emit_binary(out, 0); + } + for (auto ig : regs->parser_main[INGRESS]) { + json::map header; + header["handle"] = ig.first; + out << binout::tag('P') << json::binary(header); + ig.second->emit_binary(out, 0); + } + for (auto eg : regs->parser_main[EGRESS]) { + json::map header; + header["handle"] = eg.first; + out << binout::tag('P') << json::binary(header); + eg.second->emit_binary(out, 0); + } + for (auto ig : regs->parser_memory[INGRESS]) { + json::map header; + header["handle"] = ig.first; + out << binout::tag('P') << 
json::binary(header); + ig.second->emit_binary(out, 0); + } + for (auto eg : regs->parser_memory[EGRESS]) { + json::map header; + header["handle"] = eg.first; + out << binout::tag('P') << json::binary(header); + eg.second->emit_binary(out, 0); + } +} + +int Target::numMauStagesOverride = 0; + +int Target::encodeConst(int src) { + SWITCH_FOREACH_TARGET(options.target, return TARGET::encodeConst(src);); + BUG(); + return 0; +} + +void Target::OVERRIDE_NUM_MAU_STAGES(int num) { + int allowed = NUM_MAU_STAGES_PRIVATE(); + BUG_CHECK(num > 0 && num <= allowed, + "Invalid override for NUM_MAU_STAGES. Allowed range is <1, %d>, got %d.", allowed, + num); + + numMauStagesOverride = num; + return; +} + +int Target::NUM_BUS_OF_TYPE_v(int bus_type) const { + // default values for Tofino1/2 + switch (static_cast(bus_type)) { + case Table::Layout::SEARCH_BUS: + case Table::Layout::RESULT_BUS: + case Table::Layout::TIND_BUS: + return 2; + case Table::Layout::IDLE_BUS: + return 20; + default: + return 0; + } +} + +int Target::NUM_BUS_OF_TYPE(int bus_type) { + SWITCH_FOREACH_TARGET(options.target, return TARGET().NUM_BUS_OF_TYPE_v(bus_type);) +} + +// should these be inline in the header file? +#define DEFINE_PER_TARGET_CONSTANT(TYPE, NAME) \ + TYPE Target::NAME() { \ + SWITCH_FOREACH_TARGET(options.target, return TARGET::NAME;) \ + return std::conditional_t, std::nullptr_t, TYPE>(); \ + } +PER_TARGET_CONSTANTS(DEFINE_PER_TARGET_CONSTANT) diff --git a/backends/tofino/bf-asm/target.h b/backends/tofino/bf-asm/target.h new file mode 100644 index 00000000000..6c6b6103ffa --- /dev/null +++ b/backends/tofino/bf-asm/target.h @@ -0,0 +1,710 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
 You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions
 * and limitations under the License.
 *
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef TARGET_H_
#define TARGET_H_

#include "asm-types.h"
#include "backends/tofino/bf-asm/config.h"
#include "bfas.h"
#include "map.h"

struct MemUnit;

/** FOR_ALL_TARGETS -- metamacro that expands a macro for each defined target
 *  FOR_ALL_REGISTER_SETS -- metamacro that expands for each distinct register set;
 *          basically a subset of targets with one per distinct register set
 *  FOR_ALL_TARGET_CLASSES -- metamacro that expands for each distinct target class
 *          a subset of the register sets
 */
#define FOR_ALL_TARGETS(M, ...) \
    M(Tofino, ##__VA_ARGS__)    \
    M(JBay, ##__VA_ARGS__)      \
    M(Tofino2H, ##__VA_ARGS__)  \
    M(Tofino2M, ##__VA_ARGS__)  \
    M(Tofino2U, ##__VA_ARGS__)  \
    M(Tofino2A0, ##__VA_ARGS__)
#define FOR_ALL_REGISTER_SETS(M, ...) \
    M(Tofino, ##__VA_ARGS__)          \
    M(JBay, ##__VA_ARGS__)
#define FOR_ALL_TARGET_CLASSES(M, ...) M(Tofino, ##__VA_ARGS__)

// alias FOR_ALL -> FOR_EACH so the group name doesn't need to be plural
#define FOR_EACH_TARGET FOR_ALL_TARGETS
#define FOR_EACH_REGISTER_SET FOR_ALL_REGISTER_SETS
#define FOR_EACH_TARGET_CLASS FOR_ALL_TARGET_CLASSES

// Membership tables: which targets belong to the Tofino class, and which
// targets share each register set (all Tofino2 variants reuse the JBay regs).
#define TARGETS_IN_CLASS_Tofino(M, ...) \
    M(Tofino, ##__VA_ARGS__)            \
    M(JBay, ##__VA_ARGS__)              \
    M(Tofino2H, ##__VA_ARGS__)          \
    M(Tofino2M, ##__VA_ARGS__)          \
    M(Tofino2U, ##__VA_ARGS__)          \
    M(Tofino2A0, ##__VA_ARGS__)
#define REGSETS_IN_CLASS_Tofino(M, ...) \
    M(Tofino, ##__VA_ARGS__)            \
    M(JBay, ##__VA_ARGS__)
#define TARGETS_USING_REGS_JBay(M, ...) \
    M(JBay, ##__VA_ARGS__)              \
    M(Tofino2H, ##__VA_ARGS__)          \
    M(Tofino2M, ##__VA_ARGS__)          \
    M(Tofino2U, ##__VA_ARGS__)          \
    M(Tofino2A0, ##__VA_ARGS__)
#define TARGETS_USING_REGS_Tofino(M, ...) M(Tofino, ##__VA_ARGS__)

// Indirection so CL can be a macro argument expanded before pasting.
#define TARGETS_IN_CLASS(CL, ...) TARGETS_IN_CLASS_##CL(__VA_ARGS__)
#define TARGETS_USING_REGS(CL, ...) TARGETS_USING_REGS_##CL(__VA_ARGS__)
#define REGSETS_IN_CLASS(CL, ...) REGSETS_IN_CLASS_##CL(__VA_ARGS__)

#define EXPAND(...) __VA_ARGS__
#define EXPAND_COMMA(...) , ##__VA_ARGS__
// NOTE: deliberately supplies the closing ')' -- used by DECL_OVERLOAD_FUNC below.
#define EXPAND_COMMA_CLOSE(...) ,##__VA_ARGS__ )
#define INSTANTIATE_TARGET_TEMPLATE(TARGET, FUNC, ...) template FUNC(Target::TARGET::__VA_ARGS__);
#define DECLARE_TARGET_CLASS(TARGET, ...) class TARGET __VA_ARGS__;
#define FRIEND_TARGET_CLASS(TARGET, ...) friend class Target::TARGET __VA_ARGS__;
#define TARGET_OVERLOAD(TARGET, FN, ARGS, ...) FN(Target::TARGET::EXPAND ARGS) __VA_ARGS__;

// One entry per per-target constant; expanded both to declare the static
// accessors on Target (below) and to define their dispatchers in target.cpp.
#define PER_TARGET_CONSTANTS(M)                        \
    M(const char *, name)                              \
    M(target_t, register_set)                          \
    M(int, ARAM_UNITS_PER_STAGE)                       \
    M(int, DEPARSER_CHECKSUM_UNITS)                    \
    M(int, DEPARSER_CONSTANTS)                         \
    M(int, DEPARSER_MAX_FD_ENTRIES)                    \
    M(int, DEPARSER_MAX_POV_BYTES)                     \
    M(int, DEPARSER_MAX_POV_PER_USE)                   \
    M(int, DP_UNITS_PER_STAGE)                         \
    M(int, DYNAMIC_CONFIG)                             \
    M(int, DYNAMIC_CONFIG_INPUT_BITS)                  \
    M(bool, EGRESS_SEPARATE)                           \
    M(int, END_OF_PIPE)                                \
    M(int, EXACT_HASH_GROUPS)                          \
    M(int, EXACT_HASH_TABLES)                          \
    M(int, EXTEND_ALU_8_SLOTS)                         \
    M(int, EXTEND_ALU_16_SLOTS)                        \
    M(int, EXTEND_ALU_32_SLOTS)                        \
    M(bool, GATEWAY_INHIBIT_INDEX)                     \
    M(int, GATEWAY_MATCH_BITS)                         \
    M(bool, GATEWAY_NEEDS_SEARCH_BUS)                  \
    M(int, GATEWAY_PAYLOAD_GROUPS)                     \
    M(int, GATEWAY_ROWS)                               \
    M(bool, GATEWAY_SINGLE_XBAR_GROUP)                 \
    M(bool, HAS_MPR)                                   \
    M(int, INSTR_SRC2_BITS)                            \
    M(int, IMEM_COLORS)                                \
    M(int, IXBAR_HASH_GROUPS)                          \
    M(int, IXBAR_HASH_INDEX_MAX)                       \
    M(int, IXBAR_HASH_INDEX_STRIDE)                    \
    M(int, LOCAL_TIND_UNITS)                           \
    M(int, LONG_BRANCH_TAGS)                           \
    M(int, MAX_IMMED_ACTION_DATA)                      \
    M(int, MAX_OVERHEAD_OFFSET)                        \
    M(int, MAX_OVERHEAD_OFFSET_NEXT)                   \
    M(int, MATCH_BYTE_16BIT_PAIRS)                     \
    M(int, MATCH_REQUIRES_PHYSID)                      \
    M(int, MAU_BASE_DELAY)                             \
    M(int, MAU_BASE_PREDICATION_DELAY)                 \
    M(int, MAU_ERROR_DELAY_ADJUST)                     \
    M(int, METER_ALU_GROUP_DATA_DELAY)                 \
    M(int, MINIMUM_INSTR_CONSTANT)                     \
    M(bool, NEXT_TABLE_EXEC_COMBINED)                  \
    M(int, NEXT_TABLE_SUCCESSOR_TABLE_DEPTH)           \
    M(int, NUM_MAU_STAGES_PRIVATE)                     \
    M(int, NUM_EGRESS_STAGES_PRIVATE)                  \
    M(int, NUM_PARSERS)                                \
    M(int, NUM_PIPES)                                  \
    M(bool, OUTPUT_STAGE_EXTENSION_PRIVATE)            \
    M(int, PARSER_CHECKSUM_UNITS)                      \
    M(bool, PARSER_EXTRACT_BYTES)                      \
    M(int, PARSER_DEPTH_MAX_BYTES_INGRESS)             \
    M(int, PARSER_DEPTH_MAX_BYTES_EGRESS)              \
    M(int, PARSER_DEPTH_MAX_BYTES_MULTITHREADED_EGRESS) \
    M(int, PARSER_DEPTH_MIN_BYTES_INGRESS)             \
    M(int, PARSER_DEPTH_MIN_BYTES_EGRESS)              \
    M(int, PHASE0_FORMAT_WIDTH)                        \
    M(bool, REQUIRE_TCAM_ID)                           \
    M(int, SRAM_EGRESS_ROWS)                           \
    M(bool, SRAM_GLOBAL_ACCESS)                        \
    M(int, SRAM_HBUS_SECTIONS_PER_STAGE)               \
    M(int, SRAM_HBUSSES_PER_ROW)                       \
    M(int, SRAM_INGRESS_ROWS)                          \
    M(int, SRAM_LOGICAL_UNITS_PER_ROW)                 \
    M(int, SRAM_LAMBS_PER_STAGE)                       \
    M(int, SRAM_REMOVED_COLUMNS)                       \
    M(int, SRAM_STRIDE_COLUMN)                         \
    M(int, SRAM_STRIDE_ROW)                            \
    M(int, SRAM_STRIDE_STAGE)                          \
    M(int, SRAM_UNITS_PER_ROW)                         \
    M(int, STATEFUL_ALU_ADDR_WIDTH)                    \
    M(int, STATEFUL_ALU_CONST_MASK)                    \
    M(int, STATEFUL_ALU_CONST_MAX)                     \
    M(int, STATEFUL_ALU_CONST_MIN)                     \
    M(int, STATEFUL_ALU_CONST_WIDTH)                   \
    M(int, STATEFUL_CMP_ADDR_WIDTH)                    \
    M(int, STATEFUL_CMP_CONST_MASK)                    \
    M(int, STATEFUL_CMP_CONST_MAX)                     \
    M(int, STATEFUL_CMP_CONST_MIN)                     \
    M(int, STATEFUL_CMP_CONST_WIDTH)                   \
    M(int, STATEFUL_CMP_UNITS)                         \
    M(int, STATEFUL_OUTPUT_UNITS)                      \
    M(int, STATEFUL_PRED_MASK)                         \
    M(int, STATEFUL_REGFILE_CONST_WIDTH)               \
    M(int, STATEFUL_REGFILE_ROWS)                      \
    M(int, STATEFUL_TMATCH_UNITS)                      \
    M(bool, SUPPORT_ALWAYS_RUN)                        \
    M(bool, SUPPORT_CONCURRENT_STAGE_DEP)              \
    M(bool, SUPPORT_OVERFLOW_BUS)                      \
    M(bool, SUPPORT_SALU_FAST_CLEAR)                   \
    M(bool, SUPPORT_TRUE_EOP)                          \
    M(bool, SYNTH2PORT_NEED_MAPRAMS)                   \
    M(bool, TCAM_EXTRA_NIBBLE)                         \
    M(bool, TCAM_GLOBAL_ACCESS)                        \
    M(int, TCAM_MATCH_BUSSES)                          \
    M(int, TCAM_MEMORY_FULL_WIDTH)                     \
    M(int, TCAM_ROWS)                                  \
    M(int, TCAM_UNITS_PER_ROW)                         \
    M(int, TCAM_XBAR_GROUPS)                           \
    M(bool, TABLES_REQUIRE_ROW)

#define DECLARE_PER_TARGET_CONSTANT(TYPE, NAME) static TYPE NAME();

// Forward declarations nested into each target class by the macros below.
#define TARGET_CLASS_SPECIFIC_CLASSES \
    class ActionTable;                \
    class CounterTable;               \
    class ExactMatchTable;            \
    class GatewayTable;               \
    class MeterTable;                 \
    class StatefulTable;              \
    class TernaryIndirectTable;       \
    class TernaryMatchTable;
#define REGISTER_SET_SPECIFIC_CLASSES /* none */
#define TARGET_SPECIFIC_CLASSES       /* none */

// Base class for all targets: per-target constants are exposed as static
// accessors that dispatch on options.target; subclasses supply the values
// as enum constants.
class Target {
 public:
    class Phv;
    FOR_ALL_TARGETS(DECLARE_TARGET_CLASS)
    PER_TARGET_CONSTANTS(DECLARE_PER_TARGET_CONSTANT)

    static int encodeConst(int src);

    // Stage count, honoring an OVERRIDE_NUM_MAU_STAGES() call if one was made.
    static int NUM_MAU_STAGES() {
        return numMauStagesOverride ? numMauStagesOverride : NUM_MAU_STAGES_PRIVATE();
    }
    // Egress stage count is capped by the override as well.
    static int NUM_EGRESS_STAGES() {
        int egress_stages = NUM_EGRESS_STAGES_PRIVATE();
        return numMauStagesOverride && numMauStagesOverride < egress_stages ? numMauStagesOverride
                                                                           : egress_stages;
    }
    static int NUM_STAGES(gress_t gr) {
        return gr == EGRESS ? NUM_EGRESS_STAGES() : NUM_MAU_STAGES();
    }

    // An overridden (shortened) pipe always needs the output stage extension.
    static int OUTPUT_STAGE_EXTENSION() {
        return numMauStagesOverride ? 1 : OUTPUT_STAGE_EXTENSION_PRIVATE();
    }

    static void OVERRIDE_NUM_MAU_STAGES(int num);

    static int SRAM_ROWS(gress_t gr) {
        return gr == EGRESS ? SRAM_EGRESS_ROWS() : SRAM_INGRESS_ROWS();
    }

    // FIXME -- bus_type here is a Table::Layout::bus_type_t, but can't forward
    // declare a nested type.
    virtual int NUM_BUS_OF_TYPE_v(int bus_type) const;
    static int NUM_BUS_OF_TYPE(int bus_type);

 private:
    // Non-zero once OVERRIDE_NUM_MAU_STAGES() has been called (see target.cpp).
    static int numMauStagesOverride;
};

#include "backends/tofino/bf-asm/gen/tofino/memories.pipe_addrmap.h"
#include "backends/tofino/bf-asm/gen/tofino/memories.pipe_top_level.h"
#include "backends/tofino/bf-asm/gen/tofino/memories.prsr_mem_main_rspec.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.dprsr_hdr.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.dprsr_inp.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.ebp_rspec.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.ibp_rspec.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.mau_addrmap.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.pipe_addrmap.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.prsr_reg_merge_rspec.h"
#include "backends/tofino/bf-asm/gen/tofino/regs.tofino.h"

// Tofino1: register-set layout types plus all per-target constants.
// NOTE(review): template arguments on the std::map members below were lost in
// extraction ("std::map parser_memory[2];" etc.) -- confirm the key/value
// types (presumably handle -> register-struct pointer) against the original.
class Target::Tofino : public Target {
 public:
    static constexpr const char *const name = "tofino";
    static constexpr target_t tag = TOFINO;
    static constexpr target_t register_set = TOFINO;
    typedef Target::Tofino target_type;
    typedef Target::Tofino register_type;
    class Phv;
    struct top_level_regs {
        typedef ::Tofino::memories_top _mem_top;
        typedef ::Tofino::memories_pipe _mem_pipe;
        typedef ::Tofino::regs_top _regs_top;
        typedef ::Tofino::regs_pipe _regs_pipe;

        ::Tofino::memories_top mem_top;
        ::Tofino::memories_pipe mem_pipe;
        ::Tofino::regs_top reg_top;
        ::Tofino::regs_pipe reg_pipe;

        // map from handle to parser regs
        std::map parser_memory[2];
        std::map parser_ingress;
        std::map parser_egress;
        ::Tofino::regs_all_parse_merge parser_merge;
    };
    struct parser_regs : public ParserRegisterSet {
        typedef ::Tofino::memories_all_parser_ _memory;
        typedef ::Tofino::regs_all_parser_ingress _ingress;
        typedef ::Tofino::regs_all_parser_egress _egress;
        typedef ::Tofino::regs_all_parse_merge _merge;

        ::Tofino::memories_all_parser_ memory[2];
        ::Tofino::regs_all_parser_ingress ingress;
        ::Tofino::regs_all_parser_egress egress;
        ::Tofino::regs_all_parse_merge merge;
    };

    typedef ::Tofino::regs_match_action_stage_ mau_regs;
    struct deparser_regs {
        typedef ::Tofino::regs_all_deparser_input_phase _input;
        typedef ::Tofino::regs_all_deparser_header_phase _header;

        ::Tofino::regs_all_deparser_input_phase input;
        ::Tofino::regs_all_deparser_header_phase header;
    };
    // Per-target constants; read via the Target::NAME() static dispatchers.
    enum {
        ARAM_UNITS_PER_STAGE = 0,
        PARSER_CHECKSUM_UNITS = 2,
        PARSER_EXTRACT_BYTES = false,
        PARSER_DEPTH_MAX_BYTES_INGRESS = (((1 << 10) - 1) * 16),
        PARSER_DEPTH_MAX_BYTES_EGRESS = (((1 << 10) - 1) * 16),
        PARSER_DEPTH_MAX_BYTES_MULTITHREADED_EGRESS = 160,
        PARSER_DEPTH_MIN_BYTES_INGRESS = 0,
        PARSER_DEPTH_MIN_BYTES_EGRESS = 65,
        MATCH_BYTE_16BIT_PAIRS = true,
        MATCH_REQUIRES_PHYSID = false,
        MAX_IMMED_ACTION_DATA = 32,
        MAX_OVERHEAD_OFFSET = 64,
        MAX_OVERHEAD_OFFSET_NEXT = 40,
        NUM_MAU_STAGES_PRIVATE = 12,
        NUM_EGRESS_STAGES_PRIVATE = NUM_MAU_STAGES_PRIVATE,
        ACTION_INSTRUCTION_MAP_WIDTH = 7,
        DEPARSER_CHECKSUM_UNITS = 6,
        DEPARSER_CONSTANTS = 0,
        DEPARSER_MAX_POV_BYTES = 32,
        DEPARSER_MAX_POV_PER_USE = 1,
        DEPARSER_MAX_FD_ENTRIES = 192,
        DP_UNITS_PER_STAGE = 0,
        DYNAMIC_CONFIG = 0,
        DYNAMIC_CONFIG_INPUT_BITS = 0,
        EGRESS_SEPARATE = false,
        END_OF_PIPE = 0xff,
        EXACT_HASH_GROUPS = 8,
        EXACT_HASH_TABLES = 16,
        EXTEND_ALU_8_SLOTS = 0,
        EXTEND_ALU_16_SLOTS = 0,
        EXTEND_ALU_32_SLOTS = 0,
        GATEWAY_INHIBIT_INDEX = false,
        GATEWAY_MATCH_BITS = 56,  // includes extra expansion for range match
        GATEWAY_NEEDS_SEARCH_BUS = true,
        GATEWAY_PAYLOAD_GROUPS = 1,
        GATEWAY_ROWS = 8,
        GATEWAY_SINGLE_XBAR_GROUP = true,
        SUPPORT_TRUE_EOP = 0,
        INSTR_SRC2_BITS = 4,
        IMEM_COLORS = 2,
        IXBAR_HASH_GROUPS = 8,
        IXBAR_HASH_INDEX_MAX = 40,
        IXBAR_HASH_INDEX_STRIDE = 10,
        LOCAL_TIND_UNITS = 0,
        LONG_BRANCH_TAGS = 0,
        MAU_BASE_DELAY = 20,
        MAU_BASE_PREDICATION_DELAY = 11,
        MAU_ERROR_DELAY_ADJUST = 2,
        METER_ALU_GROUP_DATA_DELAY = 13,
        // To avoid under run scenarios, there is a minimum egress pipeline latency required
        MINIMUM_REQUIRED_EGRESS_PIPELINE_LATENCY = 160,
        NEXT_TABLE_EXEC_COMBINED = false,  // no next_exec on tofino1 at all
        NEXT_TABLE_SUCCESSOR_TABLE_DEPTH = 8,
        PHASE0_FORMAT_WIDTH = 64,
        REQUIRE_TCAM_ID = false,  // miss-only tables do not need a tcam id
        SRAM_EGRESS_ROWS = 8,
        SRAM_GLOBAL_ACCESS = false,
        SRAM_HBUS_SECTIONS_PER_STAGE = 0,
        SRAM_HBUSSES_PER_ROW = 0,
        SRAM_INGRESS_ROWS = 8,
        SRAM_LAMBS_PER_STAGE = 0,
        SRAM_LOGICAL_UNITS_PER_ROW = 6,
        SRAM_REMOVED_COLUMNS = 2,
        SRAM_STRIDE_COLUMN = 1,
        SRAM_STRIDE_ROW = 12,
        SRAM_STRIDE_STAGE = 0,
        SRAM_UNITS_PER_ROW = 12,
        STATEFUL_CMP_UNITS = 2,
        STATEFUL_CMP_ADDR_WIDTH = 2,
        STATEFUL_CMP_CONST_WIDTH = 4,
        STATEFUL_CMP_CONST_MASK = 0xf,
        STATEFUL_CMP_CONST_MIN = -8,
        STATEFUL_CMP_CONST_MAX = 7,
        STATEFUL_TMATCH_UNITS = 0,
        STATEFUL_OUTPUT_UNITS = 1,
        STATEFUL_PRED_MASK = (1U << (1 << STATEFUL_CMP_UNITS)) - 1,
        STATEFUL_REGFILE_ROWS = 4,
        STATEFUL_REGFILE_CONST_WIDTH = 32,
        SUPPORT_ALWAYS_RUN = 0,
        HAS_MPR = 0,
        SUPPORT_CONCURRENT_STAGE_DEP = 1,
        SUPPORT_OVERFLOW_BUS = 1,
        SUPPORT_SALU_FAST_CLEAR = 0,
        STATEFUL_ALU_ADDR_WIDTH = 2,
        STATEFUL_ALU_CONST_WIDTH = 4,
        STATEFUL_ALU_CONST_MASK = 0xf,
        STATEFUL_ALU_CONST_MIN = -8,  // TODO Is the same as the following one?
        STATEFUL_ALU_CONST_MAX = 7,
        MINIMUM_INSTR_CONSTANT = -8,  // TODO
        NUM_PARSERS = 18,
        NUM_PIPES = 4,
        OUTPUT_STAGE_EXTENSION_PRIVATE = 0,
        SYNTH2PORT_NEED_MAPRAMS = true,
        TCAM_EXTRA_NIBBLE = true,
        TCAM_GLOBAL_ACCESS = false,
        TCAM_MATCH_BUSSES = 2,
        TCAM_MEMORY_FULL_WIDTH = 47,
        TCAM_ROWS = 12,
        TCAM_UNITS_PER_ROW = 2,
        TCAM_XBAR_GROUPS = 12,
        TABLES_REQUIRE_ROW = 1,
    };
    // Tofino1 instruction-constant encoding: 10-bit payload with marker bits.
    static int encodeConst(int src) { return (src >> 10 << 15) | (0x8 << 10) | (src & 0x3ff); }
    TARGET_SPECIFIC_CLASSES
    REGISTER_SET_SPECIFIC_CLASSES
    TARGET_CLASS_SPECIFIC_CLASSES
};

void declare_registers(const Target::Tofino::top_level_regs *regs);
void undeclare_registers(const Target::Tofino::top_level_regs *regs);
void declare_registers(const Target::Tofino::parser_regs *regs);
void undeclare_registers(const Target::Tofino::parser_regs *regs);
void declare_registers(const Target::Tofino::mau_regs *regs, bool ignore, int stage);
void declare_registers(const Target::Tofino::deparser_regs *regs);
void undeclare_registers(const Target::Tofino::deparser_regs *regs);
void emit_parser_registers(const Target::Tofino::top_level_regs *regs, std::ostream &);

#include "backends/tofino/bf-asm/gen/jbay/memories.jbay_mem.h"
#include "backends/tofino/bf-asm/gen/jbay/memories.pipe_addrmap.h"
#include "backends/tofino/bf-asm/gen/jbay/memories.prsr_mem_main_rspec.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.dprsr_reg.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.epb_prsr4_reg.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.ipb_prsr4_reg.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.jbay_reg.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.mau_addrmap.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.pipe_addrmap.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.pmerge_reg.h"
#include "backends/tofino/bf-asm/gen/jbay/regs.prsr_reg_main_rspec.h"

// Tofino2 (JBay) base: the Tofino2 variants below derive from this and only
// override stage counts.
class Target::JBay : public Target {
 public:
    static constexpr const char *const name = "tofino2";
    static constexpr target_t tag = JBAY;
    static constexpr target_t register_set = JBAY;
    typedef Target::JBay target_type;
    typedef Target::JBay register_type;
    class Phv;
    struct top_level_regs {
        typedef ::JBay::memories_top _mem_top;
        typedef ::JBay::memories_pipe _mem_pipe;
        typedef ::JBay::regs_top _regs_top;
        typedef ::JBay::regs_pipe _regs_pipe;

        ::JBay::memories_top mem_top;
        ::JBay::memories_pipe mem_pipe;
        ::JBay::regs_top reg_top;
        ::JBay::regs_pipe reg_pipe;

        // map from handle to parser regs
        // NOTE(review): template arguments on the std::map members below were
        // lost in extraction -- confirm the key/value types against the original.
        std::map parser_memory[2];
        std::map parser_ingress;
        std::map parser_egress;
        std::map parser_main[2];
        ::JBay::regs_parse_merge parser_merge;
    };
    struct parser_regs : public ParserRegisterSet {
        typedef ::JBay::memories_parser_ _memory;
        typedef ::JBay::regs_parser_ingress _ingress;  // [9]
        typedef ::JBay::regs_parser_egress _egress;    // [9]
        typedef ::JBay::regs_parser_main_ _main;       // [9]
        typedef ::JBay::regs_parse_merge _merge;       // [1]

        ::JBay::memories_parser_ memory[2];
        ::JBay::regs_parser_ingress ingress;
        ::JBay::regs_parser_egress egress;
        ::JBay::regs_parser_main_ main[2];
        ::JBay::regs_parse_merge merge;
    };

    typedef ::JBay::regs_match_action_stage_ mau_regs;
    typedef ::JBay::regs_deparser deparser_regs;
    // Per-target constants; read via the Target::NAME() static dispatchers.
    enum : int {
        ARAM_UNITS_PER_STAGE = 0,
        PARSER_CHECKSUM_UNITS = 5,
        PARSER_EXTRACT_BYTES = true,
        PARSER_DEPTH_MAX_BYTES_INGRESS = (((1 << 10) - 1) * 16),
        PARSER_DEPTH_MAX_BYTES_EGRESS = (32 * 16),
        PARSER_DEPTH_MAX_BYTES_MULTITHREADED_EGRESS = (32 * 16),
        PARSER_DEPTH_MIN_BYTES_INGRESS = 0,
        PARSER_DEPTH_MIN_BYTES_EGRESS = 0,
        MATCH_BYTE_16BIT_PAIRS = false,
        MATCH_REQUIRES_PHYSID = false,
        MAX_IMMED_ACTION_DATA = 32,
        MAX_OVERHEAD_OFFSET = 64,
        MAX_OVERHEAD_OFFSET_NEXT = 40,
#ifdef EMU_OVERRIDE_STAGE_COUNT
        NUM_MAU_STAGES_PRIVATE = EMU_OVERRIDE_STAGE_COUNT,
        OUTPUT_STAGE_EXTENSION_PRIVATE = 1,
#else
        NUM_MAU_STAGES_PRIVATE = 20,
        OUTPUT_STAGE_EXTENSION_PRIVATE = 0,
#endif
        NUM_EGRESS_STAGES_PRIVATE = NUM_MAU_STAGES_PRIVATE,
        ACTION_INSTRUCTION_MAP_WIDTH = 8,
        DEPARSER_CHECKSUM_UNITS = 8,
        DEPARSER_CONSTANTS = 8,
        DEPARSER_MAX_POV_BYTES = 16,
        DEPARSER_MAX_POV_PER_USE = 1,
        DEPARSER_CHUNKS_PER_GROUP = 8,
        DEPARSER_CHUNK_SIZE = 8,
        DEPARSER_CHUNK_GROUPS = 16,
        DEPARSER_CLOTS_PER_GROUP = 4,
        DEPARSER_TOTAL_CHUNKS = DEPARSER_CHUNK_GROUPS * DEPARSER_CHUNKS_PER_GROUP,
        DEPARSER_MAX_FD_ENTRIES = DEPARSER_TOTAL_CHUNKS,
        DP_UNITS_PER_STAGE = 0,
        DYNAMIC_CONFIG = 0,
        DYNAMIC_CONFIG_INPUT_BITS = 0,
        EGRESS_SEPARATE = false,
        END_OF_PIPE = 0x1ff,
        EXACT_HASH_GROUPS = 8,
        EXACT_HASH_TABLES = 16,
        EXTEND_ALU_8_SLOTS = 0,
        EXTEND_ALU_16_SLOTS = 0,
        EXTEND_ALU_32_SLOTS = 0,
        GATEWAY_INHIBIT_INDEX = false,
        GATEWAY_MATCH_BITS = 56,  // includes extra expansion for range match
        GATEWAY_NEEDS_SEARCH_BUS = true,
        GATEWAY_PAYLOAD_GROUPS = 5,
        GATEWAY_ROWS = 8,
        GATEWAY_SINGLE_XBAR_GROUP = true,
        SUPPORT_TRUE_EOP = 1,
        INSTR_SRC2_BITS = 5,
        IMEM_COLORS = 2,
        IXBAR_HASH_GROUPS = 8,
        IXBAR_HASH_INDEX_MAX = 40,
        IXBAR_HASH_INDEX_STRIDE = 10,
        LOCAL_TIND_UNITS = 0,
        LONG_BRANCH_TAGS = 8,
        MAU_BASE_DELAY = 23,
        MAU_BASE_PREDICATION_DELAY = 13,
        MAU_ERROR_DELAY_ADJUST = 3,
        METER_ALU_GROUP_DATA_DELAY = 15,
        NEXT_TABLE_EXEC_COMBINED = true,
        NEXT_TABLE_SUCCESSOR_TABLE_DEPTH = 8,
        PHASE0_FORMAT_WIDTH = 128,
        REQUIRE_TCAM_ID = false,  // miss-only tables do not need a tcam id
        SRAM_EGRESS_ROWS = 8,
        SRAM_GLOBAL_ACCESS = false,
        SRAM_HBUS_SECTIONS_PER_STAGE = 0,
        SRAM_HBUSSES_PER_ROW = 0,
        SRAM_INGRESS_ROWS = 8,
        SRAM_LAMBS_PER_STAGE = 0,
        SRAM_LOGICAL_UNITS_PER_ROW = 6,
        SRAM_REMOVED_COLUMNS = 2,
        SRAM_STRIDE_COLUMN = 1,
        SRAM_STRIDE_ROW = 12,
        SRAM_STRIDE_STAGE = 0,
        SRAM_UNITS_PER_ROW = 12,
        STATEFUL_CMP_UNITS = 4,
        STATEFUL_CMP_ADDR_WIDTH = 2,
        STATEFUL_CMP_CONST_WIDTH = 6,
        STATEFUL_CMP_CONST_MASK = 0x3f,
        STATEFUL_CMP_CONST_MIN = -32,
        STATEFUL_CMP_CONST_MAX = 31,
        STATEFUL_TMATCH_UNITS = 2,
        STATEFUL_OUTPUT_UNITS = 4,
        STATEFUL_PRED_MASK = (1U << (1 << STATEFUL_CMP_UNITS)) - 1,
        STATEFUL_REGFILE_ROWS = 4,
        STATEFUL_REGFILE_CONST_WIDTH = 34,
        SUPPORT_ALWAYS_RUN = 1,
        HAS_MPR = 1,
        SUPPORT_CONCURRENT_STAGE_DEP = 0,
        SUPPORT_OVERFLOW_BUS = 0,
        SUPPORT_SALU_FAST_CLEAR = 1,
        STATEFUL_ALU_ADDR_WIDTH = 2,
        STATEFUL_ALU_CONST_WIDTH = 4,
        STATEFUL_ALU_CONST_MASK = 0xf,
        STATEFUL_ALU_CONST_MIN = -8,  // TODO Is the same as the following one?
        STATEFUL_ALU_CONST_MAX = 7,
        MINIMUM_INSTR_CONSTANT = -4,  // TODO
        NUM_PARSERS = 36,
        NUM_PIPES = 4,
        TABLES_REQUIRE_ROW = 1,
        SYNTH2PORT_NEED_MAPRAMS = true,
        TCAM_EXTRA_NIBBLE = true,
        TCAM_GLOBAL_ACCESS = false,
        TCAM_MATCH_BUSSES = 2,
        TCAM_MEMORY_FULL_WIDTH = 47,
        TCAM_ROWS = 12,
        TCAM_UNITS_PER_ROW = 2,
        TCAM_XBAR_GROUPS = 12,
    };
    // Tofino2 instruction-constant encoding: 11-bit payload with marker bits.
    static int encodeConst(int src) { return (src >> 11 << 16) | (0x8 << 11) | (src & 0x7ff); }
    TARGET_SPECIFIC_CLASSES
    REGISTER_SET_SPECIFIC_CLASSES
};
void declare_registers(const Target::JBay::top_level_regs *regs);
void undeclare_registers(const Target::JBay::top_level_regs *regs);
void declare_registers(const Target::JBay::parser_regs *regs);
void undeclare_registers(const Target::JBay::parser_regs *regs);
void declare_registers(const Target::JBay::mau_regs *regs, bool ignore, int stage);
void declare_registers(const Target::JBay::deparser_regs *regs);

// Tofino2 variants: identical to JBay except for stage counts (the enum
// entries here shadow the base-class values via the per-target dispatch).
class Target::Tofino2H : public Target::JBay {
 public:
    static constexpr const char *const name = "tofino2h";
    static constexpr target_t tag = TOFINO2H;
    typedef Target::Tofino2H target_type;
    class Phv;
    enum {
        NUM_MAU_STAGES_PRIVATE = 6,
        NUM_EGRESS_STAGES_PRIVATE = NUM_MAU_STAGES_PRIVATE,
        OUTPUT_STAGE_EXTENSION_PRIVATE = 1,
    };
    TARGET_SPECIFIC_CLASSES
};

class Target::Tofino2M : public Target::JBay {
 public:
    static constexpr const char *const name = "tofino2m";
    static constexpr target_t tag = TOFINO2M;
    typedef Target::Tofino2M target_type;
    class Phv;
    enum {
        NUM_MAU_STAGES_PRIVATE = 12,
        NUM_EGRESS_STAGES_PRIVATE = NUM_MAU_STAGES_PRIVATE,
        OUTPUT_STAGE_EXTENSION_PRIVATE = 1,
    };
    TARGET_SPECIFIC_CLASSES
};

class Target::Tofino2U : public Target::JBay {
 public:
    static constexpr const char *const name = "tofino2u";
    static constexpr target_t tag = TOFINO2U;
    typedef Target::Tofino2U target_type;
    class Phv;
    enum {
        NUM_MAU_STAGES_PRIVATE = 20,
        NUM_EGRESS_STAGES_PRIVATE = NUM_MAU_STAGES_PRIVATE,
    };
    TARGET_SPECIFIC_CLASSES
};

class Target::Tofino2A0 : public Target::JBay {
 public:
    static constexpr const char *const name = "tofino2a0";
    static constexpr target_t tag = TOFINO2A0;
    typedef Target::Tofino2A0 target_type;
    class Phv;
    enum {
        NUM_MAU_STAGES_PRIVATE = 20,
        NUM_EGRESS_STAGES_PRIVATE = NUM_MAU_STAGES_PRIVATE,
    };
    TARGET_SPECIFIC_CLASSES
};

void emit_parser_registers(const Target::JBay::top_level_regs *regs, std::ostream &);

/** Macro to buid a switch table switching on a target_t, expanding to the same
 * code for each target, with TARGET being a typedef for the target type */
#define SWITCH_FOREACH_TARGET(VAR, ...)                      \
    switch (VAR) {                                           \
        FOR_ALL_TARGETS(DO_SWITCH_FOREACH_TARGET, __VA_ARGS__) \
        default:                                             \
            BUG("invalid target");                           \
    }

#define DO_SWITCH_FOREACH_TARGET(TARGET_, ...) \
    case Target::TARGET_::tag: {               \
        typedef Target::TARGET_ TARGET;        \
        __VA_ARGS__                            \
        break;                                 \
    }

#define SWITCH_FOREACH_REGISTER_SET(VAR, ...)                             \
    switch (VAR) {                                                        \
        FOR_ALL_REGISTER_SETS(DO_SWITCH_FOREACH_REGISTER_SET, __VA_ARGS__) \
        default:                                                          \
            BUG("invalid target");                                        \
    }

#define DO_SWITCH_FOREACH_REGISTER_SET(REGS_, ...) \
    TARGETS_USING_REGS(REGS_, CASE_FOR_TARGET) {   \
        typedef Target::REGS_ TARGET;              \
        __VA_ARGS__                                \
        break;                                     \
    }

#define SWITCH_FOREACH_TARGET_CLASS(VAR, ...)                               \
    switch (VAR) {                                                          \
        FOR_ALL_TARGET_CLASSES(DO_SWITCH_FOREACH_TARGET_CLASS, __VA_ARGS__) \
        default:                                                            \
            BUG("invalid target");                                          \
    }

#define DO_SWITCH_FOREACH_TARGET_CLASS(CLASS_, ...) \
    TARGETS_IN_CLASS(CLASS_, CASE_FOR_TARGET) {     \
        typedef Target::CLASS_ TARGET;              \
        __VA_ARGS__                                 \
        break;                                      \
    }

#define CASE_FOR_TARGET(TARGET) case Target::TARGET::tag:

/* macro to define a function that overloads over a GROUP of types -- will declare all the
 * functions that overload on a Target::type argument and a 'generic' overload that calls
 * the right specific overload based on options.target
 * GROUP can be one of
 *   TARGET -- overload on all the different targets
 *   REGISTER_SET -- overload just on the register sets (targets that share a register
 *          set will only have one overload)
 *   TARGET_CLASS -- overload based on the CLASS
 * RTYPE NAME ARGDECL together make the declaration of the (generic) function, the overloads
 * will all have a Target::type argument prepended.  The final ARGS argument is the argument
 * list that that will be forwarded (basically ARGDECL without the types)
 */
#define DECL_OVERLOAD_FUNC(TARGET, RTYPE, NAME, ARGDECL, ARGS) \
    RTYPE NAME(Target::TARGET EXPAND_COMMA_CLOSE ARGDECL;
#define OVERLOAD_FUNC_FOREACH(GROUP, RTYPE, NAME, ARGDECL, ARGS, ...)                  \
    FOR_EACH_##GROUP(DECL_OVERLOAD_FUNC, RTYPE, NAME, ARGDECL, ARGS)                   \
    RTYPE NAME ARGDECL __VA_ARGS__ {                                                   \
        SWITCH_FOREACH_##GROUP(options.target, return NAME(TARGET() EXPAND_COMMA ARGS);) \
    }

#endif /* TARGET_H_ */
diff --git a/backends/tofino/bf-asm/ternary_match.cpp b/backends/tofino/bf-asm/ternary_match.cpp
new file mode 100644
index 00000000000..688c0e3a869
--- /dev/null
+++ b/backends/tofino/bf-asm/ternary_match.cpp
@@ -0,0 +1,1226 @@
/**
 * Copyright (C) 2024 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "tofino/ternary_match.h" + +#include "action_bus.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "input_xbar.h" +#include "instruction.h" +#include "lib/algorithm.h" +#include "lib/range.h" +#include "misc.h" + +Table::Format::Field *TernaryMatchTable::lookup_field(const std::string &n, + const std::string &act) const { + auto *rv = format ? format->field(n) : nullptr; + if (!rv && gateway) rv = gateway->lookup_field(n, act); + if (!rv && indirect) rv = indirect->lookup_field(n, act); + if (!rv && !act.empty()) { + if (auto call = get_action()) { + rv = call->lookup_field(n, act); + } + } + return rv; +} + +Table::Format::Field *TernaryIndirectTable::lookup_field(const std::string &n, + const std::string &act) const { + auto *rv = format ? format->field(n) : nullptr; + if (!rv && !act.empty()) { + if (auto call = get_action()) rv = call->lookup_field(n, act); + } + return rv; +} + +void TernaryMatchTable::vpn_params(int &width, int &depth, int &period, + const char *&period_name) const { + if ((width = match.size()) == 0) { + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + width = input_xbar[0]->tcam_width(); + } + depth = width ? 
layout_size() / width : 0; + period = 1; + period_name = 0; +} + +void TernaryMatchTable::alloc_vpns() { + if (no_vpns || layout.size() == 0 || layout[0].vpns.size() > 0) return; + int period, width, depth; + const char *period_name; + vpn_params(width, depth, period, period_name); + if (width == 0) return; + std::vector rows; + std::set> stage_cols; + for (auto &r : layout) { + for (auto &mem : r.memunits) stage_cols.emplace(mem.stage, mem.col); + rows.push_back(&r); + r.vpns.resize(r.memunits.size()); + } + std::sort(rows.begin(), rows.end(), + [](Layout *const &a, Layout *const &b) -> bool { return a->row < b->row; }); + int vpn = 0; + for (auto [stage, col] : stage_cols) { + for (auto *r : rows) { + unsigned idx = find(r->memunits, MemUnit(stage, r->row, col)) - r->memunits.begin(); + if (idx < r->vpns.size()) r->vpns[idx] = vpn++ / width; + } + if (vpn % width != 0) + error(layout[0].lineno, + "%d-wide ternary match must use a multiple of %d tcams " + "in each column", + width, width); + } +} + +TernaryMatchTable::Match::Match(const value_t &v) : lineno(v.lineno) { + if (v.type == tVEC) { + if (v.vec.size < 2 || v.vec.size > 3) { + error(v.lineno, "Syntax error"); + return; + } + if (!CHECKTYPE(v[0], tINT) || !CHECKTYPE(v[v.vec.size - 1], tINT)) return; + if ((word_group = v[0].i) < 0 || v[0].i >= Target::TCAM_XBAR_GROUPS()) + error(v[0].lineno, "Invalid input xbar group %" PRId64, v[0].i); + if (Target::TCAM_EXTRA_NIBBLE() && v.vec.size == 3 && CHECKTYPE(v[1], tINT)) { + if ((byte_group = v[1].i) < 0 || v[1].i >= Target::TCAM_XBAR_GROUPS() / 2) + error(v[1].lineno, "Invalid input xbar group %" PRId64, v[1].i); + } else { + byte_group = -1; + } + if ((byte_config = v[v.vec.size - 1].i) < 0 || byte_config >= 4) + error(v[v.vec.size - 1].lineno, "Invalid input xbar byte control %d", byte_config); + } else if (CHECKTYPE(v, tMAP)) { + for (auto &kv : MapIterChecked(v.map)) { + if (kv.key == "group") { + if (kv.value.type != tINT || kv.value.i < 0 || + kv.value.i 
>= Target::TCAM_XBAR_GROUPS()) + error(kv.value.lineno, "Invalid input xbar group %s", value_desc(kv.value)); + else + word_group = kv.value.i; + } else if (Target::TCAM_EXTRA_NIBBLE() && kv.key == "byte_group") { + if (kv.value.type != tINT || kv.value.i < 0 || + kv.value.i >= Target::TCAM_XBAR_GROUPS() / 2) + error(kv.value.lineno, "Invalid input xbar group %s", value_desc(kv.value)); + else + byte_group = kv.value.i; + } else if (Target::TCAM_EXTRA_NIBBLE() && kv.key == "byte_config") { + if (kv.value.type != tINT || kv.value.i < 0 || kv.value.i >= 4) + error(kv.value.lineno, "Invalid byte group config %s", value_desc(kv.value)); + else + byte_config = kv.value.i; + } else if (kv.key == "dirtcam") { + if (kv.value.type != tINT || kv.value.i < 0 || kv.value.i > 0xfff) + error(kv.value.lineno, "Invalid dirtcam mode %s", value_desc(kv.value)); + else + dirtcam = kv.value.i; + } else { + error(kv.key.lineno, "Unknown key '%s' in ternary match spec", value_desc(kv.key)); + } + } + } +} + +void TernaryMatchTable::setup(VECTOR(pair_t) & data) { + tcam_id = -1; + indirect_bus = -1; + common_init_setup(data, true, P4Table::MatchEntry); + if (input_xbar.empty()) input_xbar.emplace_back(InputXbar::create(this)); + if (auto *m = get(data, "match")) { + if (CHECKTYPE2(*m, tVEC, tMAP)) { + if (m->type == tVEC) + for (auto &v : m->vec) match.emplace_back(v); + else + match.emplace_back(*m); + } + } + for (auto &kv : MapIterChecked(data, {"meter", "stats", "stateful"})) { + if (common_setup(kv, data, P4Table::MatchEntry)) { + } else if (kv.key == "match") { + /* done above to be done before vpns */ + } else if (kv.key == "indirect") { + setup_indirect(kv.value); + } else if (kv.key == "indirect_bus") { + if (CHECKTYPE(kv.value, tINT)) { + if (kv.value.i < 0 || kv.value.i >= 16) { + error(kv.value.lineno, "Invalid ternary indirect bus number"); + } else { + indirect_bus = kv.value.i; + if (auto *old = + stage->tcam_indirect_bus_use[indirect_bus / 2][indirect_bus & 1]) + 
error(kv.value.lineno, "Indirect bus %d already in use by table %s", + indirect_bus, old->name()); + } + } + } else if (kv.key == "tcam_id") { + if (CHECKTYPE(kv.value, tINT)) { + if ((tcam_id = kv.value.i) < 0 || tcam_id >= TCAM_TABLES_PER_STAGE) + error(kv.key.lineno, "Invalid tcam_id %d", tcam_id); + else if (stage->tcam_id_use[tcam_id]) + error(kv.key.lineno, "Tcam id %d already in use by table %s", tcam_id, + stage->tcam_id_use[tcam_id]->name()); + else + stage->tcam_id_use[tcam_id] = this; + physical_ids[tcam_id] = 1; + } + } else { + warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key), + name()); + } + } + if (Target::TCAM_GLOBAL_ACCESS()) + alloc_global_tcams(); + else + alloc_rams(false, stage->tcam_use, &stage->tcam_match_bus_use); + check_tcam_match_bus(layout); + if (indirect_bus >= 0) { + stage->tcam_indirect_bus_use[indirect_bus / 2][indirect_bus & 1] = this; + } + if (indirect.set()) { + if (indirect_bus >= 0) + error(lineno, "Table %s has both ternary indirect table and explicit indirect bus", + name()); + if (!attached.stats.empty() || !attached.meters.empty() || !attached.statefuls.empty()) + error(lineno, + "Table %s has ternary indirect table and directly attached stats/meters" + " -- move them to indirect table", + name()); + } else if (!action.set() && !actions) { + error(lineno, "Table %s has no indirect, action table or immediate actions", name()); + } + if (action && !action_bus) action_bus = ActionBus::create(); +} + +bitvec TernaryMatchTable::compute_reachable_tables() { + MatchTable::compute_reachable_tables(); + if (indirect) reachable_tables_ |= indirect->reachable_tables(); + return reachable_tables_; +} + +void TernaryMatchTable::pass0() { + MatchTable::pass0(); + if (indirect.check() && indirect->set_match_table(this, false) != TERNARY_INDIRECT) + error(indirect.lineno, "%s is not a ternary indirect table", indirect->name()); +} + +void TernaryMatchTable::pass1() { + LOG1("### Ternary match table " << 
name() << " pass1 " << loc());
    // pass1: validate match word specs against the layout, assign search buses,
    // record byte-group usage, and compute the row-chaining masks for wide matches.
    if (action_bus) action_bus->pass1(this);
    MatchTable::pass1();
    stage->table_use[timing_thread(gress)] |= Stage::USE_TCAM;
    if (layout_size() == 0) layout.clear();
    BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name());
    // No explicit 'match' spec: synthesize one from the input xbar tcam groups.
    if (match.empty() && input_xbar[0]->tcam_width() && layout.size() != 0) {
        match.resize(input_xbar[0]->tcam_width());
        for (unsigned i = 0; i < match.size(); i++) {
            match[i].word_group = input_xbar[0]->tcam_word_group(i);
            match[i].byte_group = input_xbar[0]->tcam_byte_group(i / 2);
            match[i].byte_config = i & 1;
        }
        match.back().byte_config = 3;  // last word carries the version nibble
    }
    if (match.size() == 0) {
        if (layout.size() != 0)
            error(layout[0].lineno, "No match or input_xbar in non-empty ternary table %s", name());
    } else if (layout.size() % match.size() != 0) {
        error(layout[0].lineno, "Rows not a multiple of the match width in tables %s", name());
    } else if (layout.size() == 0) {
        error(lineno, "Empty ternary table with non-empty match");
    } else {
        // Walk rows in layout order, cycling through the match words; check that
        // each row-pair's shared byte group is consistent across tables.
        auto mg = match.begin();
        for (auto &row : layout) {
            if (!row.bus.count(Layout::SEARCH_BUS))
                row.bus[Layout::SEARCH_BUS] = row.memunits.at(0).col;
            auto bus = row.bus.at(Layout::SEARCH_BUS);
            if (mg->byte_group >= 0) {
                auto &bg_use = stage->tcam_byte_group_use[row.row / 2][bus];
                if (bg_use.first) {
                    if (bg_use.second != mg->byte_group) {
                        error(mg->lineno,
                              "Conflicting tcam byte group between rows %d and %d "
                              "in col %d for table %s",
                              row.row, row.row ^ 1, bus, name());
                        if (bg_use.first != this)
                            error(bg_use.first->lineno, "...also used in table %s",
                                  bg_use.first->name());
                    }
                } else {
                    bg_use.first = this;
                    bg_use.second = mg->byte_group;
                }
            }
            if (++mg == match.end()) mg = match.begin();
        }
    }
    if (error_count > 0) return;
    // Compute per-column chain masks: rows of a wide match chain their hit signal
    // toward row 6, where the match address is produced.
    for (auto &chain_rows_col : chain_rows) chain_rows_col = 0;
    unsigned row_use = 0;
    for (auto &row : layout) row_use |= 1U << row.row;
    unsigned word = 0, wide_row_use = 0;
    int prev_row = -1;
    // NOTE(review): '<MemUnit>' reconstructed -- template args were lost in extraction.
    std::vector<MemUnit> *memunits = nullptr;
    for (auto &row : layout) {
        if (row.memunits.empty()) {
            error(row.lineno, "Empty row in ternary table %s", name());
            continue;
        }
        if (memunits) {
            // all rows of a wide match must use the same columns (in the same stage)
            if (row.memunits.size() != memunits->size())
                error(row.lineno, "Column mismatch across rows in wide tcam match");
            for (size_t i = 0; i < row.memunits.size(); ++i)
                if (row.memunits[i].stage != memunits->at(i).stage ||
                    row.memunits[i].col != memunits->at(i).col)
                    error(row.lineno, "Column mismatch across rows in wide tcam match");
        } else {
            memunits = &row.memunits;
        }
        wide_row_use |= 1U << row.row;
        if (++word == match.size()) {
            // finished one wide-match group -- its rows must be contiguous
            int top_row = floor_log2(wide_row_use);
            int bottom_row = top_row + 1 - match.size();
            if (wide_row_use + (1U << bottom_row) != 1U << (top_row + 1)) {
                error(row.lineno,
                      "Ternary match rows must be contiguous "
                      "within each group of rows in a wide match");
            } else {
                // rows chain towards row 6
                if (top_row < 6)
                    wide_row_use -= 1U << top_row;
                else if (bottom_row > 6)
                    wide_row_use -= 1U << bottom_row;
                else
                    wide_row_use -= 1U << 6;
                for (auto &memunit : *memunits) {
                    int col = memunit.col;
                    if (col < 0 || col >= TCAM_UNITS_PER_ROW)
                        error(row.lineno, "Invalid column %d in table %s", col, name());
                    else
                        chain_rows[col] |= wide_row_use;
                }
            }
            word = 0;
            memunits = nullptr;
            wide_row_use = 0;
        }
    }
    if (indirect) {
        if (hit_next.size() > 0 && indirect->hit_next.size() > 0)
            error(lineno, "Ternary Match table with both direct and indirect next tables");
        if (!indirect->p4_table) indirect->p4_table = p4_table;
        // multiple next tables require a selector field at bit 0 of the indirect format
        if (hit_next.size() > 1 || indirect->hit_next.size() > 1) {
            if (auto *next = indirect->format->field("next")) {
                if (next->bit(0) != 0)
                    error(indirect->format->lineno,
                          "ternary indirect 'next' field must be"
                          " at bit 0");
            } else if (auto *action = indirect->format->field("action")) {
                if (action->bit(0) != 0)
                    error(indirect->format->lineno,
                          "ternary indirect 'action' field must be"
                          " at bit 0 to be used as next table selector");
            } else {
                error(indirect->format->lineno, "No 'next' or 'action' field in format");
            }
        }
        if (format)
            error(format->lineno,
                  "Format unexpected in Ternary Match table %s with separate "
                  "Indirect table %s",
                  name(), indirect->name());
    } else if (format) {
        format->pass1(this);
    }
    attached.pass1(this);
    if (hit_next.size() > 2 && !indirect)
        error(lineno, "Ternary Match tables cannot directly specify more than 2 hit next tables");
}

// pass2: allocate anything not explicitly specified (logical id, indirect bus)
// and run pass2 on all sub-objects.
void TernaryMatchTable::pass2() {
    LOG1("### Ternary match table " << name() << " pass2 " << loc());
    if (logical_id < 0) choose_logical_id();
    for (auto &ixb : input_xbar) ixb->pass2();
    if (!indirect && indirect_bus < 0) {
        // no ternary indirect table -- grab any free indirect bus for the result
        for (int i = 0; i < 16; i++)
            if (!stage->tcam_indirect_bus_use[i / 2][i & 1]) {
                indirect_bus = i;
                stage->tcam_indirect_bus_use[i / 2][i & 1] = this;
                break;
            }
        if (indirect_bus < 0)
            error(lineno, "No ternary indirect bus available for table %s", name());
    }
    if (actions) actions->pass2(this);
    if (action_bus) action_bus->pass2(this);
    if (gateway) gateway->pass2();
    if (idletime) idletime->pass2();
    if (is_alpm()) {
        if (auto *acts = get_actions()) {
            for (auto act = acts->begin(); act != acts->end(); act++) {
                set_partition_action_handle(act->handle);
                if (act->p4_params_list.size() > 0) {
                    // assume first parameter is partition_field_name
                    set_partition_field_name(act->p4_params_list[0].name);
                }
            }
        }
    }
    for (auto &hd : hash_dist) hd.pass2(this);
}

void TernaryMatchTable::pass3() {
    LOG1("### Ternary match table " << name() << " pass3 " << loc());
    MatchTable::pass3();
    if (action_bus) action_bus->pass3(this);
}

extern int get_address_mau_actiondata_adr_default(unsigned log2size, bool per_flow_enable);

// Enable the ghost-thread bit for a tcam unit; Tofino1 has no ghost thread,
// so it gets an empty specialization below.
// NOTE(review): '<class REGS>' reconstructed -- template args were lost in extraction.
template <class REGS>
inline static void tcam_ghost_enable(REGS &regs, int row, int col) {
    regs.tcams.col[col].tcam_ghost_thread_en[row] = 1;
}
template <>
void
tcam_ghost_enable(Target::Tofino::mau_regs &regs, int row, int col) {}

// Record in the tcam_table_map which rows belong to this tcam table; rows that
// chain their result to another row (wide match) are excluded.
// NOTE(review): '<class REGS>' reconstructed -- template args were lost in extraction.
template <class REGS>
void TernaryMatchTable::tcam_table_map(REGS &regs, int row, int col) {
    if (tcam_id >= 0) {
        if (!((chain_rows[col] >> row) & 1))
            regs.tcams.col[col].tcam_table_map[tcam_id] |= 1U << row;
    }
}

static void set_tcam_mode_logical_table(ubits<4> &reg, int tcam_id, int logical_id) {
    reg = logical_id;
}
// TODO: Unused?
// static void set_tcam_mode_logical_table(ubits<8> &reg, int tcam_id, int logical_id) {
//     reg |= 1U << tcam_id;
// }

/**
 * Program all MAU registers for this ternary match table: per-tcam-unit mode
 * and input-xbar muxing, hit-to-logical-table mapping, and (when an indirect
 * bus is used directly) the action-address shift/mask/default registers.
 */
// NOTE(review): '<class REGS>' reconstructed -- template args were lost in extraction.
template <class REGS>
void TernaryMatchTable::write_regs_vt(REGS &regs) {
    LOG1("### Ternary match table " << name() << " write_regs " << loc());
    MatchTable::write_regs(regs, 1, indirect);
    unsigned word = 0;
    auto &merge = regs.rams.match.merge;
    for (Layout &row : layout) {
        auto vpn = row.vpns.begin();
        for (const auto &tcam : row.memunits) {
            BUG_CHECK(tcam.stage == INT_MIN && tcam.row == row.row, "bogus tcam %s in row %d",
                      tcam.desc(), row.row);
            auto &tcam_mode = regs.tcams.col[tcam.col].tcam_mode[row.row];
            // tcam_mode.tcam_data1_select = row.bus; -- no longer used
            if (options.match_compiler) tcam_mode.tcam_data1_select = tcam.col;
            tcam_mode.tcam_chain_out_enable = (chain_rows[tcam.col] >> row.row) & 1;
            if (gress == INGRESS)
                tcam_mode.tcam_ingress = 1;
            else if (gress == EGRESS)
                tcam_mode.tcam_egress = 1;
            else if (gress == GHOST)
                tcam_ghost_enable(regs, row.row, tcam.col);
            // only the last row of a chain (or an unchained row) outputs a match
            tcam_mode.tcam_match_output_enable =
                ((~chain_rows[tcam.col] | ALWAYS_ENABLE_ROW) >> row.row) & 1;
            tcam_mode.tcam_vpn = *vpn++;
            set_tcam_mode_logical_table(tcam_mode.tcam_logical_table, tcam_id, logical_id);
            // dirtcam mode: low 10 bits for data, upper bits for valid-bit handling
            tcam_mode.tcam_data_dirtcam_mode = match[word].dirtcam & 0x3ff;
            tcam_mode.tcam_vbit_dirtcam_mode = match[word].dirtcam >> 10;
            /* TODO -- always disable tcam_validbit_xbar? */
            auto &tcam_vh_xbar = regs.tcams.vh_data_xbar;
            if (options.match_compiler) {
                for (int i = 0; i < 8; i++)
                    tcam_vh_xbar.tcam_validbit_xbar_ctl[tcam.col][row.row / 2][i] |= 15;
            }
            auto &halfbyte_mux_ctl = tcam_vh_xbar.tcam_row_halfbyte_mux_ctl[tcam.col][row.row];
            halfbyte_mux_ctl.tcam_row_halfbyte_mux_ctl_select = match[word].byte_config;
            halfbyte_mux_ctl.tcam_row_halfbyte_mux_ctl_enable = 1;
            halfbyte_mux_ctl.tcam_row_search_thread = timing_thread(gress);
            if (match[word].word_group >= 0)
                setup_muxctl(tcam_vh_xbar.tcam_row_output_ctl[tcam.col][row.row],
                             match[word].word_group);
            if (match[word].byte_group >= 0)
                setup_muxctl(tcam_vh_xbar.tcam_extra_byte_ctl[tcam.col][row.row / 2],
                             match[word].byte_group);
            tcam_table_map(regs, row.row, tcam.col);
        }
        if (++word == match.size()) word = 0;
    }
    if (tcam_id >= 0)
        setup_muxctl(merge.tcam_hit_to_logical_table_ixbar_outputmap[tcam_id], logical_id);
    if (tcam_id >= 0) {
        if (stage->table_use[timing_thread(gress)] & Stage::USE_TCAM)
            merge.tcam_table_prop[tcam_id].tcam_piped = 1;
        merge.tcam_table_prop[tcam_id].thread = timing_thread(gress);
        merge.tcam_table_prop[tcam_id].enabled = 1;
        regs.tcams.tcam_output_table_thread[tcam_id] = 1 << timing_thread(gress);
    }
    if (indirect_bus >= 0) {
        /* FIXME -- factor into corresponding code in MatchTable::write_regs */
        setup_muxctl(merge.match_to_logical_table_ixbar_outputmap[1][indirect_bus], logical_id);
        setup_muxctl(merge.match_to_logical_table_ixbar_outputmap[3][indirect_bus], logical_id);
        if (tcam_id >= 0) {
            setup_muxctl(merge.tcam_match_adr_to_physical_oxbar_outputmap[indirect_bus], tcam_id);
        }
        if (action) {
            /* FIXME -- factor with TernaryIndirect code below */
            // NOTE(review): '<ActionTable>' reconstructed -- template args lost in extraction.
            if (auto adt = action->to<ActionTable>()) {
                merge.mau_actiondata_adr_default[1][indirect_bus] = adt->determine_default(action);
                merge.mau_actiondata_adr_mask[1][indirect_bus] = adt->determine_mask(action);
                merge.mau_actiondata_adr_vpn_shiftcount[1][indirect_bus] =
                    adt->determine_vpn_shiftcount(action);
                merge.mau_actiondata_adr_tcam_shiftcount[indirect_bus] =
                    adt->determine_shiftcount(action, 0, 0, 0);
            }
        }
        attached.write_tcam_merge_regs(regs, this, indirect_bus, 0);
        merge.tind_bus_prop[indirect_bus].tcam_piped = 1;
        merge.tind_bus_prop[indirect_bus].thread = timing_thread(gress);
        merge.tind_bus_prop[indirect_bus].enabled = 1;
        if (idletime)
            merge.mau_idletime_adr_tcam_shiftcount[indirect_bus] = idletime->direct_shiftcount();
    }
    if (actions) actions->write_regs(regs, this);
    if (gateway) gateway->write_regs(regs);
    if (idletime) idletime->write_regs(regs);
    for (auto &hd : hash_dist) hd.write_regs(regs, this);
    merge.exact_match_logical_result_delay |= 1 << logical_id;
    regs.cfg_regs.mau_cfg_movereg_tcam_only |= 1U << logical_id;

    // FIXME -- this is wrong; when should we use the actionbit?  glass never does any more?
    // if (hit_next.size() > 1 && !indirect)
    //     merge.next_table_tcam_actionbit_map_en |= 1 << logical_id;
    // if (!indirect)
    //     merge.mau_action_instruction_adr_tcam_actionbit_map_en |= 1 << logical_id;
}

/**
 * Emit the context-json "memory_resource_allocation" node for this table's
 * tcams: memory units grouped into wide words with their vpns.
 */
// NOTE(review): '<json::map>' / '<Layout>' reconstructed -- template args lost in extraction.
std::unique_ptr<json::map> TernaryMatchTable::gen_memory_resource_allocation_tbl_cfg(
    const char *type, const std::vector<Layout> &layout, bool skip_spare_bank) const {
    if (layout.size() == 0) return nullptr;
    BUG_CHECK(!skip_spare_bank);  // never spares in tcam
    json::map mra{{"memory_type", json::string(type)}};
    json::vector &mem_units_and_vpns = mra["memory_units_and_vpns"];
    json::vector mem_units;
    unsigned word = 0;
    bool done = false;
    unsigned lrow = 0;
    // iterate column-major so the units of one wide word are grouped together
    for (auto colnum = 0U; !done; colnum++) {
        done = true;
        for (auto &row : layout) {
            if (colnum >= row.memunits.size()) continue;
            auto mu = row.memunits[colnum];
            auto vpn = row.vpns[colnum];
            mem_units.push_back(json_memunit(mu));
            lrow = json_memunit(mu);
            if (++word == match.size()) {
                mem_units_and_vpns.push_back(json::map{{"memory_units", std::move(mem_units)},
                                                       {"vpns", json::vector{json::number(vpn)}}});
mem_units = json::vector();
                word = 0;
            }
            done = false;
        }
    }
    // For keyless table, add empty vectors
    if (mem_units_and_vpns.size() == 0)
        mem_units_and_vpns.push_back(
            json::map{{"memory_units", json::vector()}, {"vpns", json::vector()}});
    mra["spare_bank_memory_unit"] = lrow;
    // NOTE(review): '<json::map>' reconstructed -- template args lost in extraction.
    return json::mkuniq<json::map>(std::move(mra));
}

/**
 * Append one context-json pack-format field entry and mark the tcam bits it
 * occupies in tcam_bits (range entries always claim a whole nibble).
 */
void TernaryMatchTable::gen_entry_cfg2(json::vector &out, std::string field_name,
                                       std::string global_name, unsigned lsb_offset,
                                       unsigned lsb_idx, unsigned msb_idx, std::string source,
                                       unsigned start_bit, unsigned field_width,
                                       bitvec &tcam_bits) const {
    json::map entry;
    entry["field_name"] = field_name;
    entry["global_name"] = global_name;
    entry["lsb_mem_word_offset"] = lsb_offset;
    entry["lsb_mem_word_idx"] = lsb_idx;
    entry["msb_mem_word_idx"] = msb_idx;
    entry["source"] = source;
    entry["start_bit"] = start_bit;
    entry["field_width"] = field_width;
    out.push_back(std::move(entry));
    // For a range with field width < nibble width, mark the entire
    // nibble in tcam_bits as used. The driver expects no overlap with other
    // format entries with the unused bits in the nibble.
    int tcam_bit_width = source == "range" ? 4 : field_width;
    tcam_bits.setrange(lsb_offset, tcam_bit_width);
}

// Attach the "range" sub-object to a pack-format entry (type 4 = 4-bit range encoding).
void TernaryMatchTable::gen_entry_range_cfg(json::map &entry, bool duplicate,
                                            unsigned nibble_offset) const {
    json::map &entry_range = entry["range"];
    entry_range["type"] = 4;
    entry_range["is_duplicate"] = duplicate;
    entry_range["nibble_offset"] = nibble_offset;
}

/**
 * Emit pack-format entries for one match field (or pvp/padding pseudo-field),
 * resolving P4 parameter names/slices and expanding range-match fields into
 * per-nibble range entries (plus their duplicates).
 */
void TernaryMatchTable::gen_entry_cfg(json::vector &out, std::string name, unsigned lsb_offset,
                                      unsigned lsb_idx, unsigned msb_idx, std::string source,
                                      unsigned start_bit, unsigned field_width, unsigned index,
                                      bitvec &tcam_bits, unsigned nibble_offset = 0) const {
    LOG3("Adding entry to Ternary Table : name: "
         << name << " lsb_offset: " << lsb_offset << " lsb_idx: " << lsb_idx
         << " msb_idx: " << msb_idx << " source: " << source << " start_bit: " << start_bit
         << " field_width: " << field_width << " index: " << index << " tcam_bits: " << tcam_bits
         << " nibble_offset: " << nibble_offset);
    std::string field_name(name);

    // If the name has a slice in it, remove it and add the lo bit of
    // the slice to field_bit. This takes the place of
    // canon_field_list(), rather than extracting the slice component
    // of the field name, if present, and appending it to the key name.
    int slice_offset = remove_name_tail_range(field_name);
    LOG4(" Field Name: " << field_name << " slice_offset: " << slice_offset);

    // Get the key name, if any.
int param_start_bit = slice_offset + start_bit;
    auto params = find_p4_params(field_name, "", param_start_bit, field_width);
    std::string global_name = "";
    if (params.size() == 0) {
        // no matching P4 parameter -- emit the field as-is
        gen_entry_cfg2(out, field_name, global_name, lsb_offset, lsb_idx, msb_idx, source,
                       param_start_bit, field_width, tcam_bits);
    } else {
        for (auto param : params) {
            if (!param) continue;
            if (!param->key_name.empty()) {
                LOG4(" Found param : " << *param);
                field_name = param->key_name;
                global_name = param->name;
            }
            // For multiple params concatenated within the field width, we only
            // chose the param width which represents the slice.
            field_width = std::min(param->bit_width, field_width);

            // For range match we need bytes to decide which nibble is being used, hence
            // split the field in bytes. For normal match entire slice can be used
            // directly.
            auto *p = find_p4_param(name, "range", param_start_bit, field_width);
            if (p) {
                int lsb_lo = lsb_offset - TCAM_MATCH_BITS_START;
                int lsb_hi = lsb_lo + field_width - 1;
                /**
                 * For each byte of range match, the range match happens over either the lower
                 * nibble or higher nibble given the encoding scheme. The nibble is transformed
                 * into an encoding over a byte. This breaks up the range over each match nibble on
                 * a byte by byte boundary, and outputs the JSON for that nibble
                 *
                 * @seealso bf-p4c/mau/table_format.cpp comments on range
                 * @seealso bf-p4c/mau/resource_estimate.cpp comments on range
                 *
                 * The range context JSON encoding is the following:
                 *     - The lsb_mem_word_offset is always the beginning of the byte (as the
                 *       encoding takes the whole byte)
                 *     - The width is the width of the field in the nibble (up to 4 bits)
                 *     - The nibble_offset is where in the nibble the key starts in the ixbar byte
                 *
                 * A "is_duplicate" nibble is provided.  The driver uses this for not double
                 * counting, maybe.  Henry and I both agree that is really doesn't make any sense
                 * and can be deleted, but remains in there now
                 */
                for (int bit = (lsb_lo / 8) * 8; bit <= (lsb_hi / 8) * 8; bit += 8) {
                    int lsb_lo_bit_in_byte = std::max(lsb_lo, bit) % 8;
                    int lsb_hi_bit_in_byte = std::min(lsb_hi, bit + 7) % 8;
                    auto dirtcam_mode = get_dirtcam_mode(index, (bit / 8));

                    if (!(DIRTCAM_4B_LO == dirtcam_mode || DIRTCAM_4B_HI == dirtcam_mode)) continue;

                    bitvec nibbles_of_range;
                    nibbles_of_range.setbit(lsb_lo_bit_in_byte / 4);
                    nibbles_of_range.setbit(lsb_hi_bit_in_byte / 4);
                    int range_start_bit = start_bit + slice_offset;
                    int range_width;
                    int nibble_offset;

                    // Determine which section of the byte based on which nibble is provided
                    // NOTE(review): 'static_cast<int>' reconstructed -- template args lost.
                    if (dirtcam_mode == DIRTCAM_4B_LO) {
                        BUG_CHECK(nibbles_of_range.getbit(0));
                        // Add the difference from the first bit of this byte and the lowest bit
                        range_start_bit += bit + lsb_lo_bit_in_byte - lsb_lo;
                        range_width =
                            std::min(static_cast<int>(field_width), 4 - lsb_lo_bit_in_byte);
                        range_width = std::min(static_cast<int>(range_width), lsb_hi - bit + 1);
                        nibble_offset = lsb_lo_bit_in_byte % 4;
                    } else {
                        BUG_CHECK(nibbles_of_range.getbit(1));
                        // Because the bit starts at the upper nibble, the start bit is either the
                        // beginning of the nibble or more
                        range_start_bit += bit + std::max(4, lsb_lo_bit_in_byte) - lsb_lo;
                        range_width =
                            std::min(static_cast<int>(field_width), lsb_hi_bit_in_byte - 3);
                        range_width = std::min(static_cast<int>(range_width),
                                               lsb_hi_bit_in_byte - lsb_lo_bit_in_byte + 1);
                        nibble_offset = std::max(4, lsb_lo_bit_in_byte) % 4;
                    }

                    // Add the range entry
                    gen_entry_cfg2(out, field_name, global_name, bit + TCAM_MATCH_BITS_START,
                                   lsb_idx, msb_idx, "range", range_start_bit, range_width,
                                   tcam_bits);
                    // NOTE(review): '->to<json::map>()' reconstructed -- template args lost.
                    auto &last_entry = out.back()->to<json::map>();
                    gen_entry_range_cfg(last_entry, false, nibble_offset);

                    // Adding the duplicate range entry
                    gen_entry_cfg2(out, field_name, global_name, bit + TCAM_MATCH_BITS_START + 4,
                                   lsb_idx, msb_idx, "range", range_start_bit, range_width,
                                   tcam_bits);
                    auto &last_entry_dup = out.back()->to<json::map>();
                    gen_entry_range_cfg(last_entry_dup, true, nibble_offset);
                }

            } else {
                gen_entry_cfg2(out, field_name, global_name, lsb_offset, lsb_idx, msb_idx, source,
                               param_start_bit, field_width, tcam_bits);
            }
            param_start_bit += field_width;
        }
    }
}

/**
 * Emit the payload / version / parity (pvp) pseudo-fields for one tcam word.
 */
void TernaryMatchTable::gen_match_fields_pvp(json::vector &match_field_list, unsigned word,
                                             bool uses_versioning, unsigned version_word_group,
                                             bitvec &tcam_bits) const {
    // Tcam bits are arranged as follows in each tcam word
    // LSB -------------------------------------MSB
    // PAYLOAD BIT - TCAM BITS - [VERSION] - PARITY
    auto start_bit = 0;      // always 0 for fields not on input xbar
    auto dirtcam_index = 0;  // not relevant for fields not on input xbar
    auto payload_name = "--tcam_payload_" + std::to_string(word) + "--";
    auto parity_name = "--tcam_parity_" + std::to_string(word) + "--";
    auto version_name = "--version--";
    gen_entry_cfg(match_field_list, payload_name, TCAM_PAYLOAD_BITS_START, word, word, "payload",
                  start_bit, TCAM_PAYLOAD_BITS, dirtcam_index, tcam_bits);
    if (uses_versioning && (version_word_group == word)) {
        gen_entry_cfg(match_field_list, version_name, TCAM_VERSION_BITS_START, word, word,
                      "version", start_bit, TCAM_VERSION_BITS, dirtcam_index, tcam_bits);
    }
    gen_entry_cfg(match_field_list, parity_name, TCAM_PARITY_BITS_START, word, word, "parity",
                  start_bit, TCAM_PARITY_BITS, dirtcam_index, tcam_bits);
}

/**
 * Emit pack-format entries for all match fields, walking the input xbar groups
 * (ternary word groups and shared mid-byte groups) and mapping each phv slice
 * to its tcam word and bit offset.
 */
// NOTE(review): '<bitvec>' reconstructed -- template args lost in extraction.
void TernaryMatchTable::gen_match_fields(json::vector &match_field_list,
                                         std::vector<bitvec> &tcam_bits) const {
    unsigned match_index = match.size() - 1;
    for (auto &ixb : input_xbar) {
        for (const auto &[field_group, field_phv] : *ixb) {
            switch (field_group.type) {
                case InputXbar::Group::EXACT:
                    continue;
                case InputXbar::Group::TERNARY: {
                    int word = match_index - match_word(field_group.index);
                    if (word < 0) continue;
                    std::string
source = "spec";
                    std::string field_name = field_phv.what.name();
                    unsigned lsb_mem_word_offset = 0;
                    if (field_phv.hi > 40) {
                        // FIXME -- no longer needed if we always convert these to Group::BYTE?
                        // a field in the (mid) byte group, which is shared with the adjacent word
                        // group each word gets only 4 bits of the byte group and is placed at msb
                        // Check mid-byte field does not cross byte boundary (40-47)
                        BUG_CHECK(field_phv.hi < 48);
                        // Check mid-byte field is associated with even group
                        // | == 5 == | == 1 == | == 5 == | == 5 == | == 1 == | == 5 == |
                        // |  Grp 0  | Midbyte0|  Grp 1  |  Grp 2  | Midbyte1|  Grp 3  |
                        BUG_CHECK((field_group.index & 1) == 0);
                        // Find groups to place this byte nibble. Check group which has this
                        // group as the byte_group
                        for (auto &m : match) {
                            if (m.byte_group * 2 == field_group.index) {
                                // Check byte_config to determine where to place the nibble
                                lsb_mem_word_offset = 1 + field_phv.lo;
                                int nibble_offset = 0;
                                int hwidth = 44 - field_phv.lo;
                                int start_bit = 0;
                                if (m.byte_config == MIDBYTE_NIBBLE_HI) {
                                    nibble_offset += 4;
                                    start_bit = hwidth;
                                    hwidth = field_phv.hi - 43;
                                }
                                int midbyte_word_group = match_index - match_word(m.word_group);
                                gen_entry_cfg(match_field_list, field_name, lsb_mem_word_offset,
                                              midbyte_word_group, midbyte_word_group, source,
                                              field_phv.what.lobit() + start_bit, hwidth,
                                              field_group.index, tcam_bits[midbyte_word_group]);
                            }
                        }
                    } else {
                        // ordinary ternary group field: offset by 1 for the payload bit
                        lsb_mem_word_offset = 1 + field_phv.lo;
                        gen_entry_cfg(match_field_list, field_name, lsb_mem_word_offset, word, word,
                                      source, field_phv.what.lobit(),
                                      field_phv.hi - field_phv.lo + 1, field_group.index,
                                      tcam_bits[word], field_phv.what->lo % 4);
                    }
                    break;
                }
                case InputXbar::Group::BYTE:
                    // The byte group represents what goes in top nibble in the tcam
                    // word. Based on the byte config, the corresponding match word is
                    // selected and the field (slice) is placed in the nibble.
                    // byte group 5: { 0: HillTop.Lamona.Whitefish(0..1) ,
                    //                 2: HillTop.RossFork.Adona(0..5) }
                    // match:
                    // - { group: 10, byte_group: 5, byte_config: 0, dirtcam: 0x555 }
                    // - { group: 11, byte_group: 5, byte_config: 1, dirtcam: 0x555 }
                    // Placement
                    // --------------------------
                    // Group 10 - Midbyte Nibble Lo
                    // --------------------------
                    // Word 1 :    41 42 43 44
                    // Whitefish :  0  1  X  X
                    // Adona :      X  X  0  1
                    // --------------------------
                    // Group 11 - Midbyte Nibble Hi
                    // --------------------------
                    // Word 0 :    41 42 43 44
                    // Whitefish :  X  X  X  X
                    // Adona :      2  3  4  5
                    // --------------------------
                    for (size_t word = 0; word < match.size(); word++) {
                        if (match[word].byte_group != field_group.index) continue;
                        auto source = "spec";
                        auto field_name = field_phv.what.name();
                        int byte_lo = field_phv.lo;
                        int field_lo = field_phv.what.lobit();
                        int width = field_phv.what.size();
                        int nibble_lo = byte_lo;
                        if (match[word].byte_config == MIDBYTE_NIBBLE_HI) {
                            if (byte_lo >= 4) {
                                // NIBBLE HI | NIBBLE LO
                                //  7 6 5 4  |  3 2 1 0
                                //    x x x  |
                                // byte_lo = 5 (start of byte)
                                nibble_lo = byte_lo - 4;  // Get nibble_lo from nibble boundary
                                // nibble_lo = 1
                            } else {
                                // NIBBLE HI | NIBBLE LO
                                //  7 6 5 4  |  3 2 1 0
                                //      x x  |  x x
                                // say field f1(3..7)
                                // field_lo = 3
                                // byte_lo = 2 (start of byte)
                                // width = 4
                                width -= 4 - byte_lo;  // Adjust width to what must
                                                       // fit in the nibble
                                if (width <= 0) continue;  // No field in nibble, skip
                                // width = 2
                                nibble_lo = 0;           // Field starts at nibble boundary
                                field_lo += 4 - byte_lo; // Adjust field lo bit to start of nibble
                                // field_lo = 5
                            }
                        } else if (match[word].byte_config == MIDBYTE_NIBBLE_LO) {
                            if (byte_lo >= 4) {
                                // NIBBLE HI | NIBBLE LO
                                //  7 6 5 4  |  3 2 1 0
                                //    x x x  |
                                // byte_lo = 5 (start of byte)
                                continue;  // No field in nibble, skip
                            } else {
                                // NIBBLE HI | NIBBLE LO
                                //  7 6 5 4  |  3 2 1 0
                                //    x x x  |  x x
                                // byte_lo = 2 (start of byte)
                                // width = 5
                                nibble_lo = byte_lo;
                                int nibble_left = 4 - nibble_lo;
                                width = (width > nibble_left) ? nibble_left : width;
                                // width = 2
                            }
                        }
                        gen_entry_cfg(match_field_list, field_name, 41 + nibble_lo,
                                      match_index - word, match_index - word, source, field_lo,
                                      width, match[word].byte_group, tcam_bits[match_index - word]);
                    }
                    break;
                default:
                    BUG("Unknown group type");
            }
        }
    }
}

/**
 * Return the json map that stage/pack config should be added under.  For ALPM
 * tables this table is the pre-classifier of a top-level algorithmic_lpm table,
 * so the pre_classifier sub-map is returned instead of the table itself.
 */
json::map &TernaryMatchTable::get_tbl_top(json::vector &out) const {
    unsigned number_entries = match.size() ? layout_size() / match.size() * 512 : 0;
    // For ALPM tables, this sets up the top level ALPM table and this ternary
    // table as its preclassifier. As the pre_classifier is always in the
    // previous stage as the atcams, this function will be called before the
    // atcam cfg generation. The atcam will check for presence of this table and
    // add the atcam cfg gen
    if (is_alpm()) {
        json::map *alpm_ptr = base_tbl_cfg(out, "match_entry", number_entries);
        json::map &alpm = *alpm_ptr;
        json::map &match_attributes = alpm["match_attributes"];
        match_attributes["match_type"] = "algorithmic_lpm";
        json::map &alpm_pre_classifier = match_attributes["pre_classifier"];
        base_alpm_pre_classifier_tbl_cfg(alpm_pre_classifier, "match_entry", number_entries);
        // top level alpm table has the same key as alpm preclassifier
        add_match_key_cfg(alpm);
        return alpm_pre_classifier;
    } else {
        return *base_tbl_cfg(out, "match_entry", number_entries);
    }
}

// Generate the full context-json table config for this ternary match table.
void TernaryMatchTable::gen_tbl_cfg(json::vector &out) const {
    unsigned number_entries = match.size() ?
layout_size() / match.size() * 512 : 0;
    json::map &tbl = get_tbl_top(out);
    bool uses_versioning = false;
    unsigned version_word_group = -1;
    unsigned match_index = match.size() - 1;
    unsigned index = 0;
    json::vector match_field_list;
    // byte_config == 3 marks the word that carries the version nibble
    for (auto &m : match) {
        if (m.byte_config == 3) {
            uses_versioning = true;
            version_word_group = match_index - index;
            break;
        }
        index++;
    }
    // Determine the zero padding necessary by creating a bitvector (for each
    // word). While creating entries for pack format set bits used. The unused
    // bits must be padded with zero field entries.
    // NOTE(review): '<bitvec>' reconstructed -- template args lost in extraction.
    std::vector<bitvec> tcam_bits(match.size());
    // Set pvp bits for each tcam word
    for (unsigned i = 0; i < match.size(); i++) {
        gen_match_fields_pvp(match_field_list, i, uses_versioning, version_word_group,
                             tcam_bits[i]);
    }
    json::map &match_attributes = tbl["match_attributes"];
    json::vector &stage_tables = match_attributes["stage_tables"];
    json::map &stage_tbl = *add_stage_tbl_cfg(match_attributes, "ternary_match", number_entries);
    // This is a only a glass required field, as it is only required when no default action
    // is specified, which is impossible for Brig through p4-16
    stage_tbl["default_next_table"] = Stage::end_of_pipe();
    json::map &pack_fmt =
        add_pack_format(stage_tbl, Target::TCAM_MEMORY_FULL_WIDTH(), match.size(), 1);
    stage_tbl["memory_resource_allocation"] =
        gen_memory_resource_allocation_tbl_cfg("tcam", layout);
    // FIXME-JSON: If the next table is modifiable then we set it to what it's mapped
    // to. Otherwise, set it to the default next table for this stage.
    // stage_tbl["default_next_table"] = Target::END_OF_PIPE();
    // FIXME: How to deal with multiple next hit tables?
    stage_tbl["default_next_table"] =
        hit_next.size() > 0 ? hit_next[0].next_table_id() : Target::END_OF_PIPE();
    add_result_physical_buses(stage_tbl);
    gen_match_fields(match_field_list, tcam_bits);

    // For keyless table, just add parity & payload bits
    if (p4_params_list.empty()) {
        tcam_bits.resize(1);
        gen_match_fields_pvp(match_field_list, 0, false, -1, tcam_bits[0]);
    }

    // tcam_bits is a vector indexed by tcam word and has all used bits set. We
    // loop through this bitvec for each word and add a zero padding entry for
    // the unused bits.
    // For ternary all unused bits must be marked as source
    // 'zero' for correctness during entry encoding.
    for (unsigned word = 0; word < match.size(); word++) {
        bitvec &pb = tcam_bits[word];
        unsigned start_bit = 0;   // always 0 for padded fields
        int dirtcam_index = -1;   // irrelevant in this context
        if (pb != bitvec(0)) {
            int idx_lo = 0;
            std::string pad_name = "--unused--";
            for (auto p : pb) {
                if (p > idx_lo) {
                    gen_entry_cfg(match_field_list, pad_name, idx_lo, word, word, "zero", start_bit,
                                  p - idx_lo, dirtcam_index, tcam_bits[word]);
                }
                idx_lo = p + 1;
            }
            auto fw = TCAM_VERSION_BITS;
            if (idx_lo < fw) {
                gen_entry_cfg(match_field_list, pad_name, idx_lo, word, word, "zero", start_bit,
                              fw - idx_lo, dirtcam_index, tcam_bits[word]);
            }
        }
    }

    pack_fmt["entries"] = json::vector{
        json::map{{"entry_number", json::number(0)}, {"fields", std::move(match_field_list)}}};
    add_all_reference_tables(tbl);
    json::map &tind = stage_tbl["ternary_indirection_stage_table"] = json::map();
    if (indirect) {
        unsigned fmt_width = 1U << indirect->format->log2size;
        // json::map tind;
        tind["stage_number"] = stage->stageno;
        tind["stage_table_type"] = "ternary_indirection";
        tind["size"] = indirect->layout_size() * 128 / fmt_width * 1024;
        indirect->add_pack_format(tind, indirect->format.get());
        tind["memory_resource_allocation"] =
            indirect->gen_memory_resource_allocation_tbl_cfg("sram", indirect->layout);
        // Add action formats for actions present in table or attached action table
        auto *acts = indirect->get_actions();
        if (acts) acts->add_action_format(this, tind);
        add_all_reference_tables(tbl, indirect);
        if (indirect->actions)
            indirect->actions->gen_tbl_cfg(tbl["actions"]);
        else if (indirect->action && indirect->action->actions)
            indirect->action->actions->gen_tbl_cfg(tbl["actions"]);
        indirect->common_tbl_cfg(tbl);
    } else {
        // FIXME: Add a fake ternary indirect table (as otherwise driver complains)
        // if tind not present - to be removed with update on driver side
        auto *acts = get_actions();
        if (acts) acts->add_action_format(this, tind);
        tind["memory_resource_allocation"] = nullptr;
        json::vector &pack_format = tind["pack_format"] = json::vector();
        json::map pack_format_entry;
        pack_format_entry["memory_word_width"] = 128;
        pack_format_entry["entries_per_table_word"] = 1;
        json::vector &entries = pack_format_entry["entries"] = json::vector();
        entries.push_back(json::map{{"entry_number", json::number(0)}, {"fields", json::vector()}});
        pack_format_entry["table_word_width"] = 0;
        pack_format_entry["number_memory_units_per_table_word"] = 0;
        pack_format.push_back(std::move(pack_format_entry));
        tind["logical_table_id"] = logical_id;
        tind["stage_number"] = stage->stageno;
        tind["stage_table_type"] = "ternary_indirection";
        tind["size"] = 0;
    }
    common_tbl_cfg(tbl);
    if (actions)
        actions->gen_tbl_cfg(tbl["actions"]);
    else if (action && action->actions)
        action->actions->gen_tbl_cfg(tbl["actions"]);
    gen_idletime_tbl_cfg(stage_tbl);
    merge_context_json(tbl, stage_tbl);
    match_attributes["match_type"] = "ternary";
}

/**
 * Parse the assembler source for a ternary indirect table: format (4..64 bits),
 * input xbar, attached stats/meters/statefuls/selector, then allocate srams
 * and a ternary indirect bus.
 */
void TernaryIndirectTable::setup(VECTOR(pair_t) & data) {
    match_table = 0;
    common_init_setup(data, true, P4Table::MatchEntry);
    if (format) {
        if (format->size > 64) error(format->lineno, "ternary indirect format larger than 64 bits");
        if (format->size < 4) {
            /* pad out to minumum size */
            format->size = 4;
format->log2size = 2;
        }
    } else {
        error(lineno, "No format specified in table %s", name());
    }
    for (auto &kv : MapIterChecked(data, {"meter", "stats", "stateful"})) {
        if (common_setup(kv, data, P4Table::MatchEntry)) {
        } else if (kv.key == "input_xbar") {
            if (CHECKTYPE(kv.value, tMAP))
                input_xbar.emplace_back(InputXbar::create(this, false, kv.key, kv.value.map));
        } else if (kv.key == "hash_dist") {
            /* parsed in common_init_setup */
        } else if (kv.key == "selector") {
            attached.selector.setup(kv.value, this);
        } else if (kv.key == "selector_length") {
            attached.selector_length.setup(kv.value, this);
        } else if (kv.key == "meter_color") {
            attached.meter_color.setup(kv.value, this);
        } else if (kv.key == "stats") {
            if (kv.value.type == tVEC)
                for (auto &v : kv.value.vec) attached.stats.emplace_back(v, this);
            else
                attached.stats.emplace_back(kv.value, this);
        } else if (kv.key == "meter") {
            if (kv.value.type == tVEC)
                for (auto &v : kv.value.vec) attached.meters.emplace_back(v, this);
            else
                attached.meters.emplace_back(kv.value, this);
        } else if (kv.key == "stateful") {
            if (kv.value.type == tVEC)
                for (auto &v : kv.value.vec) attached.statefuls.emplace_back(v, this);
            else
                attached.statefuls.emplace_back(kv.value, this);
        } else {
            warning(kv.key.lineno, "ignoring unknown item %s in table %s", value_desc(kv.key),
                    name());
        }
    }
    if (Target::SRAM_GLOBAL_ACCESS())
        alloc_global_srams();
    else
        alloc_rams(false, stage->sram_use, &stage->tcam_indirect_bus_use, Layout::TIND_BUS);
    if (!action.set() && !actions)
        error(lineno, "Table %s has neither action table nor immediate actions", name());
    if (actions && !action_bus) action_bus = ActionBus::create();
}

// Link this indirect table to its (ternary) match table; inherits the match
// table's logical id and p4_table.  Always returns TERNARY_INDIRECT.
Table::table_type_t TernaryIndirectTable::set_match_table(MatchTable *m, bool indirect) {
    if (match_table) {
        error(lineno, "Multiple references to ternary indirect table %s", name());
        // NOTE(review): '<TernaryMatchTable *>' reconstructed -- template args lost in extraction.
    } else if (!(match_table = dynamic_cast<TernaryMatchTable *>(m))) {
        error(lineno, "Trying to link ternary indirect table %s to non-ternary table %s", name(),
              m->name());
    } else {
        if (action.check() && action->set_match_table(m, !action.is_direct_call()) != ACTION)
            error(action.lineno, "%s is not an action table", action->name());
        attached.pass0(m);
        logical_id = m->logical_id;
        p4_table = m->p4_table;
    }
    return TERNARY_INDIRECT;
}

bitvec TernaryIndirectTable::compute_reachable_tables() {
    Table::compute_reachable_tables();
    if (match_table) reachable_tables_ |= match_table->reachable_tables();
    reachable_tables_ |= attached.compute_reachable_tables();
    return reachable_tables_;
}

void TernaryIndirectTable::pass1() {
    LOG1("### Ternary indirect table " << name() << " pass1");
    determine_word_and_result_bus();
    Table::pass1();
    if (action_enable >= 0)
        if (action.args.size() < 1 || action.args[0].size() <= (unsigned)action_enable)
            error(lineno, "Action enable bit %d out of range for action selector", action_enable);
    if (format) format->pass1(this);
    for (auto &hd : hash_dist) {
        hd.pass1(this, HashDistribution::OTHER, false);
    }
}

/**
 * The bus by definition for ternary indirect is the result bus, and all TernaryIndirect tables
 * are at most 64 bits, meaning that all their words are equal to 0.
 */
void TernaryIndirectTable::determine_word_and_result_bus() {
    for (auto &row : layout) {
        row.word = 0;
    }
}

void TernaryIndirectTable::pass2() {
    LOG1("### Ternary indirect table " << name() << " pass2");
    if (logical_id < 0 && match_table) logical_id = match_table->logical_id;
    if (!match_table) error(lineno, "No match table for ternary indirect table %s", name());
    if (actions) actions->pass2(this);
    if (action_bus) action_bus->pass2(this);
    if (format) format->pass2(this);
}

void TernaryIndirectTable::pass3() {
    LOG1("### Ternary indirect table " << name() << " pass3");
    if (action_bus) action_bus->pass3(this);
}

// Program the unit rams and ternary-indirect result bus for this table.
// NOTE(review): '<class REGS>' reconstructed -- template args were lost in extraction.
template <class REGS>
void TernaryIndirectTable::write_regs_vt(REGS &regs) {
    LOG1("### Ternary indirect table " << name() << " write_regs");
    int tcam_id = match_table->tcam_id;
    int tcam_shift = format->log2size - 2;
    if (tcam_id >= 0) regs.tcams.tcam_match_adr_shift[tcam_id] = tcam_shift;
    auto &merge = regs.rams.match.merge;
    for (Layout &row : layout) {
        int bus = row.bus.at(Layout::TIND_BUS);
        auto vpn = row.vpns.begin();
        auto &ram_row = regs.rams.array.row[row.row];
        for (auto &memunit : row.memunits) {
            int col = memunit.col;
            BUG_CHECK(memunit.stage == INT_MIN && memunit.row == row.row, "bogus %s in row %d",
                      memunit.desc(), row.row);
            auto &unit_ram_ctl = ram_row.ram[col].unit_ram_ctl;
            unit_ram_ctl.match_ram_write_data_mux_select = 7; /* disable */
            unit_ram_ctl.match_ram_read_data_mux_select = 7;  /* disable */
            unit_ram_ctl.tind_result_bus_select = 1U << bus;
            auto &mux_ctl =
                regs.rams.map_alu.row[row.row].adrmux.ram_address_mux_ctl[col / 6][col % 6];
            mux_ctl.ram_unitram_adr_mux_select = bus + 2;
            auto &unitram_config =
                regs.rams.map_alu.row[row.row].adrmux.unitram_config[col / 6][col % 6];
            unitram_config.unitram_type = 6;
            unitram_config.unitram_vpn = *vpn++;
            unitram_config.unitram_logical_table = logical_id;
            if (gress == INGRESS || gress == GHOST)
                unitram_config.unitram_ingress = 1;
            else
+ unitram_config.unitram_egress = 1; + unitram_config.unitram_enable = 1; + auto &xbar_ctl = + regs.rams.map_alu.row[row.row].vh_xbars.adr_dist_tind_adr_xbar_ctl[bus]; + if (tcam_id >= 0) setup_muxctl(xbar_ctl, tcam_id); + if (gress == EGRESS) + regs.cfg_regs.mau_cfg_uram_thread[col / 4U] |= 1U << (col % 4U * 8U + row.row); + ram_row.tind_ecc_error_uram_ctl[timing_thread(gress)] |= 1 << (col - 2); + } + int r_bus = row.row * 2 + bus; + merge.tind_ram_data_size[r_bus] = format->log2size - 1; + if (tcam_id >= 0) + setup_muxctl(merge.tcam_match_adr_to_physical_oxbar_outputmap[r_bus], tcam_id); + merge.tind_bus_prop[r_bus].tcam_piped = 1; + merge.tind_bus_prop[r_bus].thread = timing_thread(gress); + merge.tind_bus_prop[r_bus].enabled = 1; + if (instruction) { + int shiftcount = 0; + if (auto field = instruction.args[0].field()) + shiftcount = field->bit(0); + else if (auto field = instruction.args[1].field()) + shiftcount = field->immed_bit(0); + merge.mau_action_instruction_adr_tcam_shiftcount[r_bus] = shiftcount; + } + if (format->immed) merge.mau_immediate_data_tcam_shiftcount[r_bus] = format->immed->bit(0); + if (action) { + if (auto adt = action->to()) { + merge.mau_actiondata_adr_default[1][r_bus] = adt->determine_default(action); + merge.mau_actiondata_adr_mask[1][r_bus] = adt->determine_mask(action); + merge.mau_actiondata_adr_vpn_shiftcount[1][r_bus] = + adt->determine_vpn_shiftcount(action); + merge.mau_actiondata_adr_tcam_shiftcount[r_bus] = + adt->determine_shiftcount(action, 0, 0, tcam_shift); + } + } + if (attached.selector) { + auto sel = get_selector(); + merge.mau_meter_adr_tcam_shiftcount[r_bus] = + sel->determine_shiftcount(attached.selector, 0, 0, format->log2size - 2); + merge.mau_selectorlength_shiftcount[1][r_bus] = + sel->determine_length_shiftcount(attached.selector_length, 0, 0); + merge.mau_selectorlength_mask[1][r_bus] = + sel->determine_length_mask(attached.selector_length); + merge.mau_selectorlength_default[1][r_bus] = + 
sel->determine_length_default(attached.selector_length); + } + if (match_table->idletime) + merge.mau_idletime_adr_tcam_shiftcount[r_bus] = + 66 + format->log2size - match_table->idletime->precision_shift(); + attached.write_tcam_merge_regs(regs, match_table, r_bus, tcam_shift); + } + if (actions) actions->write_regs(regs, this); + for (auto &hd : hash_dist) hd.write_regs(regs, this); +} + +void TernaryIndirectTable::gen_tbl_cfg(json::vector &out) const {} + +void TernaryMatchTable::add_result_physical_buses(json::map &stage_tbl) const { + json::vector &result_physical_buses = stage_tbl["result_physical_buses"] = json::vector(); + if (indirect) { + for (auto l : indirect->layout) { + if (l.bus.count(Layout::TIND_BUS)) { + result_physical_buses.push_back(l.row * 2 + l.bus.at(Layout::TIND_BUS)); + } + } + } else { + result_physical_buses.push_back(indirect_bus); + } +} + +DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(TernaryMatchTable, TARGET_CLASS) +DEFINE_TABLE_TYPE_WITH_SPECIALIZATION(TernaryIndirectTable, TARGET_CLASS) diff --git a/backends/tofino/bf-asm/tofino/CMakeLists.txt b/backends/tofino/bf-asm/tofino/CMakeLists.txt new file mode 100644 index 00000000000..e1d922b3e7e --- /dev/null +++ b/backends/tofino/bf-asm/tofino/CMakeLists.txt @@ -0,0 +1,60 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# +# SPDX-License-Identifier: Apache-2.0 + +set (GEN_TOFINO + memories.pipe_addrmap + memories.pipe_top_level + memories.prsr_mem_main_rspec + regs.dprsr_hdr + regs.dprsr_inp + regs.ebp_rspec + regs.ibp_rspec + regs.mau_addrmap + regs.pipe_addrmap + regs.prsr_reg_merge_rspec + regs.tofino + ) + +foreach(f IN LISTS GEN_TOFINO) + list (APPEND GEN_TOFINO_SRCS ${BFASM_BINARY_DIR}/gen/tofino/${f}.cpp) + list (APPEND GEN_TOFINO_HDRS ${BFASM_BINARY_DIR}/gen/tofino/${f}.h) +endforeach() + +set_source_files_properties(${GEN_TOFINO_SRCS} ${GEN_TOFINO_HDRS} PROPERTIES GENERATED TRUE) + +add_custom_command(OUTPUT ${GEN_TOFINO_HDRS} ${GEN_TOFINO_SRCS} + COMMAND ${BFASM_WALLE} --schema chip.schema --generate-cpp template_objects.yaml -o ${BFASM_BINARY_DIR}/gen/tofino + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS template_objects.yaml chip.schema ${WALLE_SOURCES} + COMMENT "Generating cpp code for tofino from tofino/chip.schema") + +set (BFAS_TOFINO_SRCS + tofino/exact_match.cpp + tofino/gateway.cpp + tofino/input_xbar.cpp + tofino/parser.cpp + tofino/sram_match.cpp + tofino/stateful.cpp + tofino/ternary_match.cpp + PARENT_SCOPE + ) + + +add_library (regs_tofino ${GEN_TOFINO_SRCS}) +target_link_libraries (regs_tofino p4ctoolkit) +# Disable errors for warnings. FIXME: Get rid of this. +target_compile_options(regs_tofino PUBLIC -Wno-error -Wno-unused-parameter -Wno-unused-variable -Wno-type-limits -Wno-sign-compare) diff --git a/backends/tofino/bf-asm/tofino/action_table.h b/backends/tofino/bf-asm/tofino/action_table.h new file mode 100644 index 00000000000..1c8c39ac142 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/action_table.h @@ -0,0 +1,29 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_ACTION_TABLE_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_ACTION_TABLE_H_ + +#include "backends/tofino/bf-asm/tables.h" + +class Target::Tofino::ActionTable : public ::ActionTable { + friend class ::ActionTable; + ActionTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::ActionTable(line, n, gr, s, lid) {} +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_ACTION_TABLE_H_ */ diff --git a/backends/tofino/bf-asm/tofino/chip.schema b/backends/tofino/bf-asm/tofino/chip.schema new file mode 100644 index 00000000000..c99cc1a9d88 Binary files /dev/null and b/backends/tofino/bf-asm/tofino/chip.schema differ diff --git a/backends/tofino/bf-asm/tofino/counter.h b/backends/tofino/bf-asm/tofino/counter.h new file mode 100644 index 00000000000..484b5f49c52 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/counter.h @@ -0,0 +1,39 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_COUNTER_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_COUNTER_H_ + +#include "backends/tofino/bf-asm/tables.h" + +class Target::Tofino::CounterTable : public ::CounterTable { + friend class ::CounterTable; + CounterTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::CounterTable(line, n, gr, s, lid) {} +}; + +template <> +void CounterTable::setup_teop_regs(Target::Tofino::mau_regs &, int) { + BUG(); // no teop on tofino +} + +template <> +void CounterTable::write_alu_vpn_range(Target::Tofino::mau_regs &) { + BUG(); // not available on tofino +} + +#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_COUNTER_H_ */ diff --git a/backends/tofino/bf-asm/tofino/deparser.cpp b/backends/tofino/bf-asm/tofino/deparser.cpp new file mode 100644 index 00000000000..fa7cefa4f6a --- /dev/null +++ b/backends/tofino/bf-asm/tofino/deparser.cpp @@ -0,0 +1,926 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* deparser template specializations for tofino -- #included directly in top-level deparser.cpp */ + +#define YES(X) X +#define NO(X) + +#define SIMPLE_INTRINSIC(GR, PFX, NAME, IF_SHIFT) \ + DEPARSER_INTRINSIC(Tofino, GR, NAME, 1) { \ + PFX.NAME.phv = intrin.vals[0].val->reg.deparser_id(); \ + IF_SHIFT(PFX.NAME.shft = intrin.vals[0].val->lo;) \ + if (!intrin.vals[0].pov.empty()) \ + error(intrin.vals[0].pov.front().lineno, "No POV support in tofino " #NAME); \ + PFX.NAME.valid = 1; \ + } +#define SIMPLE_INTRINSIC_RENAME(GR, PFX, NAME, REGNAME, IF_SHIFT) \ + DEPARSER_INTRINSIC(Tofino, GR, NAME, 1) { \ + PFX.REGNAME.phv = intrin.vals[0].val->reg.deparser_id(); \ + IF_SHIFT(PFX.REGNAME.shft = intrin.vals[0].val->lo;) \ + PFX.REGNAME.valid = 1; \ + } +#define IIR_MAIN_INTRINSIC(NAME, SHFT) SIMPLE_INTRINSIC(INGRESS, regs.input.iir.main_i, NAME, SHFT) +#define IIR_INTRINSIC(NAME, SHFT) SIMPLE_INTRINSIC(INGRESS, regs.input.iir.ingr, NAME, SHFT) +#define HIR_INTRINSIC(NAME, SHFT) SIMPLE_INTRINSIC(INGRESS, regs.header.hir.ingr, NAME, SHFT) +#define HIR_INTRINSIC_RENAME(NAME, REGNAME, SHFT) \ + SIMPLE_INTRINSIC_RENAME(INGRESS, regs.header.hir.ingr, NAME, REGNAME, SHFT) +#define IER_MAIN_INTRINSIC(NAME, SHFT) SIMPLE_INTRINSIC(EGRESS, regs.input.ier.main_e, NAME, SHFT) +#define HER_INTRINSIC(NAME, SHFT) SIMPLE_INTRINSIC(EGRESS, regs.header.her.egr, NAME, SHFT) + +IIR_MAIN_INTRINSIC(egress_unicast_port, NO) +IIR_MAIN_INTRINSIC(drop_ctl, YES) +IIR_INTRINSIC(copy_to_cpu, YES) +HIR_INTRINSIC_RENAME(egress_multicast_group_0, egress_multicast_group[0], NO) +HIR_INTRINSIC_RENAME(egress_multicast_group_1, egress_multicast_group[1], NO) +HIR_INTRINSIC_RENAME(hash_lag_ecmp_mcast_0, hash_lag_ecmp_mcast[0], NO) +HIR_INTRINSIC_RENAME(hash_lag_ecmp_mcast_1, hash_lag_ecmp_mcast[1], NO) +HIR_INTRINSIC(copy_to_cpu_cos, YES) +DEPARSER_INTRINSIC(Tofino, INGRESS, ingress_port_source, 1) { + regs.header.hir.ingr.ingress_port.phv = 
intrin.vals[0].val->reg.deparser_id(); + regs.header.hir.ingr.ingress_port.sel = 0; +} +HIR_INTRINSIC(deflect_on_drop, YES) +HIR_INTRINSIC(meter_color, YES) +HIR_INTRINSIC(icos, YES) +HIR_INTRINSIC(qid, YES) +HIR_INTRINSIC(xid, NO) +HIR_INTRINSIC(yid, NO) +HIR_INTRINSIC(rid, NO) +HIR_INTRINSIC(bypss_egr, YES) +HIR_INTRINSIC(ct_disable, YES) +HIR_INTRINSIC(ct_mcast, YES) + +IER_MAIN_INTRINSIC(egress_unicast_port, NO) +IER_MAIN_INTRINSIC(drop_ctl, YES) +HER_INTRINSIC(force_tx_err, YES) +HER_INTRINSIC(tx_pkt_has_offsets, YES) +HER_INTRINSIC(capture_tx_ts, YES) +HER_INTRINSIC(coal, NO) +HER_INTRINSIC(ecos, YES) + +#undef SIMPLE_INTRINSIC +#undef IIR_MAIN_INTRINSIC +#undef IIR_INTRINSIC +#undef HIR_INTRINSIC +#undef IER_INTRINSIC +#undef HER_INTRINSIC + +#define TOFINO_DIGEST(GRESS, NAME, CFG, TBL, IFSHIFT, IFID, CNT) \ + DEPARSER_DIGEST(Tofino, GRESS, NAME, CNT, IFSHIFT(can_shift = true;)) { \ + CFG.phv = data.select->reg.deparser_id(); \ + IFSHIFT(CFG.shft = data.shift + data.select->lo;) \ + CFG.valid = 1; \ + if (!data.select.pov.empty()) \ + error(data.select.pov.front().lineno, "No POV bit support in tofino %s digest", \ + #NAME); \ + for (auto &set : data.layout) { \ + int id = set.first >> data.shift; \ + unsigned idx = 0; \ + bool first = true, ok = true; \ + int last = -1; \ + int maxidx = TBL[id].phvs.size() - 1; \ + for (auto ® : set.second) { \ + if (first) { \ + first = false; \ + IFID(TBL[id].id_phv = reg->reg.deparser_id(); continue;) \ + } \ + /* The same 16b/32b container cannot appear consecutively, but 8b can. 
*/ \ + if (last == reg->reg.deparser_id() && reg->reg.size != 8) { \ + error(data.lineno, "%s: %db container %s seen in consecutive locations", \ + #NAME, reg->reg.size, reg->reg.name); \ + continue; \ + } \ + for (int i = reg->reg.size / 8; i > 0; i--) { \ + if (idx > maxidx) { \ + error(data.lineno, "%s digest limited to %d bytes", #NAME, maxidx + 1); \ + ok = false; \ + break; \ + } \ + TBL[id].phvs[idx++] = reg->reg.deparser_id(); \ + } \ + last = reg->reg.deparser_id(); \ + if (!ok) break; \ + } \ + TBL[id].valid = 1; \ + TBL[id].len = idx; \ + } \ + } + +TOFINO_DIGEST(INGRESS, learning, regs.input.iir.ingr.learn_cfg, regs.input.iir.ingr.learn_tbl, NO, + NO, 8) +TOFINO_DIGEST(INGRESS, mirror, regs.header.hir.main_i.mirror_cfg, regs.header.hir.main_i.mirror_tbl, + YES, YES, 8) +TOFINO_DIGEST(EGRESS, mirror, regs.header.her.main_e.mirror_cfg, regs.header.her.main_e.mirror_tbl, + YES, YES, 8) +TOFINO_DIGEST(INGRESS, resubmit, regs.input.iir.ingr.resub_cfg, regs.input.iir.ingr.resub_tbl, YES, + NO, 8) + +void tofino_field_dictionary(checked_array_base &fde_control, + checked_array_base &fde_data, + checked_array_base> &pov_layout, + std::vector &pov_order, + ordered_map ®_pov, + std::vector &dict, json::vector &fd_gress, + json::vector &fd_entries, gress_t gress) { + std::map pov; + json::vector chunk_bytes; + json::vector fd_entry_chunk_bytes; + unsigned pov_byte = 0, pov_size = 0, total_headers = 0; + for (auto &ent : pov_order) + if (pov.count(ent->reg.deparser_id()) == 0) { + total_headers++; + pov[ent->reg.deparser_id()] = pov_size; + pov_size += ent->reg.size; + for (unsigned i = 0; i < ent->reg.size; i += 8) { + if (pov_byte >= Target::Tofino::DEPARSER_MAX_POV_BYTES) { + error(ent.lineno, + "Exceeded hardware limit for POV bits (%d) in deparser. " + "Using %d or more headers. 
Please reduce the number of headers", + Target::Tofino::DEPARSER_MAX_POV_BYTES * 8, total_headers); + return; + } + pov_layout[pov_byte++] = ent->reg.deparser_id(); + } + } + while (pov_byte < Target::Tofino::DEPARSER_MAX_POV_BYTES) pov_layout[pov_byte++] = 0xff; + + int row = -1, prev = -1, prev_pov = -1; + bool prev_is_checksum = false; + unsigned pos = 0; + unsigned total_bytes = 0; + int prev_row = 0; + for (auto &ent : dict) { + unsigned size = ent.what->size(); + total_bytes += size; + int pov_bit = pov[ent.pov.front()->reg.deparser_id()] + ent.pov.front()->lo; + + if (options.match_compiler) { + if (ent.what->is()) { + /* checksum unit -- make sure it gets its own dictionary line */ + prev_pov = -1; + prev_is_checksum = true; + } else { + if (prev_is_checksum) prev_pov = -1; + prev_is_checksum = false; + } + } + + if (ent.what->is() && prev_pov == pov_bit && + int(ent.what->encode()) == prev && ent.what->size() & 6) + error(ent.lineno, "16 and 32-bit container cannot be repeatedly deparsed"); + while (size--) { + if (pov_bit != prev_pov || pos >= 4 /*|| (pos & (size-1)) != 0*/) { + if (row >= 0) { + fde_control[row].num_bytes = pos & 3; + fde_data[row].num_bytes = pos & 3; + } + // Entries used - (192 each in INGRESS & EGRESS for Tofino) + if (++row >= Target::Tofino::DEPARSER_MAX_FD_ENTRIES) { + error(ent.lineno, + "Exceeded hardware limit for " + "deparser field dictionary entries (%d). Using %d headers and %" PRIu64 + " containers. 
Please reduce the number of headers and/or their length.", + Target::Tofino::DEPARSER_MAX_FD_ENTRIES, total_headers, + uint64_t(dict.size())); + return; + } + fde_control[row].pov_sel = pov_bit; + fde_control[row].version = 0xf; + fde_control[row].valid = 1; + pos = 0; + } + if (prev_row != row) { + json::map fd; + json::map fd_entry; + fd["Field Dictionary Number"] = prev_row; + fd_entry["entry"] = prev_row; + auto prevPovReg = Phv::reg(pov_layout[fde_control[prev_row].pov_sel.value / 8]); + auto prevPovBit = fde_control[prev_row].pov_sel.value; + auto prevPovOffset = prevPovBit - reg_pov[prevPovReg]; + Deparser::write_pov_in_json(fd, fd_entry, prevPovReg, prevPovBit, prevPovOffset); + fd["Content"] = std::move(chunk_bytes); + fd_entry["chunks"] = std::move(fd_entry_chunk_bytes); + fd_gress.push_back(std::move(fd)); + fd_entries.push_back(std::move(fd_entry)); + prev_row = row; + } + auto povReg = Phv::reg(pov_layout[fde_control[row].pov_sel.value / 8]); + auto povBit = fde_control[row].pov_sel.value % povReg->size; + json::map chunk_byte; + json::map fd_entry_chunk_byte; + json::map fd_entry_chunk; + chunk_byte["Byte"] = pos; + fd_entry_chunk_byte["chunk_number"] = pos; + auto phvReg = Phv::reg(ent.what->encode()); + if (ent.what->encode() < CHECKSUM_ENGINE_PHVID_TOFINO_LOW || + ent.what->encode() > CHECKSUM_ENGINE_PHVID_TOFINO_HIGH) { + write_field_name_in_json(phvReg, povReg, povBit, chunk_byte, fd_entry_chunk, 11, + gress); + } else { + write_csum_const_in_json(ent.what->encode(), chunk_byte, fd_entry_chunk, gress); + } + fd_entry_chunk_byte["chunk"] = std::move(fd_entry_chunk); + chunk_bytes.push_back(std::move(chunk_byte.clone())); + fd_entry_chunk_bytes.push_back(std::move(fd_entry_chunk_byte.clone())); + fde_data[row].phv[pos++] = ent.what->encode(); + prev_pov = pov_bit; + } + + prev = ent.what->encode(); + } + if (pos) { + fde_control[row].num_bytes = pos & 3; + fde_data[row].num_bytes = pos & 3; + } + + // Compute average occupancy. 
For deparser FDE compression to work, + // need to make sure have certain average occupancy. + // This error check may still be too high level. I think it needs a finer granularity, + // but I'm not sure how to model the allowed variability of packet headers. + + // Tofino deparser has a maximum output header size of 480 bytes. This is done in 2 phases. + // Each phase can do 240 bytes, corresponding to 18 QFDEs (4 * 18 * 4 bytes = 288 bytes) + // This means that average occupancy must be better than 240 / 288 bytes, or roughly 83%. + // This is the value we will check. + // We gate the check on total bytes occupied being greater than 64 bytes in an attempt + // to consider the QFDE constraint that it can only drive four stage 2 buses for compression. + + unsigned max_bytes_for_rows_occupied = 4 * (row + 1); + double occupancy = 0.0; + + if (max_bytes_for_rows_occupied > 0) + occupancy = + static_cast(total_bytes) / static_cast(max_bytes_for_rows_occupied); + + if (total_bytes > 64 && occupancy < (240.0 / 288.0)) { + std::stringstream warn_msg; + warn_msg.precision(4); + warn_msg << "Deparser field dictionary occupancy is too sparse."; + warn_msg << "\nHardware requires an occupancy of " << 100.0 * 240.0 / 288.0 + << " to deparse the output header,"; + warn_msg << "\nbut the PHV layout for the header structures was such that" + " the occupancy was only " + << 100.0 * occupancy << "."; + warn_msg << "\nThis situation is usually caused by a program that has one or" + " more of the following requirements:"; + warn_msg << "\n 1. many 'short' headers that are not guaranteed to coexist" + " (e.g. less than 4 bytes)"; + warn_msg << "\n 2. many packet headers that are not multiples of 4 bytes"; + warn_msg << "\n 3. 
many conditionally updated checksums"; + warning(0, "%s", warn_msg.str().c_str()); + } +} + +template +void tofino_phv_ownership(bitvec phv_use[2], IN_GRP &in_grp, IN_SPLIT &in_split, EG_GRP &eg_grp, + EG_SPLIT &eg_split, unsigned first, unsigned count) { + BUG_CHECK(in_grp.val.size() == eg_grp.val.size()); + BUG_CHECK(in_split.val.size() == eg_split.val.size()); + BUG_CHECK((in_grp.val.size() + 1) * in_split.val.size() == count); + unsigned group_size = in_split.val.size(); + // DANGER -- this only works because tofino Phv::Register uids happend to match + // DANGER -- the deparser encoding of phv containers. + unsigned reg = first; + for (unsigned i = 0; i < in_grp.val.size(); i++, reg += group_size) { + unsigned last = reg + group_size - 1; + int count = 0; + if (phv_use[INGRESS].getrange(reg, group_size)) { + in_grp.val |= 1U << i; + if (i * group_size >= 16 && i * group_size < 32) + error(0, "%s..%s(R%d..R%d) used by ingress deparser but only available to egress", + Phv::reg(reg)->name, Phv::reg(last)->name, reg, last); + else + count++; + } + if (phv_use[EGRESS].getrange(reg, group_size)) { + eg_grp.val |= 1U << i; + if (i * group_size < 16) + error(0, "%s..%s(R%d..R%d) used by egress deparser but only available to ingress", + Phv::reg(reg)->name, Phv::reg(last)->name, reg, last); + else + count++; + } + if (count > 1) + error(0, "%s..%s(R%d..R%d) used by both ingress and egress deparser", + Phv::reg(reg)->name, Phv::reg(last)->name, reg, last); + } + in_split.val = phv_use[INGRESS].getrange(reg, group_size); + eg_split.val = phv_use[EGRESS].getrange(reg, group_size); +} + +static short tofino_phv2cksum[Target::Tofino::Phv::NUM_PHV_REGS][2] = { + // normal {LSWord, MSWord} + {287, 286}, + {283, 282}, + {279, 278}, + {275, 274}, + {271, 270}, + {267, 266}, + {263, 262}, + {259, 258}, + {255, 254}, + {251, 250}, + {247, 246}, + {243, 242}, + {239, 238}, + {235, 234}, + {231, 230}, + {227, 226}, + {223, 222}, + {219, 218}, + {215, 214}, + {211, 210}, + {207, 
206}, + {203, 202}, + {199, 198}, + {195, 194}, + {191, 190}, + {187, 186}, + {183, 182}, + {179, 178}, + {175, 174}, + {171, 170}, + {167, 166}, + {163, 162}, + {285, 284}, + {281, 280}, + {277, 276}, + {273, 272}, + {269, 268}, + {265, 264}, + {261, 260}, + {257, 256}, + {253, 252}, + {249, 248}, + {245, 244}, + {241, 240}, + {237, 236}, + {233, 232}, + {229, 228}, + {225, 224}, + {221, 220}, + {217, 216}, + {213, 212}, + {209, 208}, + {205, 204}, + {201, 200}, + {197, 196}, + {193, 192}, + {189, 188}, + {185, 184}, + {181, 180}, + {177, 176}, + {173, 172}, + {169, 168}, + {165, 164}, + {161, 160}, + {147, -1}, + {145, -1}, + {143, -1}, + {141, -1}, + {127, -1}, + {125, -1}, + {123, -1}, + {121, -1}, + {107, -1}, + {105, -1}, + {103, -1}, + {101, -1}, + {87, -1}, + {85, -1}, + {83, -1}, + {81, -1}, + {67, -1}, + {65, -1}, + {63, -1}, + {61, -1}, + {47, -1}, + {45, -1}, + {43, -1}, + {41, -1}, + {27, -1}, + {25, -1}, + {23, -1}, + {21, -1}, + {7, -1}, + {5, -1}, + {3, -1}, + {1, -1}, + {146, -1}, + {144, -1}, + {142, -1}, + {140, -1}, + {126, -1}, + {124, -1}, + {122, -1}, + {120, -1}, + {106, -1}, + {104, -1}, + {102, -1}, + {100, -1}, + {86, -1}, + {84, -1}, + {82, -1}, + {80, -1}, + {66, -1}, + {64, -1}, + {62, -1}, + {60, -1}, + {46, -1}, + {44, -1}, + {42, -1}, + {40, -1}, + {26, -1}, + {24, -1}, + {22, -1}, + {20, -1}, + {6, -1}, + {4, -1}, + {2, -1}, + {0, -1}, + {159, -1}, + {157, -1}, + {155, -1}, + {153, -1}, + {151, -1}, + {149, -1}, + {139, -1}, + {137, -1}, + {135, -1}, + {133, -1}, + {131, -1}, + {129, -1}, + {119, -1}, + {117, -1}, + {115, -1}, + {113, -1}, + {111, -1}, + {109, -1}, + {99, -1}, + {97, -1}, + {95, -1}, + {93, -1}, + {91, -1}, + {89, -1}, + {79, -1}, + {77, -1}, + {75, -1}, + {73, -1}, + {71, -1}, + {69, -1}, + {59, -1}, + {57, -1}, + {55, -1}, + {53, -1}, + {51, -1}, + {49, -1}, + {39, -1}, + {37, -1}, + {35, -1}, + {33, -1}, + {31, -1}, + {29, -1}, + {19, -1}, + {17, -1}, + {15, -1}, + {13, -1}, + {11, -1}, + {9, -1}, + {158, -1}, + 
{156, -1}, + {154, -1}, + {152, -1}, + {150, -1}, + {148, -1}, + {138, -1}, + {136, -1}, + {134, -1}, + {132, -1}, + {130, -1}, + {128, -1}, + {118, -1}, + {116, -1}, + {114, -1}, + {112, -1}, + {110, -1}, + {108, -1}, + {98, -1}, + {96, -1}, + {94, -1}, + {92, -1}, + {90, -1}, + {88, -1}, + {78, -1}, + {76, -1}, + {74, -1}, + {72, -1}, + {70, -1}, + {68, -1}, + {58, -1}, + {56, -1}, + {54, -1}, + {52, -1}, + {50, -1}, + {48, -1}, + {38, -1}, + {36, -1}, + {34, -1}, + {32, -1}, + {30, -1}, + {28, -1}, + {18, -1}, + {16, -1}, + {14, -1}, + {12, -1}, + {10, -1}, + {8, -1}, + + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + {-1, -1}, + + // tagalong {LSWord, MSWord} + {1, 0}, + {3, 2}, + {5, 4}, + {7, 6}, + {9, 8}, + {11, 10}, + {13, 12}, + {15, 14}, + {17, 16}, + {19, 18}, + {21, 20}, + {23, 22}, + {25, 24}, + {27, 26}, + {29, 28}, + {31, 30}, + {33, 32}, + {35, 34}, + {37, 36}, + {39, 38}, + {41, 40}, + {43, 42}, + {45, 44}, + {47, 46}, + {49, 48}, + {51, 50}, + {53, 52}, + {55, 54}, + {57, 56}, + {59, 58}, + {61, 60}, + {63, 62}, + {64, -1}, + {65, -1}, + {66, -1}, + {67, -1}, + {68, -1}, + {69, -1}, + {70, -1}, + {71, -1}, + {72, -1}, + {73, -1}, + {74, -1}, + {75, -1}, + {76, -1}, + {77, -1}, + {78, -1}, + {79, -1}, + {80, -1}, + {81, -1}, + {82, -1}, + {83, -1}, + {84, -1}, + {85, -1}, + {86, -1}, + {87, -1}, + {88, -1}, + {89, -1}, + {90, -1}, + {91, -1}, + {92, -1}, + {93, -1}, + {94, -1}, + {95, -1}, + {96, -1}, + {97, -1}, + {98, -1}, + {99, -1}, + {100, -1}, + {101, -1}, + {102, -1}, + {103, -1}, + {104, -1}, + {105, -1}, + {106, -1}, + {107, -1}, + {108, -1}, + {109, -1}, + {110, -1}, + {111, -1}, + {112, -1}, + {113, -1}, + {114, -1}, + 
{115, -1}, + {116, -1}, + {117, -1}, + {118, -1}, + {119, -1}, + {120, -1}, + {121, -1}, + {122, -1}, + {123, -1}, + {124, -1}, + {125, -1}, + {126, -1}, + {127, -1}, + {128, -1}, + {129, -1}, + {130, -1}, + {131, -1}, + {132, -1}, + {133, -1}, + {134, -1}, + {135, -1}, + {136, -1}, + {137, -1}, + {138, -1}, + {139, -1}, + {140, -1}, + {141, -1}, + {142, -1}, + {143, -1}}; + +#define TAGALONG_THREAD_BASE \ + (Target::Tofino::Phv::COUNT_8BIT_TPHV + Target::Tofino::Phv::COUNT_16BIT_TPHV + \ + 2 * Target::Tofino::Phv::COUNT_32BIT_TPHV) + +template +static void copy_csum_cfg_entry(DTYPE &dst_unit, STYPE &src_unit) { + BUG_CHECK(dst_unit.size() == src_unit.size()); + + for (unsigned i = 0; i < dst_unit.size(); i++) { + auto &src = src_unit[i]; + auto &dst = dst_unit[i]; + + dst.zero_l_s_b = src.zero_l_s_b; + dst.zero_m_s_b = src.zero_m_s_b; + dst.swap = src.swap; + } +} + +template +static void init_tofino_checksum_entry(ENTRIES &entry) { + entry.zero_l_s_b = 1; + entry.zero_l_s_b.rewrite(); + entry.zero_m_s_b = 1; + entry.zero_m_s_b.rewrite(); + entry.swap = 0; + entry.swap.rewrite(); +} + +template +static void tofino_checksum_units(checked_array_base &main_csum_units, + checked_array_base &tagalong_csum_units, gress_t gress, + Deparser::FullChecksumUnit checksum_unit[]) { + BUG_CHECK(tofino_phv2cksum[Target::Tofino::Phv::NUM_PHV_REGS - 1][0] == 143); + for (int i = 0; i < Target::Tofino::DEPARSER_CHECKSUM_UNITS; i++) { + auto &main_unit = main_csum_units[i].csum_cfg_entry; + auto &tagalong_unit = tagalong_csum_units[i].csum_cfg_entry; + auto &tagalong_unit_zeros_as_ones = tagalong_csum_units[i].zeros_as_ones; + for (auto &ent : main_unit) init_tofino_checksum_entry(ent); + for (auto &ent : tagalong_unit) init_tofino_checksum_entry(ent); + if (checksum_unit[i].entries.empty()) continue; + // Tofino does not support checksum calculation using multiple + // partial checksum unit. 
+ // Full checksum unit and partial checksum unit will always be same + BUG_CHECK(checksum_unit[i].entries.size() == 1); + auto &checksum_unit_entries = checksum_unit[i].entries[i]; + for (auto ® : checksum_unit_entries) { + int mask = reg.mask; + int swap = reg.swap; + int idx = reg->reg.deparser_id(); + if (!reg.pov.empty()) + error(reg.pov.front().lineno, "No POV support in tofino checksum"); + auto cksum_idx0 = tofino_phv2cksum[idx][0]; + auto cksum_idx1 = tofino_phv2cksum[idx][1]; + BUG_CHECK(cksum_idx0 >= 0); + if (idx >= 256) { + write_checksum_entry(tagalong_unit[cksum_idx0], mask & 3, swap & 1, i, + reg->reg.name); + if (cksum_idx1 >= 0) + write_checksum_entry(tagalong_unit[cksum_idx1], mask >> 2, swap >> 1, i, + reg->reg.name); + else + BUG_CHECK((mask >> 2 == 0) && (swap >> 1 == 0)); + } else { + write_checksum_entry(main_unit[cksum_idx0], mask & 3, swap & 1, i, reg->reg.name); + if (cksum_idx1 >= 0) + write_checksum_entry(main_unit[cksum_idx1], mask >> 2, swap >> 1, i, + reg->reg.name); + else + BUG_CHECK((mask >> 2 == 0) && (swap >> 1 == 0)); + } + } + // Thread non-tagalong checksum results through the tagalong unit + int idx = i + TAGALONG_THREAD_BASE + gress * Target::Tofino::DEPARSER_CHECKSUM_UNITS; + write_checksum_entry(tagalong_unit[idx], 0x3, 0x0, i); + // Setting Zeros_As_Ones enable + tagalong_unit_zeros_as_ones.en = checksum_unit[i].zeros_as_ones_en; + main_unit.set_modified(); + tagalong_unit.set_modified(); + } +} + +static void tofino_checksum_units( + Target::Tofino::deparser_regs ®s, + Deparser::FullChecksumUnit full_checksum_unit[2][MAX_DEPARSER_CHECKSUM_UNITS]) { + for (unsigned id = 2; id < MAX_DEPARSER_CHECKSUM_UNITS; id++) { + if (!full_checksum_unit[0][id].entries.empty() && + !full_checksum_unit[1][id].entries.empty()) + error(-1, "deparser checksum unit %d used in both ingress and egress", id); + } + + tofino_checksum_units(regs.input.iim.ii_phv_csum.csum_cfg, + regs.header.him.hi_tphv_csum.csum_cfg, INGRESS, + 
full_checksum_unit[INGRESS]); + tofino_checksum_units(regs.input.iem.ie_phv_csum.csum_cfg, + regs.header.hem.he_tphv_csum.csum_cfg, EGRESS, + full_checksum_unit[EGRESS]); + + // make sure shared units are configured identically + for (unsigned id = 2; id < Target::Tofino::DEPARSER_CHECKSUM_UNITS; id++) { + auto &eg_main_unit = regs.input.iem.ie_phv_csum.csum_cfg[id].csum_cfg_entry; + auto &ig_main_unit = regs.input.iim.ii_phv_csum.csum_cfg[id].csum_cfg_entry; + + auto &eg_tphv_unit = regs.header.hem.he_tphv_csum.csum_cfg[id].csum_cfg_entry; + auto &ig_tphv_unit = regs.header.him.hi_tphv_csum.csum_cfg[id].csum_cfg_entry; + + if (!full_checksum_unit[0][id].entries.empty()) { + copy_csum_cfg_entry(eg_main_unit, ig_main_unit); + copy_csum_cfg_entry(eg_tphv_unit, ig_tphv_unit); + } else if (!full_checksum_unit[1][id].entries.empty()) { + copy_csum_cfg_entry(ig_main_unit, eg_main_unit); + copy_csum_cfg_entry(ig_tphv_unit, eg_tphv_unit); + } + } +} + +template <> +void Deparser::write_config(Target::Tofino::deparser_regs ®s) { + regs.input.icr.inp_cfg.disable(); + regs.input.icr.intr.disable(); + regs.header.hem.he_edf_cfg.disable(); + regs.header.him.hi_edf_cfg.disable(); + + tofino_checksum_units(regs, full_checksum_unit); + json::map field_dictionary_alloc; + json::vector fd_gress; + json::vector fde_entries_i; + json::vector fde_entries_e; + + // Deparser resources + json::vector resources_deparser; + + // Create field dictionaries for ingress + tofino_field_dictionary(regs.input.iim.ii_fde_pov.fde_pov, regs.header.him.hi_fde_phv.fde_phv, + regs.input.iir.main_i.pov.phvs, pov_order[INGRESS], pov[INGRESS], + dictionary[INGRESS], fd_gress, fde_entries_i, INGRESS); + field_dictionary_alloc["ingress"] = std::move(fd_gress); + // Create field dictionaries for egress + tofino_field_dictionary(regs.input.iem.ie_fde_pov.fde_pov, regs.header.hem.he_fde_phv.fde_phv, + regs.input.ier.main_e.pov.phvs, pov_order[EGRESS], pov[EGRESS], + dictionary[EGRESS], fd_gress, fde_entries_e, 
EGRESS); + field_dictionary_alloc["egress"] = std::move(fd_gress); + + if (Log::verbosity() > 0) { + auto json_dump = open_output("logs/field_dictionary.log"); + *json_dump << &field_dictionary_alloc; + } + // Output deparser resources + report_resources_deparser_json(fde_entries_i, fde_entries_e); + + if (Phv::use(INGRESS).intersects(Phv::use(EGRESS))) { + warning(lineno[INGRESS], "Registers used in both ingress and egress in pipeline: %s", + Phv::db_regset(Phv::use(INGRESS) & Phv::use(EGRESS)).c_str()); + /* FIXME -- this only (sort-of) works because 'deparser' comes first in the alphabet, + * FIXME -- so is the first section to have its 'output' method run. Its a hack + * FIXME -- anyways to attempt to correct broken asm that should be an error */ + Phv::unsetuse(INGRESS, phv_use[EGRESS]); + Phv::unsetuse(EGRESS, phv_use[INGRESS]); + } + + tofino_phv_ownership(phv_use, regs.input.iir.ingr.phv8_grp, regs.input.iir.ingr.phv8_split, + regs.input.ier.egr.phv8_grp, regs.input.ier.egr.phv8_split, + Target::Tofino::Phv::FIRST_8BIT_PHV, Target::Tofino::Phv::COUNT_8BIT_PHV); + tofino_phv_ownership(phv_use, regs.input.iir.ingr.phv16_grp, regs.input.iir.ingr.phv16_split, + regs.input.ier.egr.phv16_grp, regs.input.ier.egr.phv16_split, + Target::Tofino::Phv::FIRST_16BIT_PHV, + Target::Tofino::Phv::COUNT_16BIT_PHV); + tofino_phv_ownership(phv_use, regs.input.iir.ingr.phv32_grp, regs.input.iir.ingr.phv32_split, + regs.input.ier.egr.phv32_grp, regs.input.ier.egr.phv32_split, + Target::Tofino::Phv::FIRST_32BIT_PHV, + Target::Tofino::Phv::COUNT_32BIT_PHV); + + for (unsigned i = 0; i < 8; i++) { + if (phv_use[EGRESS].intersects(Target::Tofino::Phv::tagalong_groups[i])) { + regs.input.icr.tphv_cfg.i_e_assign |= 1 << i; + if (phv_use[INGRESS].intersects(Target::Tofino::Phv::tagalong_groups[i])) { + error(lineno[INGRESS], + "tagalong group %d used in both ingress and " + "egress deparser", + i); + } + } + } + + for (auto &intrin : intrinsics) intrin.type->setregs(regs, *this, 
intrin); + + if (!regs.header.hir.ingr.ingress_port.sel.modified()) + regs.header.hir.ingr.ingress_port.sel = 1; + + for (auto &digest : digests) digest.type->setregs(regs, *this, digest); + + // The csum_cfg_entry registers are NOT reset by hardware and must be + // explicitly configured. We remove the disable_if_reset_value() calls on + // these register tree for now, but ideally they should have a flag to indicate no + // reset value is present and the register tree should prune only those regs + // if (options.condense_json) { + // regs.input.disable_if_reset_value(); + // regs.header.disable_if_reset_value(); } + if (error_count == 0 && options.gen_json) { + regs.input.emit_json(*open_output("regs.all.deparser.input_phase.cfg.json")); + regs.header.emit_json(*open_output("regs.all.deparser.header_phase.cfg.json")); + } + TopLevel::regs()->reg_pipe.deparser.hdr.set("regs.all.deparser.header_phase", + ®s.header); + TopLevel::regs()->reg_pipe.deparser.inp.set("regs.all.deparser.input_phase", + ®s.input); +} + +template <> +unsigned Deparser::FDEntry::Checksum::encode() { + return CHECKSUM_ENGINE_PHVID_TOFINO_LOW + (gress * CHECKSUM_ENGINE_PHVID_TOFINO_PER_GRESS) + + unit; +} + +template <> +unsigned Deparser::FDEntry::Constant::encode() { + error(lineno, "Tofino deparser does not support constant entries"); + return -1; +} + +template <> +void Deparser::gen_learn_quanta(Target::Tofino::parser_regs ®s, json::vector &learn_quanta) {} + +template <> +void Deparser::process(Target::Tofino *) { + // Chip-specific code for process method + // None for Tofino +} diff --git a/backends/tofino/bf-asm/tofino/exact_match.cpp b/backends/tofino/bf-asm/tofino/exact_match.cpp new file mode 100644 index 00000000000..dcd8d5d022a --- /dev/null +++ b/backends/tofino/bf-asm/tofino/exact_match.cpp @@ -0,0 +1,37 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with 
the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tofino/exact_match.h" + +void Target::Tofino::ExactMatchTable::setup_ways() { + ::ExactMatchTable::setup_ways(); + for (auto &row : layout) { + int first_way = -1; + for (auto &ram : row.memunits) { + int way = way_map.at(ram).way; + if (first_way < 0) { + first_way = way; + } else if (ways[way].group_xme != ways[first_way].group_xme) { + error(row.lineno, + "Ways %d and %d of table %s share address bus on row %d, " + "but use different hash groups", + first_way, way, name(), row.row); + break; + } + } + } +} diff --git a/backends/tofino/bf-asm/tofino/exact_match.h b/backends/tofino/bf-asm/tofino/exact_match.h new file mode 100644 index 00000000000..a4e6199bb9d --- /dev/null +++ b/backends/tofino/bf-asm/tofino/exact_match.h @@ -0,0 +1,31 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_EXACT_MATCH_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_EXACT_MATCH_H_ + +#include "backends/tofino/bf-asm/tables.h" + +class Target::Tofino::ExactMatchTable : public ::ExactMatchTable { + friend class ::ExactMatchTable; + ExactMatchTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::ExactMatchTable(line, n, gr, s, lid) {} + + void setup_ways() override; +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_EXACT_MATCH_H_ */ diff --git a/backends/tofino/bf-asm/tofino/gateway.cpp b/backends/tofino/bf-asm/tofino/gateway.cpp new file mode 100644 index 00000000000..9be05e17d70 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/gateway.cpp @@ -0,0 +1,320 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tofino/gateway.h" + +#include "backends/tofino/bf-asm/hashexpr.h" +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tofino/ternary_match.h" +#include "lib/hex.h" + +/* Tofino1/2 Gateway table support + * GatewayTable uses the Table::Layout in a somewhat hacky way to track the gateway match + * and payload blocks. Layout may have either one or two entries. 
+ * layout[0] is the layout for the gateway match -- which row and search bus is being used + * layout[1] is the layout for the payload -- which row and result bus is being used. + * if layout.size() == 1, there is no payload. + * The payload result bus is stored as bus[RESULT_BUS] even though it may be either a + * match result bus or a tind bus -- the second bit (so busses 2 and 3) are the tind + * busses as that is the way they they are encode in some registers. It should perhaps be + * changed to use the bus_type_t to track whether it is a match RESULT_BUS or a TIND_BUS + */ + +bool Target::Tofino::GatewayTable::check_match_key(MatchKey &key, const std::vector &vec, + bool is_xor) { + if (!::GatewayTable::check_match_key(key, vec, is_xor)) return false; + if (key.offset < 32 && (key.offset & 7) != (key.val->lo & 7)) + error(key.val.lineno, "Gateway %s key %s misaligned within byte", is_xor ? "xor" : "match", + key.val.name()); + if (key.offset + key.val->size() > (is_xor ? 32 : 44)) { + error(key.val.lineno, "Gateway %s key too big", is_xor ? "xor" : "match"); + return false; + } + if (key.offset >= 32 && !input_xbar.empty()) { + BUG_CHECK(input_xbar.size() == 1, "%s does not have one input xbar", name()); + auto hash = input_xbar[0]->hash_column(key.offset + 8); + if (hash.size() != 1 || hash[0]->bit || !hash[0]->fn || + !hash[0]->fn->match_phvref(key.val)) { + // FIXME: hash.size() maybe zero when key.valid is true. + // which means the key.offset is incorrect. + if (!key.valid) { + error(key.val.lineno, "Gateway %s key %s not in matching hash column", + is_xor ? "xor" : "match", key.val.name()); + return false; + } + } + } + return true; +} + +void Target::Tofino::GatewayTable::pass1() { + ::GatewayTable::pass1(); + /* in a gateway, the layout has one or two rows -- layout[0] specifies the gateway, and + * layout[1] specifies the payload. There will be no columns in either row. 
+ */ + if (layout.empty() || layout[0].row < 0) + error(lineno, "No row specified in gateway"); + else if (!layout[0].bus.count(Layout::SEARCH_BUS) && (!match.empty() || !xor_match.empty())) + error(lineno, "No bus specified in gateway to read from"); + if (payload_unit >= 0 && have_payload < 0 && match_address < 0) + error(lineno, "payload_unit with no payload or match address in gateway"); + if (layout.size() > 1) { + if (layout[1].bus.count(Layout::RESULT_BUS) && (have_payload >= 0 || match_address >= 0)) { + int result_bus = layout[1].bus.at(Layout::RESULT_BUS); + if (payload_unit < 0) { + payload_unit = result_bus & 1; + } else if (payload_unit != (result_bus & 1)) { + error(layout[1].lineno, "payload unit %d cannot write to result bus %d", + payload_unit, result_bus); + } + } + if (layout[1].row < 0) { + error(layout[1].lineno, "payload_bus with no payload_row in gateway"); + } else if (Table *tbl = match_table) { + if (auto *tmatch = dynamic_cast(tbl)) tbl = tmatch->indirect; + if (tbl && !tbl->layout.empty()) { + for (auto &r : tbl->layout) { + if (r.row != layout[1].row) continue; + if (!r.bus.count(Layout::RESULT_BUS)) continue; + int match_rbus = r.bus.at(Layout::RESULT_BUS); + if (payload_unit >= 0 && payload_unit != (match_rbus & 1)) continue; + if (!layout[1].bus.count(Layout::RESULT_BUS)) + layout[1].bus[Layout::RESULT_BUS] = match_rbus; + if (match_rbus == layout[1].bus.at(Layout::RESULT_BUS)) { + if (tbl->to()) layout[1].bus[Layout::RESULT_BUS] |= 2; + break; + } + } + } + } else if (have_payload >= 0 || match_address >= 0) { + if (payload_unit) { + if (auto *old = stage->gw_payload_use[layout[1].row][payload_unit]) + error(layout[1].lineno, "payload %d.%d already in use by table %s", + layout[1].row, payload_unit, old->name()); + else + stage->gw_payload_use[layout[1].row][payload_unit] = this; + } + } else if (payload_unit >= 0) { + error(lineno, "payload_unit with no payload or match address in gateway"); + } + } else if ((have_payload >= 0 || 
match_address >= 0) && !match_table) { + error(have_payload, "payload on standalone gateway requires explicit payload_row"); + } else if (payload_unit >= 0 && match_table) { + bool ternary = false; + Table *tbl = match_table; + if (auto *tmatch = dynamic_cast(tbl)) { + ternary = true; + tbl = tmatch->indirect; + } + if (!tbl || tbl->layout.empty()) { + error(lineno, "No result busses in table %s for gateway payload", match_table->name()); + } else { + for (auto &r : tbl->layout) { + auto match_rbus = r.bus.count(Layout::RESULT_BUS) ? r.bus.at(Layout::RESULT_BUS) + : r.bus.at(Layout::SEARCH_BUS); + if (match_rbus >= 0 && payload_unit != (match_rbus & 1)) continue; + if (!stage->gw_payload_use[r.row][payload_unit]) { + layout.resize(2); + layout[1].row = r.row; + if (r.bus.count(Layout::RESULT_BUS)) + layout[1].bus[Layout::RESULT_BUS] = r.bus.at(Layout::RESULT_BUS); + else + layout[1].bus[Layout::RESULT_BUS] = + r.bus.at(Layout::SEARCH_BUS) | (ternary ? 2 : 0); + stage->gw_payload_use[r.row][payload_unit] = this; + break; + } + } + if (layout.size() < 2) + error(lineno, "No row in table %s has payload unit %d free", tbl->name(), + payload_unit); + } + } + if (layout.size() > 1 && layout[1].bus.count(Layout::RESULT_BUS)) { + int result_bus = layout[1].bus.at(Layout::RESULT_BUS); + Table *tbl = match_table; + if (auto *tmatch = dynamic_cast(tbl)) tbl = tmatch->indirect; + if (!tbl) tbl = this; + auto &bus_use = + (result_bus & 2) ? 
stage->tcam_indirect_bus_use : stage->match_result_bus_use; + auto *old = bus_use[layout[1].row][result_bus & 1]; + if (old && old != tbl) + error(layout[1].lineno, + "Gateway payload result bus %d conflict on row %d between " + "%s and %s", + result_bus, layout[1].row, name(), old->name()); + bus_use[layout[1].row][result_bus & 1] = tbl; + } +} + +void Target::Tofino::GatewayTable::pass2() { + ::GatewayTable::pass2(); + if (gw_unit < 0) { + if (layout[0].bus.count(Layout::SEARCH_BUS) && + !stage->gw_unit_use[layout[0].row][layout[0].bus.at(Layout::SEARCH_BUS)]) { + gw_unit = layout[0].bus.at(Layout::SEARCH_BUS); + } else { + for (int i = 0; i < 2; ++i) { + if (!stage->gw_unit_use[layout[0].row][i] && + !stage->sram_search_bus_use[layout[0].row][i]) { + gw_unit = i; + break; + } + } + } + if (gw_unit < 0) + error(layout[0].lineno, "No gateway units available on row %d", layout[0].row); + else + stage->gw_unit_use[layout[0].row][gw_unit] = this; + } + if (!layout[0].bus.count(Layout::SEARCH_BUS) && gw_unit >= 0) + layout[0].bus[Layout::SEARCH_BUS] = gw_unit; + if (payload_unit < 0 && (have_payload >= 0 || match_address >= 0)) { + if (layout.size() > 1) { + if (!layout[1].bus.count(Layout::RESULT_BUS)) { + if (!stage->gw_payload_use[layout[1].row][0]) + payload_unit = 0; + else if (!stage->gw_payload_use[layout[1].row][1]) + payload_unit = 1; + } else { + int u = layout[1].bus.at(Layout::RESULT_BUS) & 1; + if (!stage->gw_payload_use[layout[1].row][u]) payload_unit = u; + } + if (payload_unit >= 0) + stage->gw_payload_use[layout[1].row][payload_unit] = this; + else + error(lineno, "No payload available on row %d", layout[1].row); + } else if (Table *tbl = match_table) { + bool ternary = false; + if (auto *tmatch = dynamic_cast(tbl)) { + tbl = tmatch->indirect; + ternary = true; + } + if (tbl && !tbl->layout.empty()) { + for (auto &row : tbl->layout) { + auto match_rbus = row.bus.at(ternary ? 
Layout::TIND_BUS : Layout::RESULT_BUS); + BUG_CHECK(match_rbus >= 0); // alloc_busses on the match table must run first + if (stage->gw_payload_use[row.row][match_rbus]) { + continue; + } else { + payload_unit = match_rbus; + } + stage->gw_payload_use[row.row][payload_unit] = this; + layout.resize(2); + layout[1].row = row.row; + layout[1].bus[Layout::RESULT_BUS] = match_rbus | (ternary ? 2 : 0); + break; + } + if (payload_unit < 0) + error(lineno, "No row in table %s has a free payload unit", tbl->name()); + } else { + error(lineno, "No result busses in table %s for gateway payload", + match_table->name()); + } + } + } + if (payload_unit >= 0 && !layout[1].bus.count(Layout::RESULT_BUS)) { + BUG_CHECK(layout.size() > 1); + int row = layout[1].row; + Table *tbl = match_table; + int ternary = tbl ? 0 : -1; + if (auto *tmatch = dynamic_cast(tbl)) { + ternary = 1; + tbl = tmatch->indirect ? tmatch->indirect : tmatch; + } + if (!tbl) tbl = this; + for (int i = payload_unit; i < 4; i += 2) { + if (ternary >= 0 && (i >> 1) != ternary) continue; + auto &result_bus = (i & 2) ? 
stage->tcam_indirect_bus_use : stage->match_result_bus_use; + if (!result_bus[row][i & 1] || result_bus[row][i & 1] == tbl) { + layout[1].bus[Layout::RESULT_BUS] = i; + result_bus[row][i & 1] = tbl; + break; + } + } + if (!layout[1].bus.count(Layout::RESULT_BUS)) { + error(lineno, "No result bus available for gateway payload of table %s on row %d", + name(), layout[1].row); + } + } +} + +void Target::Tofino::GatewayTable::pass3() { + ::GatewayTable::pass3(); + if (layout[0].bus.count(Layout::SEARCH_BUS)) { + int search_bus = layout[0].bus.at(Layout::SEARCH_BUS); + auto *tbl = stage->sram_search_bus_use[layout[0].row][search_bus]; + // Sharing with an exact match -- make sure it is ok + if (!tbl) return; + for (auto &ixb : input_xbar) { + auto *sram_tbl = tbl->to(); + BUG_CHECK(sram_tbl, + "%s is not an SRamMatch table even though it is using a " + "search bus?", + tbl->name()); + SRamMatchTable::WayRam *way = nullptr; + for (auto &row : sram_tbl->layout) { + if (row.row == layout[0].row && row.bus.at(Layout::SEARCH_BUS) == search_bus) { + if (row.memunits.empty()) { + // FIXME -- not really used, so we don't need to check the + // match/hash group. Should this be an asm error? 
+ return; + } + way = &sram_tbl->way_map.at(row.memunits[0]); + break; + } + } + BUG_CHECK(way, "%s claims to use search bus %d.%d, but we can't find it in the layout", + sram_tbl->name(), layout[0].row, search_bus); + if (ixb->hash_group() >= 0 && sram_tbl->ways[way->way].group_xme >= 0 && + ixb->hash_group() != sram_tbl->ways[way->way].group_xme) { + error(layout[0].lineno, + "%s sharing search bus %d.%d with %s, but wants a " + "different hash group", + name(), layout[0].row, search_bus, tbl->name()); + } + if (ixb->match_group() >= 0 && sram_tbl->word_ixbar_group[way->word] >= 0 && + gateway_needs_ixbar_group() && + ixb->match_group() != sram_tbl->word_ixbar_group[way->word]) { + error(layout[0].lineno, + "%s sharing search bus %d.%d with %s, but wants a " + "different match group", + name(), layout[0].row, search_bus, tbl->name()); + } + } + } +} + +template <> +void enable_gateway_payload_exact_shift_ovr(Target::Tofino::mau_regs ®s, int bus) { + // Not supported on tofino + BUG(); +} +template void enable_gateway_payload_exact_shift_ovr(Target::Tofino::mau_regs ®s, int bus); + +void Target::Tofino::GatewayTable::write_next_table_regs(Target::Tofino::mau_regs ®s) { + auto &merge = regs.rams.match.merge; + int idx = 3; + if (need_next_map_lut) error(lineno, "Tofino does not support using next_map_lut in gateways"); + for (auto &line : table) { + BUG_CHECK(idx >= 0); + if (!line.run_table) + merge.gateway_next_table_lut[logical_id][idx] = line.next.next_table_id(); + --idx; + } + if (!miss.run_table) merge.gateway_next_table_lut[logical_id][4] = miss.next.next_table_id(); +} diff --git a/backends/tofino/bf-asm/tofino/gateway.h b/backends/tofino/bf-asm/tofino/gateway.h new file mode 100644 index 00000000000..6035a80dab7 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/gateway.h @@ -0,0 +1,42 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance 
 with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_GATEWAY_H_
+#define BACKENDS_TOFINO_BF_ASM_TOFINO_GATEWAY_H_
+
+#include "backends/tofino/bf-asm/tables.h"
+
+class Target::Tofino::GatewayTable : public ::GatewayTable {
+    friend class ::GatewayTable;
+    GatewayTable(int line, const char *n, gress_t gr, Stage *s, int lid)
+        : ::GatewayTable(line, n, gr, s, lid) {}
+
+    void pass1() override;
+    void pass2() override;
+    void pass3() override;
+
+    bool check_match_key(MatchKey &, const std::vector<MatchKey> &, bool) override;
+    int gw_memory_unit() const override { return layout[0].row * 2 + gw_unit; }
+    REGSETS_IN_CLASS(Tofino, TARGET_OVERLOAD, void write_next_table_regs, (mau_regs &), override)
+};
+
+template <class REGS>
+void enable_gateway_payload_exact_shift_ovr(REGS &regs, int bus);
+template <>
+void enable_gateway_payload_exact_shift_ovr(Target::Tofino::mau_regs &regs, int bus);
+
+#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_GATEWAY_H_ */
diff --git a/backends/tofino/bf-asm/tofino/input_xbar.cpp b/backends/tofino/bf-asm/tofino/input_xbar.cpp
new file mode 100644
index 00000000000..5a3334b5cd4
--- /dev/null
+++ b/backends/tofino/bf-asm/tofino/input_xbar.cpp
@@ -0,0 +1,80 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tofino/input_xbar.h" + +template <> +void InputXbar::write_galois_matrix(Target::Tofino::mau_regs ®s, HashTable id, + const std::map &mat) { + int parity_col = -1; + BUG_CHECK(id.type == HashTable::EXACT, "not an exact hash table %d", id.type); + if (hash_table_parity.count(id) && !options.disable_gfm_parity) { + parity_col = hash_table_parity[id]; + } + auto &hash = regs.dp.xbar_hash.hash; + std::set gfm_rows; + for (auto &col : mat) { + int c = col.first; + // Skip parity column encoding, if parity is set overall parity is + // computed later below + if (c == parity_col) continue; + const HashCol &h = col.second; + for (int word = 0; word < 4; word++) { + unsigned data = h.data.getrange(word * 16, 16); + unsigned valid = (h.valid >> word * 2) & 3; + if (data == 0 && valid == 0) continue; + auto &w = hash.galois_field_matrix[id.index * 4 + word][c]; + w.byte0 = data & 0xff; + w.byte1 = (data >> 8) & 0xff; + w.valid0 = valid & 1; + w.valid1 = (valid >> 1) & 1; + gfm_rows.insert(id.index * 4 + word); + } + } + // A GFM row can be shared by multiple tables. In most cases the columns are + // non overlapping but if they are overlapping the GFM encodings must be the + // same (e.g. ATCAM tables). The input xbar has checks to determine which + // cases are valid. + // The parity must be computed for all columns within the row and set into + // the parity column. 
+    if (parity_col >= 0) {
+        for (auto r : gfm_rows) {
+            int hp_byte0 = 0, hp_byte1 = 0;
+            int hp_valid0 = 0, hp_valid1 = 0;
+            for (auto c = 0; c < 52; c++) {
+                if (c == parity_col) continue;
+                auto &w = hash.galois_field_matrix[r][c];
+                hp_byte0 ^= w.byte0;
+                hp_byte1 ^= w.byte1;
+                hp_valid0 ^= w.valid0;
+                hp_valid1 ^= w.valid1;
+            }
+            auto &w_hp = hash.galois_field_matrix[r][parity_col];
+            w_hp.byte0.rewrite();
+            w_hp.byte1.rewrite();
+            w_hp.valid0.rewrite();
+            w_hp.valid1.rewrite();
+            w_hp.byte0 = hp_byte0;
+            w_hp.byte1 = hp_byte1;
+            w_hp.valid0 = hp_valid0;
+            w_hp.valid1 = hp_valid1;
+        }
+    }
+}
+
+template void InputXbar::write_galois_matrix(Target::Tofino::mau_regs &regs, HashTable id,
+                                             const std::map<int, HashCol> &mat);
diff --git a/backends/tofino/bf-asm/tofino/input_xbar.h b/backends/tofino/bf-asm/tofino/input_xbar.h
new file mode 100644
index 00000000000..5fcf746e7ce
--- /dev/null
+++ b/backends/tofino/bf-asm/tofino/input_xbar.h
@@ -0,0 +1,27 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_INPUT_XBAR_H_
+#define BACKENDS_TOFINO_BF_ASM_TOFINO_INPUT_XBAR_H_
+
+#include "backends/tofino/bf-asm/input_xbar.h"
+
+template <>
+void InputXbar::write_galois_matrix(Target::Tofino::mau_regs &regs, HashTable id,
+                                    const std::map<int, HashCol> &mat);
+
+#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_INPUT_XBAR_H_ */
diff --git a/backends/tofino/bf-asm/tofino/instruction.cpp b/backends/tofino/bf-asm/tofino/instruction.cpp
new file mode 100644
index 00000000000..e2ca6ad7b87
--- /dev/null
+++ b/backends/tofino/bf-asm/tofino/instruction.cpp
@@ -0,0 +1,56 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/* Tofino overloads for instructions #included in instruction.cpp
+ * WARNING -- this is included in an anonymous namespace, as VLIWInstruction is
+ * in that anonymous namespace */
+
+void VLIWInstruction::write_regs(Target::Tofino::mau_regs &regs, Table *tbl,
+                                 Table::Actions::Action *act) {
+    if (act != tbl->stage->imem_addr_use[tbl->gress][act->addr]) {
+        LOG3("skipping " << tbl->name() << '.'
<< act->name << " as its imem is used by " + << tbl->stage->imem_addr_use[tbl->gress][act->addr]->name); + return; + } + LOG2(this); + auto &imem = regs.dp.imem; + int iaddr = act->addr / ACTION_IMEM_COLORS; + int color = act->addr % ACTION_IMEM_COLORS; + unsigned bits = encode(); + BUG_CHECK(slot >= 0); + switch (Phv::reg(slot)->size) { + case 8: + imem.imem_subword8[slot - 64][iaddr].imem_subword8_instr = bits; + imem.imem_subword8[slot - 64][iaddr].imem_subword8_color = color; + imem.imem_subword8[slot - 64][iaddr].imem_subword8_parity = parity(bits) ^ color; + break; + case 16: + imem.imem_subword16[slot - 128][iaddr].imem_subword16_instr = bits; + imem.imem_subword16[slot - 128][iaddr].imem_subword16_color = color; + imem.imem_subword16[slot - 128][iaddr].imem_subword16_parity = parity(bits) ^ color; + break; + case 32: + imem.imem_subword32[slot][iaddr].imem_subword32_instr = bits; + imem.imem_subword32[slot][iaddr].imem_subword32_color = color; + imem.imem_subword32[slot][iaddr].imem_subword32_parity = parity(bits) ^ color; + break; + default: + BUG(); + } + auto &power_ctl = regs.dp.actionmux_din_power_ctl; + phvRead([&](const Phv::Slice &sl) { set_power_ctl_reg(power_ctl, sl.reg.mau_id()); }); +} diff --git a/backends/tofino/bf-asm/tofino/match_table.cpp b/backends/tofino/bf-asm/tofino/match_table.cpp new file mode 100644 index 00000000000..57513269851 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/match_table.cpp @@ -0,0 +1,75 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* mau table template specializations for tofino -- #included directly in match_tables.cpp */ + +template <> +void MatchTable::write_next_table_regs(Target::Tofino::mau_regs ®s, Table *tbl) { + auto &merge = regs.rams.match.merge; + // Copies the values directly from the hit map provided by the compiler directly into the + // map + if (!tbl->get_hit_next().empty()) { + merge.next_table_map_en |= (1U << logical_id); + auto &mp = merge.next_table_map_data[logical_id]; + ubits<8> *map_data[8] = {&mp[0].next_table_map_data0, &mp[0].next_table_map_data1, + &mp[0].next_table_map_data2, &mp[0].next_table_map_data3, + &mp[1].next_table_map_data0, &mp[1].next_table_map_data1, + &mp[1].next_table_map_data2, &mp[1].next_table_map_data3}; + int index = 0; + for (auto &n : tbl->get_hit_next()) *map_data[index++] = n.next_table_id(); + } + + merge.next_table_format_data[logical_id].match_next_table_adr_mask = next_table_adr_mask; + + /** + * Unfortunately for the compiler/driver integration, this register is both required + * to be owned by the compiler and the driver. The driver is responsible for programming + * this register when the default action of a table is specified. The value written + * is the next_table_full of that particular action. + * + * However, the compiler owns this register in the following scenarios: + * 1. For match_with_no_key tables, where the pathway is through the hit pathway, + * the driver does not touch this register, as the values are actually reversed + * 2. For a table that is split into multiple tables, the driver only writes the + * last value. Thus the compiler now sets up this register for all tables + * before this. 
+ */ + merge.next_table_format_data[logical_id].match_next_table_adr_miss_value = + tbl->get_miss_next().next_table_id(); + /** + * The next_table_format_data register is built up of three values: + * - match_next_table_adr_miss_value - Configurable at runtime + * - match_next_table_adr_mask - Static Config + * - match_next_table_adr_default - Static Config + * + * In order to reprogram the register at runtime, the driver must have all three values to + * not require a hardware read, even though only one is truly programmable. Thus in the + * context JSON, we provide the two extra values in an extremely poorly named JSON + * + * ERROR: Driver doesn't read the match_next_table_adr_default + * "default_next_table_mask" - match_next_table_adr_mask + * "default_next_table" - Only required if a table has no default_action specified, which is + * only a Glass value. This could always be 0. Perhaps we can remove from Brig through + * compiler version? + * + */ +} + +template <> +void MatchTable::write_regs(Target::Tofino::mau_regs ®s, int type, Table *result) { + write_common_regs(regs, type, result); +} diff --git a/backends/tofino/bf-asm/tofino/meter.h b/backends/tofino/bf-asm/tofino/meter.h new file mode 100644 index 00000000000..f812ecee37c --- /dev/null +++ b/backends/tofino/bf-asm/tofino/meter.h @@ -0,0 +1,39 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_METER_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_METER_H_ + +#include "backends/tofino/bf-asm/tables.h" + +class Target::Tofino::MeterTable : public ::MeterTable { + friend class ::MeterTable; + MeterTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::MeterTable(line, n, gr, s, lid) {} +}; + +template <> +void MeterTable::setup_teop_regs(Target::Tofino::mau_regs &, int) { + BUG(); // no teop on tofino +} + +template <> +void MeterTable::write_alu_vpn_range(Target::Tofino::mau_regs &) { + BUG(); // not available on tofino +} + +#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_METER_H_ */ diff --git a/backends/tofino/bf-asm/tofino/parser.cpp b/backends/tofino/bf-asm/tofino/parser.cpp new file mode 100644 index 00000000000..2ef8f7b510b --- /dev/null +++ b/backends/tofino/bf-asm/tofino/parser.cpp @@ -0,0 +1,1631 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include + +#include "backends/tofino/bf-asm/misc.h" +#include "backends/tofino/bf-asm/parser-tofino-jbay.h" +#include "backends/tofino/bf-asm/target.h" +#include "backends/tofino/bf-asm/top_level.h" + +// ---------------------------------------------------------------------------- +// Slots & Useful constants +// ---------------------------------------------------------------------------- + +// Following constants are used for detection of unused slot (e.g., initial value) and +// minimal/maximal indexes for extractor slots +static unsigned EXTRACT_SLOT_UNUSED = 511; +static unsigned EXTRACT_SLOT_CONSTANT_DIS = 0; +static unsigned EXTRACT_SLOT_CONSTANT_EN = 1; +static unsigned EXTRACT_SLOT_CONSTANT_ZERO = 0; +static unsigned PHV_MIN_INDEX = 0; +static unsigned PHV_MAX_INDEX = 224; +static unsigned TPHV_MIN_INDEX = 256; +static unsigned TPHV_MAX_INDEX = 368; + +/* remapping structure for getting at the config bits for phv output + * programming in a systematic way */ +struct tofino_phv_output_map { + int size; /* 8, 16, or 32 */ + ubits<9> *dst; + ubits_base *src; /* 6 or 8 bits */ + ubits<1> *src_type, *offset_add, *offset_rot; +}; +std::ostream &operator<<(std::ostream &of, const tofino_phv_output_map *om) { + of << om->size << "bit, dst = " << std::to_string(om->dst->value) + << ", src = " << std::to_string(om->src->value); + if (om->src_type) of << ", src_type = " << std::to_string(om->src_type->value); + if (om->offset_add) of << ", offset_add = " << std::to_string(om->offset_add->value); + if (om->offset_rot) of << ", offset_rot = " << std::to_string(om->offset_rot->value); + return of; +} +enum extractor_slots { + /* enum for indexes in the tofino_phv_output_map */ + phv_32b_0, + phv_32b_1, + phv_32b_2, + phv_32b_3, + phv_16b_0, + phv_16b_1, + phv_16b_2, + phv_16b_3, + phv_8b_0, + phv_8b_1, + phv_8b_2, + phv_8b_3, + tofino_phv_output_map_size, +}; + +// 
PHV use slots: ordered list of slots to try +// +// For example, when trying to find a 32b slot: +// 1. First try the 4 x 32b extractors. +// 2. If that fails, try pairs of 16b extractors. +// 3. If that still fails, finally try the 8b extractors together. +// +// For checksums, allocate in the reverse order as we need to fill +// from the last container back due to a HW bug (see MODEL-210). +// +// FIXME: what does "shift" represent??? +static struct phv_use_slots { + int idx; + unsigned usemask, shift, size; +} phv_32b_slots[] = {{phv_32b_0, 1U << phv_32b_0, 0, 32}, {phv_32b_1, 1U << phv_32b_1, 0, 32}, + {phv_32b_2, 1U << phv_32b_2, 0, 32}, {phv_32b_3, 1U << phv_32b_3, 0, 32}, + {phv_16b_0, 3U << phv_16b_0, 16, 16}, {phv_16b_2, 3U << phv_16b_2, 16, 16}, + {phv_8b_0, 0xfU << phv_8b_0, 24, 8}, {0, 0, 0, 0}}, + phv_16b_slots[] = {{phv_16b_0, 1U << phv_16b_0, 0, 16}, + {phv_16b_1, 1U << phv_16b_1, 0, 16}, + {phv_16b_2, 1U << phv_16b_2, 0, 16}, + {phv_16b_3, 1U << phv_16b_3, 0, 16}, + {phv_8b_0, 3U << phv_8b_0, 8, 8}, + {phv_8b_2, 3U << phv_8b_2, 8, 8}, + {0, 0, 0, 0}}, + phv_8b_slots[] = {{phv_8b_0, 1U << phv_8b_0, 0, 8}, + {phv_8b_1, 1U << phv_8b_1, 0, 8}, + {phv_8b_2, 1U << phv_8b_2, 0, 8}, + {phv_8b_3, 1U << phv_8b_3, 0, 8}, + {0, 0, 0, 0}}, + phv_32b_csum_slots[] = {{phv_32b_3, 1U << phv_32b_3, 0, 32}, + {phv_32b_2, 1U << phv_32b_2, 0, 32}, + {phv_32b_1, 1U << phv_32b_1, 0, 32}, + {phv_32b_0, 1U << phv_32b_0, 0, 32}, + {phv_16b_2, 3U << phv_16b_2, 16, 16}, + {phv_16b_0, 3U << phv_16b_0, 16, 16}, + {phv_8b_0, 0xfU << phv_8b_0, 24, 8}, + {0, 0, 0, 0}}, + phv_16b_csum_slots[] = {{phv_16b_3, 1U << phv_16b_3, 0, 16}, + {phv_16b_2, 1U << phv_16b_2, 0, 16}, + {phv_16b_1, 1U << phv_16b_1, 0, 16}, + {phv_16b_0, 1U << phv_16b_0, 0, 16}, + {phv_8b_2, 3U << phv_8b_2, 8, 8}, + {phv_8b_0, 3U << phv_8b_0, 8, 8}, + {0, 0, 0, 0}}, + phv_8b_csum_slots[] = {{phv_8b_3, 1U << phv_8b_3, 0, 8}, + {phv_8b_2, 1U << phv_8b_2, 0, 8}, + {phv_8b_1, 1U << phv_8b_1, 0, 8}, + {phv_8b_0, 1U << 
phv_8b_0, 0, 8}, + {0, 0, 0, 0}}; + +static phv_use_slots *get_phv_use_slots(int size) { + phv_use_slots *usable_slots = nullptr; + + if (size == 32) + usable_slots = phv_32b_slots; + else if (size == 16) + usable_slots = phv_16b_slots; + else if (size == 8) + usable_slots = phv_8b_slots; + else + BUG(); + + return usable_slots; +} + +static phv_use_slots *get_phv_csum_use_slots(int size) { + phv_use_slots *usable_slots = nullptr; + + if (size == 32) + usable_slots = phv_32b_csum_slots; + else if (size == 16) + usable_slots = phv_16b_csum_slots; + else if (size == 8) + usable_slots = phv_8b_csum_slots; + else + BUG(); + + return usable_slots; +} + +// ---------------------------------------------------------------------------- +// Helping classes +// ---------------------------------------------------------------------------- + +/// Helping cache to remember values for a different parser objects +/// based on the type of extraction size. The storage is done into two +/// layers and user is free to specify the values of layer 1 and layer 2 +/// types. The third type specifies the return value +template +class TwoLevelCache { + std::map> m_cache; + + public: + void insert(const T1 key1, T2 key2, T3 val) { m_cache[key1][key2] = val; } + + bool has(const T1 key1, T2 key2) const { + if (!m_cache.count(key1)) return false; + auto level1 = m_cache.at(key1); + + return level1.count(key2); + } + + T3 get(const T1 key1, T2 key2) const { return m_cache.at(key1).at(key2); } +}; + +/** + * @brief This class is used for internal tracking of Tofino output map + * extractor allocation. It is beneficial during the debugging process of this + * functionality because it can provid the answer to question: + * "What is alloacted into this extractor slot?" 
+ */ +class MatchSlotTracker { + using SetMap = + TwoLevelCache; + using SaveMap = + TwoLevelCache; + using CsumMap = TwoLevelCache; + using PaddingMap = TwoLevelCache; + + public: + // Helping caches for tracking of slot occupancy mapping + SetMap setMap; + SaveMap saveMap; + CsumMap csumMap; + PaddingMap padMap; + + /** + * @brief Get the db slots object + * + * @param match Match line to dump + * @param slot_idx Passed index of slot which needs to be dumped + * @return std::string object with dumped data + */ + std::string get_db_slots(const Parser::State::Match *match, const int slot_idx) const { + std::stringstream ss; + ss << "Mapping for state " << match->state->name; + if (match->match) ss << ", match " << match->match; + ss << ", slot " << slot_idx << ": "; + + if (setMap.has(match, slot_idx)) { + auto set = setMap.get(match, slot_idx); + ss << "set, " << set->where; + } else if (saveMap.has(match, slot_idx)) { + auto save = saveMap.get(match, slot_idx); + ss << "save, " << save->where; + } else if (csumMap.has(match, slot_idx)) { + auto csum = csumMap.get(match, slot_idx); + ss << "csum, " << csum->dest; + } else if (padMap.has(match, slot_idx)) { + ss << "fake extraction padding, " << padMap.get(match, slot_idx); + } else { + ss << ""; + } + + return ss.str(); + } +}; + +static MatchSlotTracker matchSlotTracker; + +// ---------------------------------------------------------------------------- +// Parser configuration dump +// ---------------------------------------------------------------------------- + +template <> +void Parser::Checksum::write_config(Target::Tofino::parser_regs ®s, Parser *parser) { + if (unit == 0) + write_tofino_row_config(regs.memory[gress].po_csum_ctrl_0_row[addr]); + else if (unit == 1) + write_tofino_row_config(regs.memory[gress].po_csum_ctrl_1_row[addr]); + else + error(lineno, "invalid unit for parser checksum"); +} + +template <> +void Parser::CounterInit::write_config(Target::Tofino::parser_regs ®s, gress_t gress, int idx) { 
+ auto &ctr_init_ram = regs.memory[gress].ml_ctr_init_ram[idx]; + ctr_init_ram.add = add; + ctr_init_ram.mask = mask; + ctr_init_ram.rotate = rot; + ctr_init_ram.max = max; + ctr_init_ram.src = src; +} + +template <> +void Parser::RateLimit::write_config(::Tofino::regs_pipe ®s, gress_t gress) { + if (gress == INGRESS) { + auto &ctrl = regs.pmarb.parb_reg.parb_group.i_output_rate_ctrl; + ctrl.ratectrl_inc = inc; + ctrl.ratectrl_dec = dec; + ctrl.ratectrl_max = max; + ctrl.ratectrl_ena = 1; + } else if (gress == EGRESS) { + auto &ctrl = regs.pmarb.parb_reg.parb_group.e_output_rate_ctrl; + ctrl.ratectrl_inc = inc; + ctrl.ratectrl_dec = dec; + ctrl.ratectrl_max = max; + ctrl.ratectrl_ena = 1; + } +} + +template <> +void Parser::State::Match::write_lookup_config(Target::Tofino::parser_regs ®s, State *state, + int row) const { + auto &word0 = regs.memory[state->gress].ml_tcam_row_word0[row]; + auto &word1 = regs.memory[state->gress].ml_tcam_row_word1[row]; + match_t lookup = {0, 0}; + unsigned dont_care = 0; + for (int i = 0; i < 4; i++) { + lookup.word0 <<= 8; + lookup.word1 <<= 8; + dont_care <<= 8; + if (state->key.data[i].bit >= 0) { + lookup.word0 |= ((match.word0 >> state->key.data[i].bit) & 0xff); + lookup.word1 |= ((match.word1 >> state->key.data[i].bit) & 0xff); + } else { + dont_care |= 0xff; + } + } + lookup.word0 |= dont_care; + lookup.word1 |= dont_care; + word0.lookup_16 = (lookup.word0 >> 16) & 0xffff; + word1.lookup_16 = (lookup.word1 >> 16) & 0xffff; + word0.lookup_8[0] = (lookup.word0 >> 8) & 0xff; + word1.lookup_8[0] = (lookup.word1 >> 8) & 0xff; + word0.lookup_8[1] = lookup.word0 & 0xff; + word1.lookup_8[1] = lookup.word1 & 0xff; + word0.curr_state = state->stateno.word0; + word1.curr_state = state->stateno.word1; + if (state->key.ctr_zero >= 0) { + word0.ctr_zero = (match.word0 >> state->key.ctr_zero) & 1; + word1.ctr_zero = (match.word1 >> state->key.ctr_zero) & 1; + } else { + word0.ctr_zero = word1.ctr_zero = 1; + } + + if (state->key.ctr_neg >= 
0) { + word0.ctr_neg = (match.word0 >> state->key.ctr_neg) & 1; + word1.ctr_neg = (match.word1 >> state->key.ctr_neg) & 1; + } else { + word0.ctr_neg = word1.ctr_neg = 1; + } + + word0.ver_0 = word1.ver_0 = 1; + word0.ver_1 = word1.ver_1 = 1; +} + +/* FIXME -- combine these next two methods into a single method on MatchKey */ +/* FIXME -- factor Tofino/JBay variation better (most is common) */ +template <> +int Parser::State::write_lookup_config(Target::Tofino::parser_regs ®s, Parser *pa, State *state, + int row, const std::vector &prev) { + LOG2("-- checking match from state " << name << " (" << stateno << ')'); + auto &ea_row = regs.memory[gress].ml_ea_row[row]; + int max_off = -1; + for (int i = 0; i < 4; i++) { + if (i == 1) continue; + if (key.data[i].bit < 0) continue; + bool set = true; + for (State *p : prev) { + if (p->key.data[i].bit >= 0) { + set = false; + if (p->key.data[i].byte != key.data[i].byte) + error(p->lineno, + "Incompatible match fields between states " + "%s and %s, triggered from state %s", + name.c_str(), p->name.c_str(), state->name.c_str()); + } + } + if (set && key.data[i].byte != MatchKey::USE_SAVED) { + int off = key.data[i].byte + ea_row.shift_amt; + if (off < 0 || off >= 32) { + error(key.lineno, + "Match offset of %d in state %s out of range " + "for previous state %s", + key.data[i].byte, name.c_str(), state->name.c_str()); + } else if (i) { + ea_row.lookup_offset_8[(i - 2)] = off; + ea_row.ld_lookup_8[(i - 2)] = 1; + max_off = std::max(max_off, off); + } else { + ea_row.lookup_offset_16 = off; + ea_row.ld_lookup_16 = 1; + max_off = std::max(max_off, off + 1); + } + } + } + return max_off; +} + +template <> +int Parser::State::Match::write_load_config(Target::Tofino::parser_regs ®s, Parser *pa, + State *state, int row) const { + auto &ea_row = regs.memory[state->gress].ml_ea_row[row]; + int max_off = -1; + for (int i = 0; i < 4; i++) { + if (i == 1) continue; + if (load.data[i].bit < 0) continue; + if (load.data[i].byte != 
MatchKey::USE_SAVED) { + int off = load.data[i].byte; + if (off < 0 || off >= 32) { + error(load.lineno, "Load offset of %d in state %s out of range", load.data[i].byte, + state->name.c_str()); + } else if (i) { + ea_row.lookup_offset_8[(i - 2)] = off; + ea_row.ld_lookup_8[(i - 2)] = 1; + max_off = std::max(max_off, off); + } else { + ea_row.lookup_offset_16 = off; + ea_row.ld_lookup_16 = 1; + max_off = std::max(max_off, off + 1); + } + } + } + return max_off; +} + +// Narrow-to-wide extraction alignment needs adjusting when +// 8b/16b checksum validations are written in the same cycle +bool adjust_phv_use_slot(phv_use_slots &slot, int size, int csum_8b, int csum_16b) { + if ((size == 32 && slot.idx >= phv_16b_0) || (size == 16 && slot.idx >= phv_8b_0)) { + if (slot.idx <= phv_16b_3) { + slot.idx -= csum_16b; + slot.usemask >>= csum_16b; + return slot.idx >= phv_16b_0; + } else { + slot.idx -= csum_8b; + slot.usemask >>= csum_8b; + return slot.idx >= phv_8b_0; + } + } + return true; +} + +template <> +void Parser::Checksum::write_output_config(Target::Tofino::parser_regs ®s, Parser *pa, + State::Match *ma, void *_map, unsigned &used) const { + if (type != 0 || !dest) return; + + // checksum verification requires the last extractor to be a dummy (to work around a RTL bug) + // see MODEL-210 for discussion. + + tofino_phv_output_map *map = reinterpret_cast(_map); + + phv_use_slots *usable_slots = get_phv_csum_use_slots(dest->reg.size); + + auto &slot = usable_slots[0]; + + auto id = dest->reg.parser_id(); + *map[slot.idx].dst = id; + matchSlotTracker.csumMap.insert(ma, slot.idx, this); + // The source address is checked for source extract errors whenever the dest + // is not 511. To prevent errors when buf_req = 0 (corresponding to states with no extracts), + // point the source to the version area of the source range which is always valid. 
+ *map[slot.idx].src = PARSER_SRC_MAX_IDX - (dest->reg.size / 8) + 1; + used |= slot.usemask; + + pa->phv_allow_bitwise_or[id] = 1; +} + +template <> +int Parser::State::Match::Save::write_output_config(Target::Tofino::parser_regs ®s, void *_map, + unsigned &used, int csum_8b, + int csum_16b) const { + tofino_phv_output_map *map = reinterpret_cast(_map); + + int slot_size = (hi - lo + 1) * 8; + phv_use_slots *usable_slots = get_phv_use_slots(slot_size); + + for (int i = 0; usable_slots[i].usemask; i++) { + auto slot = usable_slots[i]; + if (!adjust_phv_use_slot(slot, where->reg.size, csum_8b, csum_16b)) continue; + if (used & slot.usemask) continue; + if ((flags & ROTATE) && !map[slot.idx].offset_rot) continue; + + if ((where->reg.size == 32 && slot.idx >= phv_16b_0) || + (where->reg.size == 16 && slot.idx >= phv_8b_0)) { + match->has_narrow_to_wide_extract = true; + + if (where->reg.size == 32 && slot.idx == phv_8b_0) { + match->narrow_to_wide_32b_8.push_back(&where); + } else if (where->reg.size == 32 && slot.idx >= phv_16b_0) { + match->narrow_to_wide_32b_16.push_back(&where); + } else { + match->narrow_to_wide_16b_8.push_back(&where); + } + } + + // swizzle upper/lower pairs of extractors for 4x8->32 + // a 32b value using 8b extractors must use the extractors in this order: [2 3 0 1] + bool swizzle_b1 = where->reg.size == 32 && slot.idx == phv_8b_0; + + int byte = lo; + for (int i = slot.idx; slot.usemask & (1U << i); i++, byte += slot.size / 8U) { + int x = i; + if (swizzle_b1) x ^= 2; + + *map[x].dst = where->reg.parser_id(); + *map[x].src = byte; + matchSlotTracker.saveMap.insert(match, x, this); + if (flags & OFFSET) *map[x].offset_add = 1; + if (flags & ROTATE) *map[x].offset_rot = 1; + } + used |= slot.usemask; + return hi; + } + error(where.lineno, "Ran out of phv output extractor slots"); + return -1; +} + +bool can_slot_extract_constant(int slot) { + return slot != phv_16b_2 && slot != phv_16b_3 && slot != phv_32b_2 && slot != phv_32b_3; +} + +/** + * 
@brief Encode constant @p val for use with extractor slot @p slot. + * + * @param slot Valid value of enum extractor_slot + * @param val Constant to encode + * @return int The encoded constant, or -1 if given @p slot cannot extract a constant or is not a + * valid value of enum extractor_slot + */ +static int encode_constant_for_slot(int slot, unsigned val) { + if (!can_slot_extract_constant(slot)) return -1; + if (val == 0) return val; + switch (slot) { + case phv_32b_0: + case phv_32b_1: + for (int i = 0; i < 32; i++) { + if ((val & 1) && (0x7 & val) == val) return (i << 3) | val; + val = ((val >> 1) | (val << 31)) & 0xffffffffU; + } + return -1; + case phv_16b_0: + case phv_16b_1: + if ((val >> 16) && encode_constant_for_slot(slot, val >> 16) < 0) return -1; + val &= 0xffff; + for (int i = 0; i < 16; i++) { + if ((val & 1) && (0xf & val) == val) return (i << 4) | val; + val = ((val >> 1) | (val << 15)) & 0xffffU; + } + return -1; + case phv_8b_0: + case phv_8b_1: + case phv_8b_2: + case phv_8b_3: + return val & 0xff; + default: + BUG(); + return -1; + } +} + +template <> +void Parser::State::Match::Set::write_output_config(Target::Tofino::parser_regs ®s, void *_map, + unsigned &used, int csum_8b, + int csum_16b) const { + tofino_phv_output_map *map = reinterpret_cast(_map); + + phv_use_slots *usable_slots = get_phv_use_slots(where->reg.size); + + for (int i = 0; usable_slots[i].usemask; i++) { + auto slot = usable_slots[i]; + if (!adjust_phv_use_slot(slot, where->reg.size, csum_8b, csum_16b)) continue; + if (used & slot.usemask) continue; + if (!map[slot.idx].src_type) continue; + if ((flags & ROTATE) && (!map[slot.idx].offset_rot || slot.shift)) continue; + unsigned shift = 0; + bool can_encode = true; + for (int i = slot.idx; slot.usemask & (1U << i); i++) { + if (encode_constant_for_slot(i, (what << where->lo) >> shift) < 0) { + can_encode = false; + break; + } + shift += slot.size; + } + if (!can_encode) continue; + + if ((where->reg.size == 32 && slot.idx 
>= phv_16b_0) || + (where->reg.size == 16 && slot.idx >= phv_8b_0)) { + match->has_narrow_to_wide_extract = true; + + if (where->reg.size == 32 && slot.idx == phv_8b_0) { + match->narrow_to_wide_32b_8.push_back(&where); + } else if (where->reg.size == 32 && slot.idx >= phv_16b_0) { + match->narrow_to_wide_32b_16.push_back(&where); + } else { + match->narrow_to_wide_16b_8.push_back(&where); + } + } + + // swizzle upper/lower pairs of extractors for 4x8->32 + // a 32b value using 8b extractors must use the extractors in this order: [2 3 0 1] + bool swizzle_b1 = where->reg.size == 32 && slot.idx == phv_8b_0; + + // Go from most- to least-significant slice + shift = where->reg.size - slot.size; + for (int i = slot.idx; slot.usemask & (1U << i); i++) { + int x = i; + if (swizzle_b1) x ^= 2; + + *map[x].dst = where->reg.parser_id(); + *map[x].src_type = 1; + auto v = encode_constant_for_slot(x, (what << where->lo) >> shift); + *map[x].src = v; + matchSlotTracker.setMap.insert(match, x, this); + if (flags & OFFSET) *map[x].offset_add = 1; + if (flags & ROTATE) *map[x].offset_rot = 1; + shift -= slot.size; + } + used |= slot.usemask; + return; + } + error(where.lineno, "Ran out of phv output extractor slots"); +} + +/** Tofino1-specific output map management + * Tofino1 has separate 8- 16- and 32-bit extractors with various limitations on extracting + * constants and capability of ganging extractors to extract larger PHVs or extrating adjacent + * pairs of smaller PHVs. They're also addressed via named registers rather than an array, + * so we build an array of pointers into the reg object to simplify things. The `used` + * value ends up begin a simple 12-bit bitmap with 1 bit for each extractor. 
+ */ + +#define OUTPUT_MAP_INIT(MAP, ROW, SIZE, INDEX) \ + MAP[phv_##SIZE##b_##INDEX].size = SIZE; \ + MAP[phv_##SIZE##b_##INDEX].dst = &ROW.phv_##SIZE##b_dst_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].src = &ROW.phv_##SIZE##b_src_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].src_type = &ROW.phv_##SIZE##b_src_type_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].offset_add = &ROW.phv_##SIZE##b_offset_add_dst_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].offset_rot = &ROW.phv_##SIZE##b_offset_rot_imm_##INDEX; +#define OUTPUT_MAP_INIT_PART(MAP, ROW, SIZE, INDEX) \ + MAP[phv_##SIZE##b_##INDEX].size = SIZE; \ + MAP[phv_##SIZE##b_##INDEX].dst = &ROW.phv_##SIZE##b_dst_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].src = &ROW.phv_##SIZE##b_src_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].src_type = 0; \ + MAP[phv_##SIZE##b_##INDEX].offset_add = &ROW.phv_##SIZE##b_offset_add_dst_##INDEX; \ + MAP[phv_##SIZE##b_##INDEX].offset_rot = 0; + +template <> +void *Parser::setup_phv_output_map(Target::Tofino::parser_regs ®s, gress_t gress, int row) { + static tofino_phv_output_map map[tofino_phv_output_map_size]; + auto &action_row = regs.memory[gress].po_action_row[row]; + OUTPUT_MAP_INIT(map, action_row, 32, 0) + OUTPUT_MAP_INIT(map, action_row, 32, 1) + OUTPUT_MAP_INIT_PART(map, action_row, 32, 2) + OUTPUT_MAP_INIT_PART(map, action_row, 32, 3) + OUTPUT_MAP_INIT(map, action_row, 16, 0) + OUTPUT_MAP_INIT(map, action_row, 16, 1) + OUTPUT_MAP_INIT_PART(map, action_row, 16, 2) + OUTPUT_MAP_INIT_PART(map, action_row, 16, 3) + OUTPUT_MAP_INIT(map, action_row, 8, 0) + OUTPUT_MAP_INIT(map, action_row, 8, 1) + OUTPUT_MAP_INIT(map, action_row, 8, 2) + OUTPUT_MAP_INIT(map, action_row, 8, 3) + return map; +} + +template <> +void Parser::mark_unused_output_map(Target::Tofino::parser_regs ®s, void *_map, unsigned used) { + tofino_phv_output_map *map = reinterpret_cast(_map); + for (int i = 0; i < tofino_phv_output_map_size; i++) + if (!(used & (1U << i))) *map[i].dst = 0x1ff; +} + +template <> +void 
Parser::State::Match::HdrLenIncStop::write_config( + Tofino::memories_all_parser_::_po_action_row &) const { + BUG(); // no hdr_len_inc_stop on tofino; should not get here +} + +template <> +void Parser::State::Match::Clot::write_config(Tofino::memories_all_parser_::_po_action_row &, int, + bool) const { + BUG(); // no CLOTs on tofino; should not get here +} + +template <> +void Parser::State::Match::write_counter_config( + Target::Tofino::parser_regs::_memory::_ml_ea_row &ea_row) const { + ea_row.ctr_amt_idx = ctr_instr ? ctr_instr->addr : ctr_imm_amt; + ea_row.ctr_ld_src = ctr_ld_src; + ea_row.ctr_load = ctr_load; +} + +template +void init_common_regs(Parser *p, COMMON ®s, gress_t gress) { + // TODO: fixed config copied from compiler -- needs to be controllable + for (int i = 0; i < 4; i++) { + if (p->start_state[i]) { + regs.start_state.state[i] = p->start_state[i]->stateno.word1; + regs.enable_.enable_[i] = 1; + } + regs.pri_start.pri[i] = p->priority[i]; + regs.pri_thresh.pri[i] = p->pri_thresh[i]; + } + regs.mode = 4; + regs.max_iter.max = 128; + if (p->parser_error.lineno >= 0) { + regs.err_phv_cfg.dst = p->parser_error->reg.parser_id(); + regs.err_phv_cfg.aram_mbe_en = 1; + regs.err_phv_cfg.ctr_range_err_en = 1; + regs.err_phv_cfg.dst_cont_err_en = 1; + regs.err_phv_cfg.fcs_err_en = 1; + regs.err_phv_cfg.multi_wr_err_en = 1; + regs.err_phv_cfg.no_tcam_match_err_en = 1; + regs.err_phv_cfg.partial_hdr_err_en = 1; + regs.err_phv_cfg.phv_owner_err_en = 1; + regs.err_phv_cfg.src_ext_err_en = 1; + regs.err_phv_cfg.timeout_cycle_err_en = 1; + regs.err_phv_cfg.timeout_iter_err_en = 1; + } +} + +enum class AnalysisType { BIT8, BIT16 }; +using extractor_slots_list = std::initializer_list; +const extractor_slots_list phv_8bit_extractors = {phv_8b_0, phv_8b_1, phv_8b_2, phv_8b_3}; +const extractor_slots_list phv_16bit_extractors = {phv_16b_0, phv_16b_1, phv_16b_2, phv_16b_3}; +// Declare a helping type for the count cache class +using ExtractionCountCache = 
TwoLevelCache; + +/// Count the number of extractions for a given @p match. +/// The method takes the @p elems list which holds PHV indexes to check +/// (accepted lists are @p phv_8bit_extractors and @p phv_16_bit_extractors). +int count_number_of_extractions(Parser *parser, Target::Tofino::parser_regs ®s, + Parser::State::Match *match, const AnalysisType type) { + int used = 0; + int row = parser->match_to_row.at(match); + auto map = reinterpret_cast( + parser->setup_phv_output_map(regs, parser->gress, row)); + + auto elems = type == AnalysisType::BIT8 ? phv_8bit_extractors : phv_16bit_extractors; + for (auto i : elems) { + if (map[i].dst->value != EXTRACT_SLOT_UNUSED) { + used++; + } + } + + return used; +} + +/// Pad collector object which provides mapping from a narrow-to-wide match +/// to added padding +class PaddingInfoCollector { + public: + struct PadInfo { + /// The number of added extractors to work correctly + int m_count8; + int m_count16; + + PadInfo() { + m_count8 = 0; + m_count16 = 0; + } + + void add(const AnalysisType type, const int val) { + if (type == AnalysisType::BIT8) { + m_count8 += val; + } else { + m_count16 += val; + } + } + }; + + /// Information for one parser state where the padding + // is being added + struct PadState { + /// Added padding information into parser states (successors or predecessors) + std::map m_padding; + + void addPadInfo(Parser::State::Match *match, AnalysisType pad, int count) { + if (count == 0) return; + + if (!m_padding.count(match)) { + m_padding[match] = new PadInfo; + } + + m_padding[match]->add(pad, count); + } + + bool hasPadInfo() { return m_padding.size() != 0; } + + void print() { + std::stringstream message; + message << " Pad State Info : " << std::endl; + for (auto &m : m_padding) { + message << " \t State(match) : " << m.first->state->name << "(" << m.first->match + << ")" << " -> { m_count8 : " << m.second->m_count8 + << ", m_count16 : " << m.second->m_count16 << " }" << std::endl; + } + 
LOG1(message.str()); + } + }; + + PadState *getPadState(Parser::State::Match *match) { + if (!m_nrw_matches.count(match)) { + m_nrw_matches[match] = new PadState; + } + + return m_nrw_matches[match]; + } + + void printPadInfo() { + for (auto s : m_nrw_matches) { + auto nrw_match = s.first; + auto info_collector = s.second; + // Skip the info if we don't have any stored padding + if (!info_collector->hasPadInfo()) { + continue; + } + + std::stringstream message; + message << "State " << nrw_match->state->name; + if (nrw_match->match == true) { + message << ", match " << nrw_match->match; + } + + message << " is using the narrow-to-wide extraction: " << std::endl; + + if (nrw_match->narrow_to_wide_32b_16.size() != 0) { + message << "\t* 32 bit extractors are replaced by 2 x 16 bit extractors: "; + for (auto ref : nrw_match->narrow_to_wide_32b_16) { + message << ref->name() << " "; + } + message << std::endl; + } + + if (nrw_match->narrow_to_wide_32b_8.size() != 0) { + message << "\t* 32 bit extractors are replaced by 4 x 8 bit extractors: "; + for (auto ref : nrw_match->narrow_to_wide_32b_8) { + message << ref->name() << " "; + } + message << std::endl; + } + + if (nrw_match->narrow_to_wide_16b_8.size() != 0) { + message << "\t* 16 bit extractors are replaced by 2 x 8 bit extractors: "; + for (auto ref : nrw_match->narrow_to_wide_16b_8) { + message << ref->name() << " "; + } + message << std::endl; + } + + message + << "The following extractions need to be added to parser states to work correctly:" + << std::endl; + + for (auto pad : info_collector->m_padding) { + auto match = pad.first; + auto pad_info = pad.second; + + message << "\t* State " << match->state->name; + if (match->match == true) { + message << ", match " << match->match; + } + + message << " needs " << pad_info->m_count8 << " x 8 bit and " << pad_info->m_count16 + << " x 16 bit extractions to be added" << std::endl; + } + + LOG1("WARNING: " << message.str()); + } + } + + private: + /// This provides 
mapping between the narrow-to-wide (NRW) match and collected + /// padding information + std::map m_nrw_matches; +}; + +/// Size of internal parser FIFO +static const int parser_fifo_size = 32; + +/// Compute the @p val / @p div and ceil it to the nearest upper value. The result +/// will be wrapped to the FIFO size in parser. +int ceil_and_wrap_to_fifo_size(int val, int div) { + int fifo_items = val > parser_fifo_size ? parser_fifo_size : val; + return (fifo_items + div - 1) / div; +} + +int analyze_worst_extractor_path(Parser *parser, Target::Tofino::parser_regs ®s, + Parser::State::Match *match, AnalysisType type, + std::set &visited, ExtractionCountCache &cache) { + if (visited.count(match->state)) { + // We have found a node in a loop --> we will get via our predessors into the same state. + // This means that we can take this loop many times and that we need to distribute the + // maximal FIFO value to our parets (we are predecessors of our parents). + // + // IN SUCH CASE THE NUMBER OF EXTRACTIONS FOR ALL SUCCESSORS DOESN'T CATCH + // THE REALITY. IT IS JUST FOR THE SIMULATION OF FULL PARSER FIFO BLOCKS. 
+ // REALITY IS COVERED WHEN THE GRAPH DOESN'T HAVE LOOPS + return parser_fifo_size; + } + + if (LOGGING(3)) { + std::stringstream ss; + ss << "Processing match " << match->state->name << ", gress = " << match->state->gress; + if (match->match) { + ss << ", match " << match->match; + } + LOG3(ss.str()); + } + + // Check the cache if we know the result + if (cache.has(match, type)) { + return cache.get(match, type); + } + + // Mark node as visited and run the analysis + visited.insert(match->state); + int extractions = count_number_of_extractions(parser, regs, match, type); + int pred_extractions = 0; + for (auto pred : match->state->pred) { + pred_extractions = + std::max(pred_extractions, + analyze_worst_extractor_path(parser, regs, pred, type, visited, cache)); + } + + // Insert the result into the cache and unmark the node as visited + visited.erase(match->state); + int extraction_result = extractions + pred_extractions; + cache.insert(match, type, extraction_result); + + return extraction_result; +} + +/** + * @brief Dump the occupancy of extraction slots in output map + * + * @param match Current match which is being processed + * @param indexes Indexes to inspect + * @param prefix Prefix to add before the print + */ +static void print_slot_occupancy(const Parser::State::Match *match, + const std::initializer_list indexes, + const std::string prefix = "") { + // Print the prefix if not empty, iterate over checked indexes and + // print slot occupancy information. 
+ std::stringstream ss; + std::string sep; + if (prefix != "") { + ss << prefix << " : " << std::endl; + sep = "\t* "; + } + + for (auto idx : indexes) { + ss << sep << matchSlotTracker.get_db_slots(match, idx) << std::endl; + } + + auto output = ss.str(); + if (output.size() == 0) return; + LOG5(output); +} + +/** + * @brief Set the @p pad_idx or @p from_idx based on the used extractor scenario + * + * @param pad_idx Input/output for padding index + * @param from_idx Input/output for source index + * @param used Number of used extractors + * @param has_csum State is using the VERIFY checksum + * @param match State match which is being processed + * @param map Pointer on the output map configuration + */ +static void set_idx_for_16b_extractions(unsigned &pad_idx, unsigned &from_idx, const unsigned used, + const bool has_csum, const Parser::State::Match *match, + struct tofino_phv_output_map *map) { + if (used == 1) { + // One extractor is being used and the index is stored in from_idx. The allocation + // strategy here is to keep data in tuples {0,1} and {2,3}. + if (from_idx == phv_16b_0 || from_idx == phv_16b_2) { + pad_idx = from_idx + 1; + } else if (from_idx == phv_16b_1 || from_idx == phv_16b_3) { + pad_idx = from_idx - 1; + } else { + // We should never reach this point + error(match->lineno, + "Cannot identify index for 16bit extractor padding (1 extractor)!"); + } + } else { + // Three extractors are used and the unused extractor index is stored in + // the pad_idx variable. 
We can keep indexes in tuples + if (has_csum) { + from_idx = phv_16b_3; + } else if (pad_idx == phv_16b_0 || pad_idx == phv_16b_2) { + from_idx = pad_idx + 1; + } else if (pad_idx == phv_16b_1 || pad_idx == phv_16b_3) { + from_idx = pad_idx - 1; + } else { + // We should never reach this point + error(match->lineno, + "Cannot identify index for 16bit extractor padding (3 extractors)!"); + } + } +} + +/** + * @brief Verify all invariants for the extractor padding configuration + * + * @param pad_idx Input/output for padding index + * @param from_idx Input/output for source index + * @param used Number of used extractors + * @param has_csum State is using the VERIFY checksum + * @param match State match which is being processed + * @param map Pointer on the output map configuration + */ +static void check_16b_extractor_configuration(const unsigned pad_idx, const unsigned from_idx, + const unsigned used, const bool has_csum, + const struct tofino_phv_output_map *map) { + // 1] Indexes are kept in tuples {0,1} and {2,3}. Checksum means that from_idx is + // set to the last extractor. + bool csum = has_csum && (from_idx == phv_16b_3); + bool first_tuple = (from_idx == phv_16b_0 || from_idx == phv_16b_1) && (pad_idx <= phv_16b_1); + bool second_tuple = (from_idx == phv_16b_2 || from_idx == phv_16b_3) && (pad_idx >= phv_16b_2); + BUG_CHECK(has_csum || first_tuple || second_tuple, + "Source and destination index are not configured correctly for 16bit 2n extractor " + "padding!"); + + // 2] All indexes are sourced from global version field which is tied to zeros. 
+ // The Checksum case means that we need to set the from index on the last 16b extractor + BUG_CHECK(map[pad_idx].dst->value != EXTRACT_SLOT_UNUSED, + "Invalid extractor destination for 16bit 2n padding!"); + if (has_csum) { + BUG_CHECK(from_idx == phv_16b_3, + "Invalid from_idx for the 16bit 2n padding with checksum!"); + } + + // Check the slot configuration - sourcing from global field and no constant for {0,1} + BUG_CHECK( + *map[pad_idx].src >= PARSER_SRC_MAX_IDX - 3 && *map[pad_idx].src != EXTRACT_SLOT_UNUSED, + "Field is not sourcing from the global version field!"); + if (pad_idx == phv_16b_0 || pad_idx == phv_16b_1) { + BUG_CHECK(*map[pad_idx].src_type == 0, + "Invalid configuration of the source type for 16b 2n padding!"); + } +} + +// +// uArch for Tofino Parser: +// * Parser Output section +// * Checksum section +// * Parse Merge section + +/** + * @brief Perform the 16b padding onto map and computed index. + * + * @param regs Register configuration instance + * @param map Tofino output map pointer + * @param pad_idx Index which needs to be padded + * @param from_idx Index which is the source of the slot configuration + */ +static void do_16b_padding(Target::Tofino::parser_regs ®s, tofino_phv_output_map *map, + const unsigned pad_idx, const unsigned from_idx) { + // Add fake extractors to reach 2n constraint, we need to copy destination and source from + // the global version field which is tied to zeros in RTL. + // We are keeping both indexes in tuples {0,1} or {2,3}. 
+ map[pad_idx].dst->rewrite(); + *map[pad_idx].dst = *map[from_idx].dst; + *map[pad_idx].src = PARSER_SRC_MAX_IDX - 1; + if (pad_idx < phv_16b_2) { + // Even though extractors {0, 1} can extract from constants, we want to extract + // from the tied-to-zero global version field for consistency across all 16b extractors + *map[pad_idx].src_type = EXTRACT_SLOT_CONSTANT_DIS; + } + + // Mark the dummy write dest as multi-write, we need to distinguish between PHV and TPHV + if (*map[from_idx].dst >= PHV_MIN_INDEX && *map[from_idx].dst < PHV_MAX_INDEX) { + regs.ingress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst].rewrite(); + regs.egress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst].rewrite(); + + regs.ingress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst] = 0; + regs.egress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst] = 0; + } else if (*map[from_idx].dst >= TPHV_MIN_INDEX && *map[from_idx].dst < TPHV_MAX_INDEX) { + auto tphv_idx = *map[from_idx].dst - TPHV_MIN_INDEX; + regs.ingress.prsr_reg.no_multi_wr.t_nmw[tphv_idx].rewrite(); + regs.egress.prsr_reg.no_multi_wr.t_nmw[tphv_idx].rewrite(); + + regs.ingress.prsr_reg.no_multi_wr.t_nmw[tphv_idx] = 0; + regs.egress.prsr_reg.no_multi_wr.t_nmw[tphv_idx] = 0; + } +} + +/// Add the fake extractions to have 2n 16b extractions. The function returns +/// the number of added extractions. +static int pad_to_16b_extracts_to_2n(Parser *parser, Target::Tofino::parser_regs ®s, + Parser::State::Match *match) { + // Obtain the slot configuration for given row + int row = parser->match_to_row.at(match); + auto map = reinterpret_cast( + parser->setup_phv_output_map(regs, parser->gress, row)); + if (LOGGING(5)) { + print_slot_occupancy(match, phv_16bit_extractors, "Before 16bit padding"); + } + + // Count number of used extractors - the number of extractions/constant set operations and + // checksums should be the padded to 2n. 
+ unsigned used = 0; + unsigned pad_idx = 0; + unsigned from_idx = 0; + bool from_idx_is_8b_16b = false; + for (auto i : phv_16bit_extractors) { + if (map[i].dst->value == EXTRACT_SLOT_UNUSED) { + pad_idx = i; + } else { + used++; + // Try to get an 8b or 16b index. + // If we use a single 32b index to pad then we'll hit problems + // because we need 2 x 16b to fill a 32b container. + if (!from_idx_is_8b_16b) { + from_idx = i; + auto *reg = Phv::reg(map[i].dst->value); + from_idx_is_8b_16b = reg->size == 8 || reg->size == 16; + } + } + } + + // Check if csum equals VERIFY type and destination register size is 16 + bool has_csum = false; + for (auto &c : match->csum) { + if (c.type == 0 && c.dest && c.dest->reg.size == 16) { + has_csum = true; + break; + } + } + + // Identify indexes for source and destination slots for the padding + if (used == 1 || used == 3) { + set_idx_for_16b_extractions(pad_idx, from_idx, used, has_csum, match, map); + } else { + // Value is 0,2 or 4, we are good! + if (LOGGING(5)) { + LOG5("No 16bit padding is needed to add in " << match->state->name << " state."); + } + + return 0; + } + + // Add fake extractors to reach 2n constraint, we need to copy destination and source from + // the global version field which is tied to zeros in RTL. + // We are keeping both indexes in tuples {0,1} or {2,3}. + do_16b_padding(regs, map, pad_idx, from_idx); + matchSlotTracker.padMap.insert(match, pad_idx, &map[pad_idx]); + check_16b_extractor_configuration(pad_idx, from_idx, used, has_csum, map); + + // Match can also have a value set which means we need to do the initialization for + // other rows. 
The first row is being initialized, other rows needs the initialization + // and padding configuration + for (int vs_offset = 1; vs_offset < match->value_set_size; ++vs_offset) { + int nrow = row + vs_offset; + LOG5("Adding the padding for value_set " + << match->value_set_name << " offset = " << vs_offset << " (row = " << nrow << ")"); + map = reinterpret_cast( + parser->setup_phv_output_map(regs, parser->gress, nrow)); + do_16b_padding(regs, map, pad_idx, from_idx); + } + + if (LOGGING(5)) { + print_slot_occupancy(match, phv_16bit_extractors, "After 16bit padding"); + } + + return 1; +} + +/** + * @brief Perform the 8b padding onto map and computed index. + * + * @param regs Register configuration instance + * @param map Tofino output map pointer + * @param from_idx Index which is the source of the slot configuration + */ +static void do_8b_padding(Target::Tofino::parser_regs ®s, tofino_phv_output_map *map, + const unsigned from_idx) { + for (auto pad_idx : phv_8bit_extractors) { + if (map[pad_idx].dst->value != EXTRACT_SLOT_UNUSED) continue; + + // Extraction slot is not used and we need to put padding there. 
The main idea + // is to source from the zero constant + map[pad_idx].dst->rewrite(); + *map[pad_idx].dst = *map[from_idx].dst; + *map[pad_idx].src = EXTRACT_SLOT_CONSTANT_ZERO; + *map[pad_idx].src_type = EXTRACT_SLOT_CONSTANT_EN; + } + + // Mark the dummy write dest as multi-write, we need to distinguish between PHV and TPHV + if (*map[from_idx].dst >= PHV_MIN_INDEX && *map[from_idx].dst < PHV_MAX_INDEX) { + regs.ingress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst].rewrite(); + regs.egress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst].rewrite(); + + regs.ingress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst] = 0; + regs.egress.prsr_reg.no_multi_wr.nmw[*map[from_idx].dst] = 0; + } else if (*map[from_idx].dst >= TPHV_MIN_INDEX && *map[from_idx].dst < TPHV_MAX_INDEX) { + auto tphv_idx = *map[from_idx].dst - TPHV_MIN_INDEX; + regs.ingress.prsr_reg.no_multi_wr.t_nmw[tphv_idx].rewrite(); + regs.egress.prsr_reg.no_multi_wr.t_nmw[tphv_idx].rewrite(); + + regs.ingress.prsr_reg.no_multi_wr.t_nmw[tphv_idx] = 0; + regs.egress.prsr_reg.no_multi_wr.t_nmw[tphv_idx] = 0; + } +} + +/// Add the fake extractions to have 4n 8b extractions. The function returns +/// the number of added extractions. +static int pad_to_8b_extracts_to_4n(Parser *parser, Target::Tofino::parser_regs ®s, + Parser::State::Match *match) { + // Obtain the slot configuration for given row + int row = parser->match_to_row.at(match); + auto map = reinterpret_cast( + parser->setup_phv_output_map(regs, parser->gress, row)); + if (LOGGING(5)) { + print_slot_occupancy(match, phv_8bit_extractors, "Before 8bit padding"); + } + + // Count number of used extractors - the number of extraction/constant set operations and + // checksums should be padded to 4n. The source of the added padding will be stored in + // the from_idx variable. 
+ unsigned used = 0; + unsigned from_idx = 0; + bool from_idx_is_8b = false; + for (auto i : phv_8bit_extractors) { + if (map[i].dst->value == EXTRACT_SLOT_UNUSED) continue; + // Update the used counter and remember the used slot + used++; + // Try to get an 8b index. + // If we use a single 16b index to pad then we'll hit problems + // because we need 2 x 8b to fill a 16b container. + if (!from_idx_is_8b) { + from_idx = i; + from_idx_is_8b = Phv::reg(map[i].dst->value)->size == 8; + } + } + + if (used % 4 == 0) { + if (LOGGING(5)) { + LOG5("No 8bit padding is needed to add in " << match->state->name << " state."); + } + + return 0; + } + + // Add fake extractions to meet the 4n constraint and setup tracking + do_8b_padding(regs, map, from_idx); + for (auto pad_idx : phv_8bit_extractors) { + matchSlotTracker.padMap.insert(match, pad_idx, &map[pad_idx]); + } + + // Match can also have a value set which means we need to do the initialization for + // other rows. The first row is being initialized, other rows needs the initialization + // and padding configuration + for (int vs_offset = 1; vs_offset < match->value_set_size; ++vs_offset) { + int nrow = row + vs_offset; + LOG5("Adding the padding for value_set " + << match->value_set_name << " offset = " << vs_offset << " (row = " << nrow << ")"); + map = reinterpret_cast( + parser->setup_phv_output_map(regs, parser->gress, nrow)); + do_8b_padding(regs, map, from_idx); + } + + if (LOGGING(5)) { + print_slot_occupancy(match, phv_8bit_extractors, "After 8bit padding"); + } + + return 4 - (used % 4); +} + +/// Add padding extracts to a parser state and its children. +/// +/// @tparam use_8bit Apply to 8b extracts (true) or 16b extracts (false) +/// @param parser Parser containing the state being padded +/// @param regs +/// @param node_count Number of states to pad, including this state. States with zero extracts are +/// not counted in @p node_count. 
+/// @param visited
+/// @param pstate
+template <bool use_8bit>
+void pad_nodes_extracts(Parser *parser, Target::Tofino::parser_regs &regs, int node_count,
+                        Parser::State::Match *match, std::set<Parser::State *> &visited,
+                        PaddingInfoCollector::PadState *pstate,
+                        std::map<Parser::State::Match *, int> &cache) {
+    if (node_count == 0 || !match) {
+        LOG4("Node count or nullptr match was reached");
+        return;
+    }
+
+    const std::string log_pad = use_8bit ? "8b" : "16b";
+    if (visited.count(match->state)) {
+        LOG4("State " << match->state << " was already visited in " << log_pad << " padding.");
+        return;
+    }
+
+    visited.insert(match->state);
+    LOG3("Padding " << log_pad << " extracts - state = " << match->state->name << ", "
+                    << "remaining " << log_pad << " states to pad is " << node_count);
+
+    pstate->print();
+    // Memoization to minimize path visits
+    // If a state was visited before with the same or higher node count, don't visit it
+    // again. Cache holds a map from state to node count
+    if (cache[match] >= node_count && node_count > 0) {
+        LOG5(" Using cached state(match) : " << match->state->name << "(" << match->match
+                                             << ") -> node count " << cache[match]);
+        visited.erase(match->state);
+        return;
+    }
+    cache[match] = node_count;
+    LOG5(" Caching state(match) : " << match->state->name << "(" << match->match
+                                    << ") -> node count " << cache[match]);
+
+    // We need to be sure that we will not be passing data to 2x16bit busses. Therefore, we have
+    // to pad the bus entirely for 16bit extractions. In addition, we need to see node_count 16bit
+    // extractions - due to possible FIFO stalls. If the node doesn't contain the required
+    // extraction, we don't decrement the node_count value.
+    int new_node_count = node_count;
+    auto phv_type = use_8bit ? 
AnalysisType::BIT8 : AnalysisType::BIT16; + if (count_number_of_extractions(parser, regs, match, phv_type)) { + if (use_8bit) { + int pad = pad_to_8b_extracts_to_4n(parser, regs, match); + pstate->addPadInfo(match, AnalysisType::BIT8, pad); + } else { + int pad = pad_to_16b_extracts_to_2n(parser, regs, match); + pstate->addPadInfo(match, AnalysisType::BIT16, pad); + } + + new_node_count--; + } + if (LOGGING(5)) pstate->print(); + + for (auto state : match->next) { + for (auto next_match : state->match) { + pad_nodes_extracts(parser, regs, new_node_count, next_match, visited, pstate, + cache); + } + } + + visited.erase(match->state); +} + +// Helping aliases for padding functions +const auto pad_nodes_8b_extracts = pad_nodes_extracts; +const auto pad_nodes_16b_extracts = pad_nodes_extracts; + +void handle_narrow_to_wide_constraint(Parser *parser, Target::Tofino::parser_regs ®s) { + // 1] Apply narrow-to-wide constraints to all predecessors + std::set narrow_to_wide_matches; + PaddingInfoCollector pad_collector; + + for (auto &kv : parser->match_to_row) { + if (kv.first->has_narrow_to_wide_extract) narrow_to_wide_matches.insert(kv.first); + } + + if (narrow_to_wide_matches.size() == 0) { + LOG2("No narrow to wide matches has been detected."); + return; + } + + // Pad all predecessors + std::set all_preds; + for (auto m : narrow_to_wide_matches) { + auto states = m->get_all_preds(); + states.insert(m); + auto pstate = pad_collector.getPadState(m); + + for (auto p : states) { + if (all_preds.count(p)) continue; + + all_preds.insert(p); + int pad = pad_to_16b_extracts_to_2n(parser, regs, p); + pstate->addPadInfo(p, AnalysisType::BIT16, pad); + pad = pad_to_8b_extracts_to_4n(parser, regs, p); + pstate->addPadInfo(p, AnalysisType::BIT8, pad); + } + } + + // 2] Apply the narrow-to-wide constraints to a given number + // of child nodes. 
+ ExtractionCountCache cache; + for (auto m : narrow_to_wide_matches) { + auto pstate = pad_collector.getPadState(m); + std::set visited_states; + int extracts_16b = analyze_worst_extractor_path(parser, regs, m, AnalysisType::BIT16, + visited_states, cache); + int extracts_8b = analyze_worst_extractor_path(parser, regs, m, AnalysisType::BIT8, + visited_states, cache); + + if (LOGGING(3)) { + std::stringstream ss; + ss << "INFO: Used extractors for " << m->state->gress << "," << m->state->name; + if (m->match) { + ss << "," << m->match; + } + ss << " - " << "8bit:" << extracts_8b << ", 16bit:" << extracts_16b; + LOG3(ss.str()); + } + + // Count the number of nodes we need to take, the arbiter is taking 4x8bit chunks + // and 2x16b chunks of data. The result will be ceiled to 16 because that is the + // depth of the FIFO. After that, we need to apply the 16b padding to a computed number + // of nodes and the same for 8b padding. + int pass_16b_nodes = ceil_and_wrap_to_fifo_size(extracts_16b, 2); + int pass_8b_nodes = ceil_and_wrap_to_fifo_size(extracts_8b, 4); + + // The state counts represent the states _after_ the n2w state. Increment by 1 to account + // for n2w state. + if (pass_8b_nodes) pass_8b_nodes++; + if (pass_16b_nodes) pass_16b_nodes++; + + // Pad extracts: 8b extracts should be padded based on the number of states to flush 16b + // narrow-to-wide extracts, and 16b extracts should be padded based on the number + // of states to flush 8b narrow-to-wide extracts. 
+ std::map cacheNodeCount; + pad_nodes_16b_extracts(parser, regs, pass_8b_nodes, m, visited_states, pstate, + cacheNodeCount); + cacheNodeCount.clear(); + pad_nodes_8b_extracts(parser, regs, pass_16b_nodes, m, visited_states, pstate, + cacheNodeCount); + } + + if (LOGGING(1)) { + pad_collector.printPadInfo(); + } +} + +template <> +void Parser::write_config(Target::Tofino::parser_regs ®s, json::map &ctxt_json, + bool single_parser) { + /// remove after 8.7 release + if (single_parser) { + for (auto st : all) { + st->write_config(regs, this, ctxt_json[st->gress == EGRESS ? "egress" : "ingress"]); + } + } else { + ctxt_json["states"] = json::vector(); + for (auto st : all) st->write_config(regs, this, ctxt_json["states"]); + } + + if (error_count > 0) return; + + int i = 0; + for (auto ctr : counter_init) { + if (ctr) ctr->write_config(regs, gress, i); + ++i; + } + + for (i = 0; i < checksum_use.size(); i++) { + for (auto csum : checksum_use[i]) + if (csum) csum->write_config(regs, this); + } + + if (gress == INGRESS) { + init_common_regs(this, regs.ingress.prsr_reg, INGRESS); + // regs.ingress.ing_buf_regs.glb_group.disable(); + // regs.ingress.ing_buf_regs.chan0_group.chnl_drop.disable(); + // regs.ingress.ing_buf_regs.chan0_group.chnl_metadata_fix.disable(); + // regs.ingress.ing_buf_regs.chan1_group.chnl_drop.disable(); + // regs.ingress.ing_buf_regs.chan1_group.chnl_metadata_fix.disable(); + // regs.ingress.ing_buf_regs.chan2_group.chnl_drop.disable(); + // regs.ingress.ing_buf_regs.chan2_group.chnl_metadata_fix.disable(); + // regs.ingress.ing_buf_regs.chan3_group.chnl_drop.disable(); + // regs.ingress.ing_buf_regs.chan3_group.chnl_metadata_fix.disable(); + + regs.ingress.prsr_reg.hdr_len_adj.amt = hdr_len_adj; + } + + if (gress == EGRESS) { + init_common_regs(this, regs.egress.prsr_reg, EGRESS); + for (int i = 0; i < 4; i++) regs.egress.epb_prsr_port_regs.chnl_ctrl[i].meta_opt = meta_opt; + + int prsr_max_dph = get_prsr_max_dph(); + if (prsr_max_dph * 16 > 
Target::PARSER_DEPTH_MAX_BYTES_MULTITHREADED_EGRESS()) { + if (!options.tof1_egr_parse_depth_checks_disabled) + warning(lineno, + "Egress parser max depth exceeds %d, which requires disabling " + "multithreading in the parser", + Target::PARSER_DEPTH_MAX_BYTES_MULTITHREADED_EGRESS()); + options.tof1_egr_parse_depth_checks_disabled = true; + } + regs.egress.epb_prsr_port_regs.multi_threading.prsr_dph_max = prsr_max_dph; + regs.egress.prsr_reg.hdr_len_adj.amt = hdr_len_adj; + } + + // FIXME: The "|| 1" causes the PHV use information to be unconditionally copied + // into the PHV ownership. This forces the parser ownership to be identical to that + // in the pipe. + // Remove to allow different ownership, but make sure that header stacks + // are processed correctly. All stack elements writeable by the parser must + // be owned by the parser. + if (options.match_compiler || 1) { + phv_use[INGRESS] |= Phv::use(INGRESS); + phv_use[EGRESS] |= Phv::use(EGRESS); + } + + for (int i : phv_use[EGRESS]) { + auto id = Phv::reg(i)->parser_id(); + if (id >= 256) { + regs.merge.phv_owner.t_owner[id - 256] = 1; + regs.ingress.prsr_reg.phv_owner.t_owner[id - 256] = 1; + regs.egress.prsr_reg.phv_owner.t_owner[id - 256] = 1; + } else if (id < 224) { + regs.merge.phv_owner.owner[id] = 1; + regs.ingress.prsr_reg.phv_owner.owner[id] = 1; + regs.egress.prsr_reg.phv_owner.owner[id] = 1; + } + } + + for (int i = 0; i < 224; i++) { + if (!phv_allow_bitwise_or[i]) { + regs.ingress.prsr_reg.no_multi_wr.nmw[i] = 1; + regs.egress.prsr_reg.no_multi_wr.nmw[i] = 1; + } + if (phv_allow_bitwise_or[i] || phv_init_valid[i]) regs.merge.phv_valid.vld[i] = 1; + } + + for (int i = 0; i < 112; i++) + if (!phv_allow_bitwise_or[256 + i]) { + regs.ingress.prsr_reg.no_multi_wr.t_nmw[i] = 1; + regs.egress.prsr_reg.no_multi_wr.t_nmw[i] = 1; + } + + // if (options.condense_json) { + // // FIXME -- removing the uninitialized memory causes problems? + // // FIXME -- walle gets the addresses wrong. 
Might also require explicit + // // FIXME -- zeroing in the driver on real hardware + // // regs.memory[INGRESS].disable_if_reset_value(); + // // regs.memory[EGRESS].disable_if_reset_value(); + // regs.ingress.disable_if_reset_value(); + // regs.egress.disable_if_reset_value(); + // regs.merge.disable_if_reset_value(); + // } + + // Handles the constraint when using narrow extractors to generate wide values + // (either extracted from the packet or using the constants), then you need to + // follow the rule the _every_ preceding cycle must do: + // 0 or 4 8b extractions + // 0 or 2 or 4 16b extractions + handle_narrow_to_wide_constraint(this, regs); + + if (error_count == 0 && options.gen_json) { + /// TODO remove after 8.7 release + /// TODO Needs fix to simple test harness for parsers node + /// support + if (single_parser) { + if (gress == INGRESS) { + regs.memory[INGRESS].emit_json(*open_output("memories.all.parser.ingress.cfg.json"), + "ingress"); + regs.ingress.emit_json(*open_output("regs.all.parser.ingress.cfg.json")); + } else if (gress == EGRESS) { + regs.memory[EGRESS].emit_json(*open_output("memories.all.parser.egress.cfg.json"), + "egress"); + regs.egress.emit_json(*open_output("regs.all.parser.egress.cfg.json")); + } + regs.merge.emit_json(*open_output("regs.all.parse_merge.cfg.json")); + } else { + if (gress == INGRESS) { + regs.memory[INGRESS].emit_json( + *open_output("memories.all.parser.ingress.%02x.cfg.json", parser_no), + "ingress"); + regs.ingress.emit_json( + *open_output("regs.all.parser.ingress.%02x.cfg.json", parser_no)); + } + if (gress == EGRESS) { + regs.memory[EGRESS].emit_json( + *open_output("memories.all.parser.egress.%02x.cfg.json", parser_no), "egress"); + regs.egress.emit_json( + *open_output("regs.all.parser.egress.%02x.cfg.json", parser_no)); + } + regs.merge.emit_json(*open_output("regs.all.parse_merge.cfg.json")); + } + } + + /// TODO remove after 8.7 release + if (single_parser) { + for (int i = 0; i < 18; i++) { + if 
(gress == INGRESS) { + TopLevel::regs()->mem_pipe.i_prsr[i].set( + "memories.all.parser.ingress", ®s.memory[INGRESS]); + TopLevel::regs()->reg_pipe.pmarb.ibp18_reg.ibp_reg[i].set( + "regs.all.parser.ingress", ®s.ingress); + } else if (gress == EGRESS) { + TopLevel::regs()->mem_pipe.e_prsr[i].set( + "memories.all.parser.egress", ®s.memory[EGRESS]); + TopLevel::regs()->reg_pipe.pmarb.ebp18_reg.ebp_reg[i].set( + "regs.all.parser.egress", ®s.egress); + } + } + } else { + if (gress == INGRESS) { + TopLevel::regs()->parser_ingress.emplace( + ctxt_json["handle"]->as_number()->val, ®s.ingress); + TopLevel::regs()->parser_memory[INGRESS].emplace( + ctxt_json["handle"]->as_number()->val, ®s.memory[INGRESS]); + } else if (gress == EGRESS) { + TopLevel::regs()->parser_egress.emplace( + ctxt_json["handle"]->as_number()->val, ®s.egress); + TopLevel::regs()->parser_memory[EGRESS].emplace( + ctxt_json["handle"]->as_number()->val, ®s.memory[EGRESS]); + } + +#if 0 + /// for initiliazing the parser registers in default configuration. + int start_bit = port_use.ffs(); + do { + int end_bit = port_use.ffz(start_bit); + std::cout << "set memories and regs from " << start_bit + << " to " << end_bit - 1 << std::endl; + for (auto i = start_bit; i <= end_bit - 1; i++) { + TopLevel::regs()->mem_pipe.i_prsr[i] + .set("memories.all.parser.ingress", ®s.memory[INGRESS]); + TopLevel::regs()->reg_pipe.pmarb.ibp18_reg.ibp_reg[i] + .set("regs.all.parser.ingress", ®s.ingress); + TopLevel::regs()->mem_pipe.e_prsr[i] + .set("memories.all.parser.egress", ®s.memory[EGRESS]); + TopLevel::regs()->reg_pipe.pmarb.ebp18_reg.ebp_reg[i] + .set("regs.all.parser.egress", ®s.egress); + } + start_bit = port_use.ffs(end_bit); + } while (start_bit >= 0); +#endif + } + // all parsers share the same parser_merge configuration. 
+ TopLevel::regs()->reg_pipe.pmarb.prsr_reg.set("regs.all.parse_merge", + ®s.merge); +} + +template <> +void Parser::gen_configuration_cache(Target::Tofino::parser_regs ®s, json::vector &cfg_cache) { + std::string reg_fqname; + std::string reg_name; + unsigned reg_value; + std::string reg_value_str; + unsigned reg_width = 8; + + if (gress == EGRESS) { + // epb_prsr_port_regs.chnl_ctrl + for (int i = 0; i < 4; i++) { + reg_fqname = "pmarb.ebp18_reg.ebp_reg[0].epb_prsr_port_regs.chnl_ctrl[" + + std::to_string(i) + "]"; + reg_name = "parser0_chnl_ctrl_" + std::to_string(i); + reg_value = regs.egress.epb_prsr_port_regs.chnl_ctrl[i]; + if ((reg_value != 0) || (options.match_compiler)) { + reg_value_str = int_to_hex_string(reg_value, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + } + + // epb_prsr_port_regs.multi_threading + reg_fqname = "pmarb.ebp18_reg.ebp_reg[0].epb_prsr_port_regs.multi_threading"; + reg_name = "parser0_multi_threading"; + reg_value = regs.egress.epb_prsr_port_regs.multi_threading; + if ((reg_value != 0) || (options.match_compiler)) { + reg_value_str = int_to_hex_string(reg_value, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + } +} diff --git a/backends/tofino/bf-asm/tofino/phv.cpp b/backends/tofino/bf-asm/tofino/phv.cpp new file mode 100644 index 00000000000..7e8c19f8b45 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/phv.cpp @@ -0,0 +1,66 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tofino/phv.h" + +void Target::Tofino::Phv::init_regs(::Phv &phv) { + // Allocating Tofino registers so the uids map to register encodings + static const struct { + char code[4]; + unsigned size, count; + } sizes[] = {{"W", 32, 64}, {"B", 8, 64}, {"H", 16, 96}, {"", 0, 32}, + {"TW", 32, 32}, {"TB", 8, 32}, {"TH", 16, 48}}; + unsigned uid = 0; + phv.regs.resize(NUM_PHV_REGS); + for (unsigned i = 0; i < sizeof sizes / sizeof *sizes; i++) { + for (unsigned j = 0; j < sizes[i].count; j++, uid++) { + auto reg = phv.regs[uid] = new Register; + memset(reg->name, 0, sizeof(reg->name)); + reg->type = (uid >= FIRST_TPHV) ? Register::TAGALONG : Register::NORMAL; + reg->index = j; + reg->uid = uid; + reg->size = sizes[i].size; + if (sizes[i].size) { + char buf[8]; + snprintf(buf, sizeof(buf), "R%d", uid); + phv.names[INGRESS][buf][0].slice = ::Phv::Slice(*reg, 0, sizes[i].size - 1); + phv.names[EGRESS][buf][0].slice = ::Phv::Slice(*reg, 0, sizes[i].size - 1); + snprintf(reg->name, sizeof(reg->name), "%.2s%d", sizes[i].code, j); + phv.names[INGRESS][reg->name][0].slice = ::Phv::Slice(*reg, 0, sizes[i].size - 1); + phv.names[EGRESS][reg->name][0].slice = ::Phv::Slice(*reg, 0, sizes[i].size - 1); + } + } + } + BUG_CHECK(uid == phv.regs.size()); +} + +static bitvec tagalong_group(int n) { + bitvec rv; + rv.setrange( + Target::Tofino::Phv::FIRST_8BIT_TPHV + n * (Target::Tofino::Phv::COUNT_8BIT_TPHV / 8), + Target::Tofino::Phv::COUNT_8BIT_TPHV / 8); + rv.setrange( + Target::Tofino::Phv::FIRST_16BIT_TPHV + n * (Target::Tofino::Phv::COUNT_16BIT_TPHV / 8), + Target::Tofino::Phv::COUNT_16BIT_TPHV / 8); + rv.setrange( + Target::Tofino::Phv::FIRST_32BIT_TPHV + n * (Target::Tofino::Phv::COUNT_32BIT_TPHV / 8), + Target::Tofino::Phv::COUNT_32BIT_TPHV / 8); + return rv; +} +const bitvec 
Target::Tofino::Phv::tagalong_groups[8] = { + tagalong_group(0), tagalong_group(1), tagalong_group(2), tagalong_group(3), + tagalong_group(4), tagalong_group(5), tagalong_group(6), tagalong_group(7)}; diff --git a/backends/tofino/bf-asm/tofino/phv.h b/backends/tofino/bf-asm/tofino/phv.h new file mode 100644 index 00000000000..1f16dc391d6 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/phv.h @@ -0,0 +1,55 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_PHV_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_PHV_H_ + +#include "backends/tofino/bf-asm/phv.h" + +class Target::Tofino::Phv : public Target::Phv { + friend class ::Phv; + struct Register : public ::Phv::Register { + int parser_id() const override { return uid; } + int mau_id() const override { return uid < FIRST_TPHV ? uid : -1; } + int ixbar_id() const override { return uid < FIRST_TPHV ? 
uid : -1; }
+        int deparser_id() const override { return uid; }
+    };
+    void init_regs(::Phv &phv) override;
+    target_t type() const override { return TOFINO; }
+    unsigned mau_groupsize() const override { return 16; }
+
+ public:
+    enum {
+        NUM_PHV_REGS = 368,
+        FIRST_8BIT_PHV = 64,
+        COUNT_8BIT_PHV = 64,
+        FIRST_16BIT_PHV = 128,
+        COUNT_16BIT_PHV = 96,
+        FIRST_32BIT_PHV = 0,
+        COUNT_32BIT_PHV = 64,
+        FIRST_TPHV = 256,
+        FIRST_8BIT_TPHV = 288,
+        COUNT_8BIT_TPHV = 32,
+        FIRST_16BIT_TPHV = 320,
+        COUNT_16BIT_TPHV = 48,
+        FIRST_32BIT_TPHV = 256,
+        COUNT_32BIT_TPHV = 32,
+    };
+    static const bitvec tagalong_groups[8];
+};
+
+#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_PHV_H_ */
diff --git a/backends/tofino/bf-asm/tofino/salu_inst.cpp b/backends/tofino/bf-asm/tofino/salu_inst.cpp
new file mode 100644
index 00000000000..31205779b3a
--- /dev/null
+++ b/backends/tofino/bf-asm/tofino/salu_inst.cpp
@@ -0,0 +1,194 @@
+/**
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
+ * either express or implied. See the License for the specific language governing permissions
+ * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* Tofino template specializations for instructions #included in salu_inst.cpp + * WARNING -- this is included in an anonymous namespace, as these SaluInstruction + * subclasses are all defined in that anonymous namespace */ + +template <> +void AluOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_state_alu[act->code][slot - ALU2LO]; + auto &salu_instr_common = meter_group.stateful.salu_instr_common[act->code]; + salu.salu_op = opc->opcode & 0xf; + salu.salu_arith = opc->opcode >> 4; + salu.salu_pred = predication_encode & Target::Tofino::STATEFUL_PRED_MASK; + const int alu_const_min = Target::STATEFUL_ALU_CONST_MIN(); + const int alu_const_max = Target::STATEFUL_ALU_CONST_MAX(); + if (srca) { + if (auto m = srca.to()) { + salu.salu_asrc_memory = 1; + salu.salu_asrc_memory_index = m->field->bit(0) > 0; + } else if (auto k = srca.to()) { + salu.salu_asrc_memory = 0; + if (k->value >= alu_const_min && k->value <= alu_const_max) { + salu.salu_const_src = k->value & Target::STATEFUL_ALU_CONST_MASK(); + salu.salu_regfile_const = 0; + } else { + salu.salu_const_src = tbl->get_const(k->lineno, k->value); + salu.salu_regfile_const = 1; + } + } else if (auto r = srca.to()) { + salu.salu_asrc_memory = 0; + salu.salu_const_src = r->index; + salu.salu_regfile_const = 1; + } else { + BUG(); + } + } + if (srcb) { + if (auto f = srcb.to()) { + salu.salu_bsrc_phv = 1; + salu.salu_bsrc_phv_index = f->phv_index(tbl); + } else if (auto m = srcb.to()) { + salu_instr_common.salu_alu2_lo_bsrc_math = 1; + if (auto b = m->of.to()) { + salu_instr_common.salu_alu2_lo_math_src = b->phv_index(tbl); + } else if (auto b = m->of.to()) { + 
salu_instr_common.salu_alu2_lo_math_src = b->field->bit(0) > 0 ? 3 : 2; + } else { + BUG(); + } + } else if (auto k = srcb.to()) { + salu.salu_bsrc_phv = 0; + if (k->value >= alu_const_min && k->value <= alu_const_max) { + salu.salu_const_src = k->value & Target::STATEFUL_ALU_CONST_MASK(); + salu.salu_regfile_const = 0; + } else { + salu.salu_const_src = tbl->get_const(k->lineno, k->value); + salu.salu_regfile_const = 1; + } + } else if (auto r = srcb.to()) { + salu.salu_bsrc_phv = 0; + salu.salu_const_src = r->index; + salu.salu_regfile_const = 1; + } else { + BUG(); + } + } +} +void AluOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +template <> +void BitOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + LOG2(this); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_state_alu[act->code][slot - ALU2LO]; + salu.salu_op = opc->opcode & 0xf; + salu.salu_pred = predication_encode & Target::Tofino::STATEFUL_PRED_MASK; + // 1b instructions are from mem-lo to alu1-lo + salu.salu_asrc_memory = 1; + salu.salu_asrc_memory_index = 0; +} +void BitOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +template <> +void CmpOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_cmp_alu[act->code][slot]; + if (srca) { + salu.salu_cmp_asrc_input = srca->field->bit(0) > 0; + salu.salu_cmp_asrc_sign = srca_neg; + salu.salu_cmp_asrc_enable = 1; + } + if (srcb) { + salu.salu_cmp_bsrc_input = srcb->phv_index(tbl); + 
salu.salu_cmp_bsrc_sign = srcb_neg; + salu.salu_cmp_bsrc_enable = 1; + } + if (srcc) { + if (auto k = dynamic_cast(srcc)) { + const int cmp_const_min = Target::STATEFUL_CMP_CONST_MIN(); + const int cmp_const_max = Target::STATEFUL_CMP_CONST_MAX(); + if (k->value >= cmp_const_min && k->value <= cmp_const_max) { + salu.salu_cmp_const_src = k->value & Target::STATEFUL_CMP_CONST_MASK(); + salu.salu_cmp_regfile_const = 0; + } else { + salu.salu_cmp_const_src = tbl->get_const(srcc->lineno, k->value); + salu.salu_cmp_regfile_const = 1; + } + } else if (auto r = dynamic_cast(srcc)) { + salu.salu_cmp_const_src = r->index; + salu.salu_cmp_regfile_const = 1; + } + } else { + salu.salu_cmp_const_src = 0; + salu.salu_cmp_regfile_const = 0; + } + salu.salu_cmp_opcode = opc->opcode | (type << 2); +} +void CmpOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} + +void TMatchOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + BUG(); // should never be called +} + +void OutOP::decode_output_mux(Target::Tofino, Table *tbl, value_t &op) { + static const std::map ops_mux_lookup = { + {"mem_hi", 0}, {"mem_lo", 1}, {"memory_hi", 0}, {"memory_lo", 1}, + {"phv_hi", 2}, {"phv_lo", 3}, {"alu_hi", 4}, {"alu_lo", 5}, + {"alu_hi_out", 4}, {"alu_lo_out", 5}, {"predicate", 6}}; + if (op.type == tCMD && ops_mux_lookup.count(op[0].s)) + output_mux = ops_mux_lookup.at(op[0].s); + else if (op.type == tSTR && ops_mux_lookup.count(op.s)) + output_mux = ops_mux_lookup.at(op.s); + else + output_mux = -1; + if (src) { + int tmp = output_mux; + if (auto *phv = src.to()) + output_mux = 3 - phv->phv_index(tbl->to()); + else if (auto *mem = src.to()) + output_mux = mem->field->bit(0) > 0 ? 
0 : 1; + BUG_CHECK(tmp < 0 || tmp == output_mux, "inconsistent output mux decode"); + } +} +int OutOP::decode_output_option(Target::Tofino, value_t &op) { return -1; } + +template <> +void OutOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl_, Table::Actions::Action *act) { + LOG2(this); + auto tbl = dynamic_cast(tbl_); + BUG_CHECK(tbl); + int logical_home_row = tbl->layout[0].row; + auto &meter_group = regs.rams.map_alu.meter_group[logical_home_row / 4U]; + auto &salu = meter_group.stateful.salu_instr_output_alu[act->code]; + if (predication_encode) { + salu.salu_output_cmpfn = predication_encode & Target::Tofino::STATEFUL_PRED_MASK; + } else { + salu.salu_output_cmpfn = STATEFUL_PREDICATION_ENCODE_UNCOND; + } + salu.salu_output_asrc = output_mux; +} +void OutOP::write_regs(Target::Tofino::mau_regs ®s, Table *tbl, Table::Actions::Action *act) { + write_regs(regs, tbl, act); +} diff --git a/backends/tofino/bf-asm/tofino/sram_match.cpp b/backends/tofino/bf-asm/tofino/sram_match.cpp new file mode 100644 index 00000000000..b956e13e5b9 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/sram_match.cpp @@ -0,0 +1,96 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/stage.h" +#include "backends/tofino/bf-asm/tables.h" +#include "lib/log.h" + +static int find_in_ixbar(Table *table, std::vector &match) { + // It would seem like it would be possible to simplify this code by refactoring it + // to use one loop calling Table::find_on_ixbar (which does much of what this does),r + // but it is important to prefer a group defined in this table to one defined in other + // tables, which the two loops does. Could perhaps have a variant of find_on_ixbar that + // return *all* groups where the Phv::Ref is present (in priority order), so we could + // do the intersection (preserving priority order) rather than this repeated looping? + int max_i = -1; + LOG3("find_in_ixbar " << match); + for (unsigned group = 0; group < EXACT_XBAR_GROUPS; group++) { + LOG3(" looking in table in group " << group); + bool ok = true; + for (auto &r : match) { + LOG3(" looking for " << r); + for (auto &ixb : table->input_xbar) { + if (!ixb->find_exact(*r, group)) { + LOG3(" -- not found"); + ok = false; + break; + } + } + } + if (ok) { + LOG3(" success"); + return group; + } + } + for (unsigned group = 0; group < EXACT_XBAR_GROUPS; group++) { + LOG3(" looking in group " << group); + bool ok = true; + for (auto &r : match) { + LOG3(" looking for " << r); + bool found = false; + InputXbar::Group ixbar_group(InputXbar::Group::EXACT, group); + for (auto *in : table->stage->ixbar_use[ixbar_group]) { + if (in->find_exact(*r, group)) { + found = true; + break; + } + } + if (!found) { + LOG3(" -- not found"); + if (&r - &match[0] > max_i) max_i = &r - &match[0]; + ok = false; + break; + } + } + if (ok) { + LOG3(" success"); + return group; + } + } + if (max_i > 0) + error(match[max_i].lineno, "%s: Can't find %s and %s in same input xbar group", + table->name(), match[max_i].name(), match[0].name()); + else + error(match[0].lineno, "%s: Can't find %s in any input xbar group", 
table->name(), + match[0].name()); + return -1; +} + +void SRamMatchTable::setup_word_ixbar_group(Target::Tofino) { + word_ixbar_group.resize(match_in_word.size()); + unsigned i = 0; + for (auto &match : match_in_word) { + std::vector phv_ref_match; + for (auto *source : match) { + auto phv_ref = dynamic_cast(source); + BUG_CHECK(phv_ref); + BUG_CHECK(*phv_ref); + phv_ref_match.push_back(*phv_ref); + } + word_ixbar_group[i++] = phv_ref_match.empty() ? -1 : find_in_ixbar(this, phv_ref_match); + } +} diff --git a/backends/tofino/bf-asm/tofino/stage.cpp b/backends/tofino/bf-asm/tofino/stage.cpp new file mode 100644 index 00000000000..2c906e858a1 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/stage.cpp @@ -0,0 +1,140 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/* mau stage template specializations for tofino -- #included directly in top-level stage.cpp */ + +template <> +void Stage::write_regs(Target::Tofino::mau_regs ®s, bool) { + write_common_regs(regs); + auto &merge = regs.rams.match.merge; + for (gress_t gress : Range(INGRESS, EGRESS)) { + if (stageno == 0) { + merge.predication_ctl[gress].start_table_fifo_delay0 = pred_cycle(gress) - 1; + merge.predication_ctl[gress].start_table_fifo_delay1 = 0; + merge.predication_ctl[gress].start_table_fifo_enable = 1; + } else { + switch (stage_dep[gress]) { + case MATCH_DEP: + merge.predication_ctl[gress].start_table_fifo_delay0 = + this[-1].pipelength(gress) - this[-1].pred_cycle(gress) + + pred_cycle(gress) - 1; + merge.predication_ctl[gress].start_table_fifo_delay1 = + this[-1].pipelength(gress) - this[-1].pred_cycle(gress); + merge.predication_ctl[gress].start_table_fifo_enable = 3; + break; + case ACTION_DEP: + merge.predication_ctl[gress].start_table_fifo_delay0 = 1; + merge.predication_ctl[gress].start_table_fifo_delay1 = 0; + merge.predication_ctl[gress].start_table_fifo_enable = 1; + break; + case CONCURRENT: + merge.predication_ctl[gress].start_table_fifo_enable = 0; + break; + default: + BUG(); + } + } + if (stageno != 0) { + regs.dp.cur_stage_dependency_on_prev[gress] = MATCH_DEP - stage_dep[gress]; + if (stage_dep[gress] == CONCURRENT) regs.dp.stage_concurrent_with_prev |= 1U << gress; + } + if (stageno != AsmStage::numstages() - 1) + regs.dp.next_stage_dependency_on_cur[gress] = MATCH_DEP - this[1].stage_dep[gress]; + else if (AsmStage::numstages() < Target::NUM_MAU_STAGES()) + regs.dp.next_stage_dependency_on_cur[gress] = 2; + auto &deferred_eop_bus_delay = regs.rams.match.adrdist.deferred_eop_bus_delay[gress]; + deferred_eop_bus_delay.eop_internal_delay_fifo = pred_cycle(gress) + 3; + /* FIXME -- making this depend on the dependency of the next stage seems wrong */ + if (stageno == AsmStage::numstages() - 
1) { + if (AsmStage::numstages() < Target::NUM_MAU_STAGES()) + deferred_eop_bus_delay.eop_output_delay_fifo = 0; + else + deferred_eop_bus_delay.eop_output_delay_fifo = pipelength(gress) - 1; + } else if (this[1].stage_dep[gress] == MATCH_DEP) + deferred_eop_bus_delay.eop_output_delay_fifo = pipelength(gress) - 1; + else if (this[1].stage_dep[gress] == ACTION_DEP) + deferred_eop_bus_delay.eop_output_delay_fifo = 1; + else + deferred_eop_bus_delay.eop_output_delay_fifo = 0; + deferred_eop_bus_delay.eop_delay_fifo_en = 1; + } + + for (gress_t gress : Range(INGRESS, EGRESS)) + if (table_use[gress] & USE_TCAM) + regs.tcams.tcam_piped |= options.match_compiler ? 3 : 1 << gress; + + bitvec in_use = match_use[INGRESS] | action_use[INGRESS] | action_set[INGRESS]; + bitvec eg_use = match_use[EGRESS] | action_use[EGRESS] | action_set[EGRESS]; + if (options.match_compiler) { + /* the glass compiler occasionally programs extra uses of random registers on + * busses where it doesn't actually use them. Sometimes, these regs + * are in use by the other thread, so rely on the deparser to correctly + * set the Phv::use info and strip out registers it says are used by + * the other thread */ + in_use -= Deparser::PhvUse(EGRESS); + eg_use -= Deparser::PhvUse(INGRESS); + } + /* FIXME -- if the regs are live across a stage (even if not used in that stage) they + * need to be set in the thread registers. For now we just assume if they are used + * anywhere, they need to be marked as live */ + in_use |= Phv::use(INGRESS); + eg_use |= Phv::use(EGRESS); + static const int phv_use_transpose[2][14] = { + {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21}, + {4, 5, 6, 7, 12, 13, 14, 15, 22, 23, 24, 25, 26, 27}}; + // FIXME -- this code depends on the Phv::Register uids matching the + // FIXME -- mau encoding of phv containers. 
(FIXME-PHV) + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 14; j++) { + regs.dp.phv_ingress_thread_alu[i][j] = regs.dp.phv_ingress_thread_imem[i][j] = + regs.dp.phv_ingress_thread[i][j] = in_use.getrange(8 * phv_use_transpose[i][j], 8); + regs.dp.phv_egress_thread_alu[i][j] = regs.dp.phv_egress_thread_imem[i][j] = + regs.dp.phv_egress_thread[i][j] = eg_use.getrange(8 * phv_use_transpose[i][j], 8); + } + } +} + +template <> +void Stage::gen_configuration_cache(Target::Tofino::mau_regs ®s, json::vector &cfg_cache) { + Stage::gen_configuration_cache_common(regs, cfg_cache); + + unsigned reg_width = 8; // this means number of hex characters + std::string reg_fqname; + std::string reg_name; + unsigned reg_value; + std::string reg_value_str; + + // meter_ctl + auto &meter_ctl = regs.rams.map_alu.meter_group; + for (int i = 0; i < 4; i++) { + reg_fqname = "mau[" + std::to_string(stageno) + "].rams.map_alu.meter_group[" + + std::to_string(i) + "]" + ".meter.meter_ctl"; + reg_name = "stage_" + std::to_string(stageno) + "_meter_ctl_" + std::to_string(i); + reg_value = meter_ctl[i].meter.meter_ctl; + if ((reg_value != 0) || (options.match_compiler)) { + reg_value_str = int_to_hex_string(reg_value, reg_width); + add_cfg_reg(cfg_cache, reg_fqname, reg_name, reg_value_str); + } + } +} + +template <> +void Stage::gen_mau_stage_extension(Target::Tofino::mau_regs ®s, json::map &extend) { + BUG(); // stage extension not supported on tofino +} + +void AlwaysRunTable::write_regs(Target::Tofino::mau_regs &) { BUG(); } diff --git a/backends/tofino/bf-asm/tofino/stateful.cpp b/backends/tofino/bf-asm/tofino/stateful.cpp new file mode 100644 index 00000000000..9265d2d5bb5 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/stateful.cpp @@ -0,0 +1,77 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tofino/stateful.h" + +int StatefulTable::parse_counter_mode(Target::Tofino target, const value_t &v) { + if (v != "counter") return -1; + if (v.type == tSTR) return 4; + if (v.type != tCMD || v.vec.size != 2) return -1; + static const std::map modes = {{"hit", 2}, {"miss", 1}, {"gateway", 3}}; + if (!modes.count(v[1].s)) return -1; + return modes.at(v[1].s); +} + +void StatefulTable::set_counter_mode(Target::Tofino target, int mode) { + stateful_counter_mode |= mode; +} + +template <> +void StatefulTable::write_logging_regs(Target::Tofino::mau_regs ®s) { + auto &merge = regs.rams.match.merge; + unsigned meter_group = layout.at(0).row / 4U; + auto &salu = regs.rams.map_alu.meter_group[meter_group].stateful; + for (MatchTable *m : match_tables) { + auto *call = m->get_call(this); + if (!call || call->args.at(0).type != Call::Arg::Counter) continue; + if (auto mode = call->args.at(0).count_mode()) { + merge.mau_stateful_log_counter_ctl[m->logical_id / 8U].set_subfield( + mode, 3 * (m->logical_id % 8U), 3); + for (auto &rep : merge.mau_stateful_log_ctl_ixbar_map[m->logical_id / 8U]) + rep.set_subfield(meter_group | 0x4, 3 * (m->logical_id % 8U), 3); + } + } + if (stateful_counter_mode) { + merge.mau_stateful_log_instruction_width.set_subfield(format->log2size - 3, 2 * meter_group, + 2); + merge.mau_stateful_log_vpn_offset[meter_group / 2].set_subfield(logvpn_min, + 6 * (meter_group % 2), 6); + merge.mau_stateful_log_vpn_limit[meter_group / 2].set_subfield(logvpn_max, + 6 * 
(meter_group % 2), 6); + } + + for (size_t i = 0; i < const_vals.size(); ++i) { + if (const_vals[i].value > INT_MAX || const_vals[i].value < INT_MIN) + error(const_vals[i].lineno, "constant value %" PRId64 " too large for stateful alu", + const_vals[i].value); + salu.salu_const_regfile[i] = const_vals[i].value & 0xffffffffU; + } +} + +/// Compute the proper value for the register +/// map_alu.meter_alu_group_data_delay_ctl[].meter_alu_right_group_delay +/// which controls the two halves of the ixbar->meter_alu fifo, based on a bytemask of which +/// bytes are needed in the meter_alu. On Tofino, the fifo is 64 bits wide, so each enable +/// bit controls 32 bits +int AttachedTable::meter_alu_fifo_enable_from_mask(Target::Tofino::mau_regs &, unsigned bytemask) { + int rv = 0; + if (bytemask & 0xf) rv |= 1; + if (bytemask & 0xf0) rv |= 2; + return rv; +} + +void StatefulTable::gen_tbl_cfg(Target::Tofino, json::map &tbl, json::map &stage_tbl) const {} diff --git a/backends/tofino/bf-asm/tofino/stateful.h b/backends/tofino/bf-asm/tofino/stateful.h new file mode 100644 index 00000000000..fab94ea2ee9 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/stateful.h @@ -0,0 +1,33 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_STATEFUL_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_STATEFUL_H_ + +#include "backends/tofino/bf-asm/tables.h" +#include "backends/tofino/bf-asm/target.h" + +class Target::Tofino::StatefulTable : public ::StatefulTable { + friend class ::StatefulTable; + StatefulTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::StatefulTable(line, n, gr, s, lid) {} +}; + +template <> +void StatefulTable::write_logging_regs(Target::Tofino::mau_regs ®s); + +#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_STATEFUL_H_ */ diff --git a/backends/tofino/bf-asm/tofino/template_objects.yaml b/backends/tofino/bf-asm/tofino/template_objects.yaml new file mode 100644 index 00000000000..f8dde2503a9 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/template_objects.yaml @@ -0,0 +1,109 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +# +# SPDX-License-Identifier: Apache-2.0 + +global: + - namespace=Tofino + - emit_binary + - emit_fieldname + - emit_json + - enable_disable + - input_binary + - reverse_write + - write_dma=mapram_config + - write_dma=imem_subword8 + - write_dma=imem_subword16 + - write_dma=imem_subword32 + - write_dma=galois_field_matrix +generate: + memories: + pipe_top_level: + memories.pipe_top_level.h: [ decl, name=memories.top ] + memories.pipe_top_level.cpp: [ defn, name=memories.top, + -Imemories.pipe_top_level.h, -Imemories.pipe_addrmap.h ] + pipe_addrmap: + memories.pipe_addrmap.h: [ decl, name=memories.pipe ] + memories.pipe_addrmap.cpp: [ defn, name=memories.pipe, + -Imemories.pipe_addrmap.h, -Imemories.prsr_mem_main_rspec.h ] + prsr_mem_main_rspec: + memories.prsr_mem_main_rspec.h: [ decl, name=memories.all.parser.%s ] + memories.prsr_mem_main_rspec.cpp: [ defn, name=memories.all.parser.%s, + -Imemories.prsr_mem_main_rspec.h ] + regs: + tofino: + regs.tofino.h: [ decl, name=regs.top ] + regs.tofino.cpp: [ defn, name=regs.top, + -Iregs.tofino.h, -Iregs.pipe_addrmap.h ] + pipe_addrmap: + regs.pipe_addrmap.h: [ decl, name=regs.pipe, expand_disabled_vector ] + regs.pipe_addrmap.cpp: [ defn, name=regs.pipe, expand_disabled_vector, + -Iregs.pipe_addrmap.h, -Iregs.ibp_rspec.h, -Iregs.ebp_rspec.h, + -Iregs.prsr_reg_merge_rspec.h, -Iregs.mau_addrmap.h, + -Iregs.dprsr_inp.h, -Iregs.dprsr_hdr.h ] + # pmarb_rspec + ibp_rspec: # Ingress parser registers + regs.ibp_rspec.h: [ decl, name=regs.all.parser.ingress ] + regs.ibp_rspec.cpp: [ defn, name=regs.all.parser.ingress, + -Iregs.ibp_rspec.h ] + ebp_rspec: # Egress parser registers + regs.ebp_rspec.h: [ decl, name=regs.all.parser.egress ] + regs.ebp_rspec.cpp: [ defn, name=regs.all.parser.egress, + -Iregs.ebp_rspec.h ] + prsr_reg_merge_rspec: # Shared parser registers + regs.prsr_reg_merge_rspec.h: [ decl, name=regs.all.parse_merge ] + regs.prsr_reg_merge_rspec.cpp: [ defn, name=regs.all.parse_merge, + 
-Iregs.prsr_reg_merge_rspec.h ] + mau_addrmap: + regs.mau_addrmap.h: [ decl, name=regs.match_action_stage.%02x ] + regs.mau_addrmap.cpp: [ defn, name=regs.match_action_stage.%02x, + -Iregs.mau_addrmap.h ] + # dprsr_reg_rspec + dprsr_inp: + regs.dprsr_inp.h: [ decl, name=regs.all.deparser.input_phase, global=fde_pov ] + regs.dprsr_inp.cpp: [ defn, name=regs.all.deparser.input_phase, global=fde_pov, + -Iregs.dprsr_inp.h ] + #dprsr_out_ingr: {} + #dprsr_out_egr: {} + dprsr_hdr: + regs.dprsr_hdr.h: [ decl, name=regs.all.deparser.header_phase, global=fde_phv ] + regs.dprsr_hdr.cpp: [ defn, name=regs.all.deparser.header_phase, global=fde_phv, + -Iregs.dprsr_hdr.h ] +ignore: + memories: + - mau_addrmap + # pipe_top_level + - tm_pre_mem_rspec + - party_pgr_mem_rspec + regs: + # tofino + - dvsl_addrmap + - mac_addrmap + - serdes_addrmap + # pipe_addrmap + # pmarb_rspec + # ebp_rspec + - egrNx_regs + # parb_regs + - pbus_station_regs + - party_pgr_reg_rspec + - party_glue_reg_rspec + # dprsr_reg_rspec + - mir_buf_all + - dprsr_out_ingr + - dprsr_out_egr + # dprsr_hdr + # dprsr_hi_mem + - dprsr_h_pv_table_map diff --git a/backends/tofino/bf-asm/tofino/ternary_match.cpp b/backends/tofino/bf-asm/tofino/ternary_match.cpp new file mode 100644 index 00000000000..e18bdf96723 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/ternary_match.cpp @@ -0,0 +1,55 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "backends/tofino/bf-asm/tofino/ternary_match.h" + +#include "backends/tofino/bf-asm/stage.h" + +void Target::Tofino::TernaryMatchTable::pass1() { + ::TernaryMatchTable::pass1(); + // Dont allocate id (mark them as used) for empty ternary tables (keyless + // tables). Keyless tables are marked ternary with a tind. They are setup by + // the driver to always miss (since there is no match) and run the miss + // action. The miss action is associated with the logical table space and + // does not need a tcam id association. This saves tcams ids to be assigned + // to actual ternary tables. This way we can have 8 real ternary match + // tables within a stage and not count the keyless among them. + // NOTE: The tcam_id is never assigned for these tables and will be set to + // default (-1). We also disable registers associated with tcam_id for this + // table. + if (layout_size() != 0) { + alloc_id("tcam", tcam_id, stage->pass1_tcam_id, TCAM_TABLES_PER_STAGE, false, + stage->tcam_id_use); + physical_ids[tcam_id] = 1; + } + // alloc_busses(stage->tcam_match_bus_use); -- now hardwired +} + +void Target::Tofino::TernaryIndirectTable::pass1() { + ::TernaryIndirectTable::pass1(); + alloc_busses(stage->tcam_indirect_bus_use, Layout::TIND_BUS); +} + +void Target::Tofino::TernaryMatchTable::check_tcam_match_bus( + const std::vector &layout) { + for (auto &row : layout) { + if (row.bus.empty()) continue; + for (auto &tcam : row.memunits) + if (row.bus.at(Table::Layout::SEARCH_BUS) != tcam.col) + error(row.lineno, "Tcam match bus hardwired to tcam column"); + } +} diff --git a/backends/tofino/bf-asm/tofino/ternary_match.h b/backends/tofino/bf-asm/tofino/ternary_match.h new file mode 100644 index 00000000000..cfb635443b1 --- /dev/null +++ b/backends/tofino/bf-asm/tofino/ternary_match.h @@ -0,0 +1,40 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOFINO_TERNARY_MATCH_H_ +#define BACKENDS_TOFINO_BF_ASM_TOFINO_TERNARY_MATCH_H_ + +#include "backends/tofino/bf-asm/tables.h" + +class Target::Tofino::TernaryMatchTable : public ::TernaryMatchTable { + friend class ::TernaryMatchTable; + TernaryMatchTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::TernaryMatchTable(line, n, gr, s, lid) {} + + void pass1() override; + void check_tcam_match_bus(const std::vector &); +}; + +class Target::Tofino::TernaryIndirectTable : public ::TernaryIndirectTable { + friend class ::TernaryIndirectTable; + TernaryIndirectTable(int line, const char *n, gress_t gr, Stage *s, int lid) + : ::TernaryIndirectTable(line, n, gr, s, lid) {} + + void pass1() override; +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_TOFINO_TERNARY_MATCH_H_ */ diff --git a/backends/tofino/bf-asm/top_level.cpp b/backends/tofino/bf-asm/top_level.cpp new file mode 100644 index 00000000000..d7d89d5f79c --- /dev/null +++ b/backends/tofino/bf-asm/top_level.cpp @@ -0,0 +1,126 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "top_level.h" + +#include "bfas.h" +#include "binary_output.h" +#include "bson.h" +#include "version.h" + +TopLevel *TopLevel::all = nullptr; + +TopLevel::TopLevel() { + BUG_CHECK(!all); + all = this; +} + +TopLevel::~TopLevel() { all = nullptr; } + +template +TopLevelRegs::TopLevelRegs() { + declare_registers(&this->mem_top, sizeof(this->mem_top), + [this](std::ostream &out, const char *addr, const void *end) { + out << "memories.top"; + this->mem_top.emit_fieldname(out, addr, end); + }); + declare_registers(&this->mem_pipe, sizeof(this->mem_pipe), + [this](std::ostream &out, const char *addr, const void *end) { + out << "memories.pipe"; + this->mem_pipe.emit_fieldname(out, addr, end); + }); + declare_registers(&this->reg_top, sizeof(this->reg_top), + [this](std::ostream &out, const char *addr, const void *end) { + out << "registers.top"; + this->reg_top.emit_fieldname(out, addr, end); + }); + declare_registers(&this->reg_pipe, sizeof(this->reg_pipe), + [this](std::ostream &out, const char *addr, const void *end) { + out << "registers.pipe"; + this->reg_pipe.emit_fieldname(out, addr, end); + }); +} + +template +TopLevelRegs::~TopLevelRegs() { + undeclare_registers(&this->mem_top); + undeclare_registers(&this->mem_pipe); + undeclare_registers(&this->reg_top); + undeclare_registers(&this->reg_pipe); +} + +template +void TopLevelRegs::output(json::map &ctxt_json) { + for (int i = 0; i < Target::NUM_PIPES(); i++) { + if (options.binary >= PIPE0 && options.binary != PIPE0 + i) { + 
this->mem_top.pipes[i].disable(); + this->reg_top.pipes[i].disable(); + } else { + this->mem_top.pipes[i].set("memories.pipe", &this->mem_pipe); + this->reg_top.pipes[i].set("regs.pipe", &this->reg_pipe); + } + } + if (options.condense_json) { + this->mem_top.disable_if_reset_value(); + this->mem_pipe.disable_if_reset_value(); + this->reg_top.disable_if_reset_value(); + this->reg_pipe.disable_if_reset_value(); + } + if (error_count == 0) { + if (options.gen_json) { + this->mem_top.emit_json(*open_output("memories.top.cfg.json")); + this->mem_pipe.emit_json(*open_output("memories.pipe.cfg.json")); + this->reg_top.emit_json(*open_output("regs.top.cfg.json")); + this->reg_pipe.emit_json(*open_output("regs.pipe.cfg.json")); + } + if (options.binary != NO_BINARY) { + auto binfile = open_output("%s.bin", TARGET::name); + json::map header; + header["asm_version"] = BFASM::Version::getVersion(); + if (ctxt_json["compiler_version"]) + header["compiler_version"] = ctxt_json["compiler_version"]->clone(); + header["reg_version"] = TARGET::top_level_regs::_regs_top::_reg_version; + if (ctxt_json["run_id"]) header["run_id"] = ctxt_json["run_id"]->clone(); + if (ctxt_json["program_name"]) + header["program_name"] = ctxt_json["program_name"]->clone(); + header["target"] = Target::name(); + header["stages"] = Target::NUM_MAU_STAGES(); + *binfile << binout::tag('H') << json::binary(header); + if (options.binary != ONE_PIPE) { + this->mem_top.emit_binary(*binfile, 0); + this->reg_top.emit_binary(*binfile, 0); + } else { + this->mem_pipe.emit_binary(*binfile, 0); + this->reg_pipe.emit_binary(*binfile, 0); + } + + if (options.multi_parsers) { + emit_parser_registers(this, *binfile); + } + } + } +} + +template +void TopLevelRegs::set_mau_stage(int stage, const char *file, + typename TARGET::mau_regs *regs, bool egress_only) { + BUG_CHECK(!egress_only, "separate egress MAU on target that does not support it"); + this->reg_pipe.mau[stage].set(file, regs); +} + +#define 
TOP_LEVEL_REGS(REGSET) template class TopLevelRegs; +FOR_ALL_REGISTER_SETS(TOP_LEVEL_REGS) diff --git a/backends/tofino/bf-asm/top_level.h b/backends/tofino/bf-asm/top_level.h new file mode 100644 index 00000000000..9c150607a71 --- /dev/null +++ b/backends/tofino/bf-asm/top_level.h @@ -0,0 +1,61 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_TOP_LEVEL_H_ +#define BACKENDS_TOFINO_BF_ASM_TOP_LEVEL_H_ + +#include "backends/tofino/bf-asm/json.h" +#include "backends/tofino/bf-asm/target.h" + +template +class TopLevelRegs; + +class TopLevel { + protected: + TopLevel(); + + public: + static TopLevel *all; + virtual ~TopLevel(); + virtual void output(json::map &) = 0; + static void output_all(json::map &ctxtJson) { all->output(ctxtJson); } + template + static TopLevelRegs *regs(); +#define SET_MAU_STAGE(TARGET) \ + virtual void set_mau_stage(int, const char *, Target::TARGET::mau_regs *, bool) { \ + BUG_CHECK(!"register mismatch"); \ + } + FOR_ALL_REGISTER_SETS(SET_MAU_STAGE) +}; + +template +class TopLevelRegs : public TopLevel, public REGSET::top_level_regs { + public: + TopLevelRegs(); + ~TopLevelRegs(); + + void output(json::map &); + void set_mau_stage(int stage, const char *file, typename REGSET::mau_regs *regs, + bool egress_only); +}; + +template +TopLevelRegs *TopLevel::regs() { + return dynamic_cast *>(all); +} + +#endif /* 
BACKENDS_TOFINO_BF_ASM_TOP_LEVEL_H_ */ diff --git a/backends/tofino/bf-asm/ubits.cpp b/backends/tofino/bf-asm/ubits.cpp new file mode 100644 index 00000000000..2b90cceb6d9 --- /dev/null +++ b/backends/tofino/bf-asm/ubits.cpp @@ -0,0 +1,83 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "ubits.h" + +#include +#include + +#include "lib/hex.h" +#include "lib/log.h" + +struct regrange { + const char *base; + size_t sz; + std::function fn; +}; + +static std::map *registers; + +static regrange *find_regrange(const void *addr_) { + const char *addr = static_cast(addr_); + if (registers) { + auto it = registers->upper_bound(addr); + if (it != registers->begin()) { + it--; + if (addr <= it->second.base + it->second.sz) return &it->second; + } + } + return nullptr; +} + +void declare_registers(const void *addr_, size_t sz, + std::function fn) { + const char *addr = static_cast(addr_); + if (!registers) registers = new std::map(); + registers->emplace(addr, regrange{addr, sz, fn}); +} + +void undeclare_registers(const void *addr_) { + const char *addr = static_cast(addr_); + registers->erase(addr); + if (registers->empty()) { + delete registers; + registers = 0; + } +} + +void print_regname(std::ostream &out, const void *addr, const void *end) { + if (auto rr = find_regrange(addr)) + rr->fn(out, static_cast(addr), end); + else + out << "???"; +} + +std::string 
string_regname(const void *addr, const void *end) { + std::stringstream tmp; + print_regname(tmp, addr, end); + return tmp.str(); +} + +void ubits_base::log(const char *op, uint64_t v) const { + if (LOGGING(1)) { + std::ostringstream tmp; + if (!find_regrange(this)) return; + LOG1(this << ' ' << op << ' ' << v + << (v != value ? tmp << " (now " << value << ")", tmp : tmp).str() << " (0x" + << hex(value) << ")"); + } +} diff --git a/backends/tofino/bf-asm/ubits.h b/backends/tofino/bf-asm/ubits.h new file mode 100644 index 00000000000..adc4248f955 --- /dev/null +++ b/backends/tofino/bf-asm/ubits.h @@ -0,0 +1,178 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_UBITS_H_ // NOLINT(build/header_guard) +#define BACKENDS_TOFINO_BF_ASM_UBITS_H_ + +#include +#include +#include + +#include +#include +#include + +#include "lib/bitvec.h" +#include "lib/log.h" + +using namespace P4; + +void declare_registers(const void *addr, size_t sz, + std::function fn); +void undeclare_registers(const void *addr); +void print_regname(std::ostream &out, const void *addr, const void *end); +std::string string_regname(const void *addr, const void *end); + +struct ubits_base; + +struct ubits_base { + uint64_t value, reset_value; + mutable bool read, write; + mutable bool disabled_; + + ubits_base() : value(0), reset_value(0), read(false), write(false), disabled_(false) {} + explicit ubits_base(uint64_t v) + : value(v), reset_value(v), read(false), write(false), disabled_(false) {} + operator uint64_t() const { + read = true; + return value; + } + bool modified() const { return write; } + void set_modified(bool v = true) { write = v; } + bool disabled() const { return disabled_; } + bool disable_if_unmodified() { return write ? false : (disabled_ = true); } + bool disable_if_zero() const { return value == 0 && !write; } + bool disable_if_reset_value() { return value == reset_value ? 
(disabled_ = true) : false; } + bool disable() const { + if (write) { + LOG1("ERROR: Disabling modified register in " << this); + return false; + } + disabled_ = true; + return disabled_; + } + void enable() const { disabled_ = false; } + void rewrite() { write = false; } + virtual uint64_t operator=(uint64_t v) = 0; + virtual const ubits_base &operator|=(uint64_t v) = 0; + virtual unsigned size() = 0; + void log(const char *op, uint64_t v) const; +}; + +inline std::ostream &operator<<(std::ostream &out, const ubits_base *u) { + print_regname(out, u, u + 1); + return out; +} + +template +struct ubits : ubits_base { + ubits() : ubits_base() {} + const ubits &check(std::true_type) { + if (value >= (uint64_t(1) << N)) { + LOG1("ERROR: out of range for " << N << " bits in " << this); + value &= (uint64_t(1) << N) - 1; + } + return *this; + } + const ubits &check(std::false_type) { return *this; } + const ubits &check() { + return check(std::integral_constant{}); + } + explicit ubits(uint64_t v) : ubits_base(v) { check(); } + ubits(const ubits &) = delete; + ubits(ubits &&) = default; + uint64_t operator=(uint64_t v) override { + if (disabled_) LOG1("ERROR: Writing disabled register value in " << this); + if (write) + LOG1((value != v ? 
"ERROR:" : "WARNING:") + << " Overwriting " << value << " with " << v << " in " << this); + value = v; + write = true; + log("=", v); + check(); + return v; + } + const ubits &operator=(const ubits &v) { + *this = v.value; + v.read = true; + return v; + } + const ubits_base &operator=(const ubits_base &v) { + *this = v.value; + v.read = true; + return v; + } + unsigned size() override { return N; } + const ubits &operator|=(uint64_t v) override { + if (disabled_) LOG1("ERROR: Writing disabled register value in " << this); + if (write && (v & value) != 0) + LOG1("WARNING: Overwriting " << value << " with " << (v | value) << " in " << this); + value |= v; + write = true; + log("|=", v); + return check(); + } + const ubits &operator|=(bitvec v) { + if (disabled_) LOG1("ERROR: Writing disabled register value in " << this); + if (v.ffs(N) > 0) + LOG1("ERROR: bitvec 0x" << v << " out of range for " << N << " bits in " << this); + uint64_t val = v.getrange(0, N); + if (write && (val & value) != 0) + LOG1("WARNING: Overwriting " << value << " with " << (val | value) << " in " << this); + value |= val; + write = true; + log("|=", val); + return check(); + } + const ubits &operator+=(uint64_t v) { + if (disabled_) LOG1("ERROR: Overwriting disabled register value in " << this); + value += v; + write = true; + log("+=", v); + return check(); + } + const ubits &operator^=(uint64_t v) { + if (disabled_) LOG1("ERROR: Overwriting disabled register value in " << this); + value ^= v; + write = true; + log("^=", v); + return check(); + } + const ubits &set_subfield(uint64_t v, unsigned bit, unsigned size) { + if (disabled_) LOG1("ERROR: Overwriting disabled register value in " << this); + uint64_t mask = (1ULL << size) - 1; + uint64_t oldv = (value >> bit) & mask; + if (bit + size > N) { + LOG1("ERROR: subfield " << bit << ".." << (bit + size - 1) << " out of range in " + << this); + } else if (write && oldv) { + LOG1((v != oldv ? 
"ERROR" : "WARNING") + << ": Overwriting subfield(" << bit << ".." << (bit + size - 1) << ") value " + << oldv << " with " << v << " in " << this); + } + if (v > mask) { + LOG1("ERROR: Subfield value " << v << " too large for " << size << " bits in " << this); + v &= mask; + } + value |= v << bit; + write = true; + log("|=", v << bit); + return check(); + } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_UBITS_H_ */ diff --git a/backends/tofino/bf-asm/vector.c b/backends/tofino/bf-asm/vector.c new file mode 100644 index 00000000000..7a4df9f36f7 --- /dev/null +++ b/backends/tofino/bf-asm/vector.c @@ -0,0 +1,116 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include "vector.h" + +struct raw_vector { + int capacity, size; + void *data; +}; + +int init_raw_vector(void *vec, size_t elsize, int mincap) +{ + struct raw_vector *v = (struct raw_vector *)vec; + v->size = 0; + v->capacity = 32 / elsize; + if (v->capacity < 4) v->capacity = 4; + if (v->capacity < mincap) v->capacity = mincap; + if (!(v->data = malloc(elsize * v->capacity))) + v->capacity = 0; + return v->data ? 
0 : -1; +} + +int erase_raw_vector(void *vec, size_t elsize, int i, unsigned cnt) +{ + struct raw_vector *v = (struct raw_vector *)vec; + if (i < 0 && i >= v->size) return -1; + if (cnt == 0) cnt = 1; + if (i + cnt >= (unsigned)v->size) { + v->size = i; + } else { + char *p = (char *)v->data + i*elsize; + memmove(p, p + elsize*cnt, elsize * (v->size - i - cnt)); + v->size -= cnt; } + return 0; +} + +int expand_raw_vector(void *vec, size_t elsize) +{ + struct raw_vector *v = (struct raw_vector *)vec; + size_t ncap = v->capacity * 2U; + void *n; + if (ncap == 0) { + ncap = 32 / elsize; + if (ncap < 4) ncap = 4; } + if (ncap > (size_t)INT_MAX && (int)(ncap = INT_MAX) == v->capacity) { + errno = ERANGE; + return -1; } + if (!(n = realloc(v->data, elsize * ncap))) return -1; + v->capacity = ncap; + v->data = n; + return 0; +} + +int insert_raw_vector(void *vec, size_t elsize, int i, unsigned cnt) +{ + struct raw_vector *v = (struct raw_vector *)vec; + if (i < 0 && i > v->size) return -1; + if (cnt == 0) cnt = 1; + if (v->size + cnt > (unsigned)INT_MAX) { + errno = ERANGE; + return -1; } + if ((int)(v->size + cnt) > v->capacity) { + int newsz = v->size + cnt; + void *n; + if (newsz < v->capacity * 2) newsz = v->capacity * 2; + if (!(n = realloc(v->data, elsize * newsz))) return -1; + v->capacity = newsz; + v->data = n; } + if (i < v->size) { + char *p = (char *)v->data + i*elsize; + memmove(p + cnt*elsize, p, elsize * (v->size - i)); } + v->size += cnt; + return 0; +} + +int reserve_raw_vector(void *vec, size_t elsize, int size, int shrink) +{ + struct raw_vector *v = (struct raw_vector *)vec; + void *n; + if (v->capacity < size || (shrink && v->capacity > size)) { + if (!(n = realloc(v->data, elsize * size))) return -1; + v->capacity = size; + if (size < v->size) + v->size = size; + v->data = n; } + return 0; +} + +int shrink_raw_vector(void *vec, size_t elsize) +{ + struct raw_vector *v = (struct raw_vector *)vec; + void *n; + if (v->size < v->capacity) { + if (!(n = 
realloc(v->data, elsize * v->size))) return -1; + v->capacity = v->size; + v->data = n; } + return 0; +} diff --git a/backends/tofino/bf-asm/vector.h b/backends/tofino/bf-asm/vector.h new file mode 100644 index 00000000000..520fa636058 --- /dev/null +++ b/backends/tofino/bf-asm/vector.h @@ -0,0 +1,229 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_VECTOR_H_ +#define BACKENDS_TOFINO_BF_ASM_VECTOR_H_ + +/* C code and macros for VECTOR objects similar to C++ std::vector */ +#include + +#define CAT(A, B) A##B +#define VECTOR(NAME) CAT(NAME, _VECTOR) +#define DECLARE_VECTOR(TYPE, ...) \ + typedef struct CAT(TYPE, _VECTOR) { \ + int capacity, size; \ + TYPE *data; \ + __VA_ARGS__ \ + } CAT(TYPE, _VECTOR); +#define DECLARE_VECTOR2(NAME, ELTYPE, ...) 
\ + typedef struct CAT(NAME, _VECTOR) { \ + int capacity, size; \ + ELTYPE *data; \ + __VA_ARGS__ \ + } CAT(NAME, _VECTOR); + +#define RAW(X) X + +/* VECTOR constructors/destrutor + * can safely use memset(&vec, 0, sizeof(vec)) for initial capacity of 0, + * so global and calloc'd VECTORs are safe to use immediately + * local and malloc's VECTORs must be initialized before use, as they may + * contain garbage */ + +/* VECTOR_init(vec, capacity) + * initialize an empty vector with optional initial capacity + * VECTOR_initcopy(vec, from) + * initialize a vector as a copy of an existing vector + * VECTOR_initN(vec, val1, ...) + * initialize a vector with N values + * RETURNS + * 0 success + * -1 failure (out of memory), vector has capacity 0 + */ +#define VECTOR_init(vec, ...) init_raw_vector(&(vec), sizeof((vec).data[0]), RAW(__VA_ARGS__ + 0)) + +#define VECTOR_initcopy(vec, from) \ + (init_raw_vector(&(vec), sizeof((vec).data[0]), (from).size) \ + ? -1 \ + : (memcpy((vec).data, (from).data, ((vec).size = (from).size) * sizeof((vec).data[0])), \ + 0)) + +#define VECTOR_init1(vec, v1) \ + (init_raw_vector(&(vec), sizeof((vec).data[0]), 1) \ + ? -1 \ + : ((vec).size = 1, (vec).data[0] = (v1), 0)) +#define VECTOR_init2(vec, v1, v2) \ + (init_raw_vector(&(vec), sizeof((vec).data[0]), 2) \ + ? -1 \ + : ((vec).size = 2, (vec).data[0] = (v1), (vec).data[1] = (v2), 0)) +#define VECTOR_init3(vec, v1, v2, v3) \ + (init_raw_vector(&(vec), sizeof((vec).data[0]), 3) \ + ? -1 \ + : ((vec).size = 3, (vec).data[0] = (v1), (vec).data[1] = (v2), (vec).data[2] = (v3), 0)) +#define VECTOR_init4(vec, v1, v2, v3, v4) \ + (init_raw_vector(&(vec), sizeof((vec).data[0]), 4) \ + ? -1 \ + : ((vec).size = 3, (vec).data[0] = (v1), (vec).data[1] = (v2), (vec).data[2] = (v3), \ + (vec).data[3] = (v4), 0)) +#define VECTOR_init5(vec, v1, v2, v3, v4, v5) \ + (init_raw_vector(&(vec), sizeof((vec).data[0]), 5) \ + ? 
-1 \ + : ((vec).size = 3, (vec).data[0] = (v1), (vec).data[1] = (v2), (vec).data[2] = (v3), \ + (vec).data[3] = (v4), (vec).data[4] = (v5), 0)) + +#define EMPTY_VECTOR_INIT \ + { 0, 0, 0 } + +/* VECTOR_fini(vec) + * destroys a vector, freeing memory + * RETURNS + * void + */ +#define VECTOR_fini(vec) free((vec).data) + +/* VECTOR methods */ + +/* VECTOR_add(vec, val) + * add a single value to the end of a vector, increasing its size (and + * capacity if necessary) + * VECTOR_addcopy(vec, ptr, n) + * add a multiple value to the end of a vector, increasing its size (and + * capacity as necessary) + * VECTOR_copy(vec, from) + * replace a vector with a copy of another vector + * RETURNS + * 0 success + * -1 failure (out of memory), vector is unchanged + */ +#define VECTOR_add(vec, val) \ + (((vec).size == (vec).capacity && expand_raw_vector(&(vec), sizeof((vec).data[0]))) \ + ? -1 \ + : ((vec).data[(vec).size++] = (val), 0)) +#define VECTOR_addcopy(vec, ptr, n) \ + (VECTOR_reserve(vec, (vec).size + (n)) \ + ? -1 \ + : (memcpy((vec).data + (vec).size, (ptr), (n) * sizeof((vec).data[0])), \ + (vec).size += (n), 0)) +#define VECTOR_copy(vec, from) \ + (VECTOR_reserve(vec, (from).size) \ + ? -1 \ + : (memcpy((vec).data, (from).data, (from).size * sizeof((vec).data[0])), \ + (vec).size = (from).size, 0)) + +#define VECTOR_begin(vec) ((vec).data) +#define VECTOR_end(vec) ((vec).data + (vec).size) +#define VECTOR_empty(vec) ((vec).size == 0) + +/* VECTOR_erase(vec, idx, cnt) + * erase cnt elements from a vector (defaults to 1). If there are fewer + * than cnt elements in the vector after idx (inclusive), all will be + * erased + * RETURNS + * 0 success + * -1 idx is out of range + */ +#define VECTOR_erase(vec, idx, ...) \ + erase_raw_vector(&(vec), sizeof((vec).data[0]), idx, RAW(__VA_ARGS__ + 0)) + +/* VECTOR_expand(vec) + * increase the capacity of a vector, if possible. 
Does not affect the size + * RETURNS + * 0 success + * -1 failure (out of memory), vector is unchanged + */ +#define VECTOR_expand(vec) expand_raw_vector(&(vec), sizeof((vec).data[0])) + +/* VECTOR_foreach(vec, apply) + * apply a function or macro to every element of a vector + * not a valid expression, so doesn't really return anything + */ +#define VECTOR_foreach(vec, apply) \ + do { \ + for (int i_ = 0; i_ < (vec).size; i_++) { \ + apply((&(vec).data[i_])); \ + } \ + } while (0) + +/* VECTOR_insert(vec, idx, cnt) + * increase the size of a vector, adding uninitialized space at idx, and + * moving later elements of the vector up. cnt defaults to 1 + * RETURNS + * 0 success + * -1 failure -- idx is out of range[ERANGE], or out of memeory[ENOMEM] + * vector is unchanged + */ +#define VECTOR_insert(vec, idx, ...) \ + insert_raw_vector(&(vec), sizeof((vec).data[0]), idx, RAW(__VA_ARGS__ + 0)) + +#define VECTOR_pop(vec) ((vec).data[--(vec).size]) +#define VECTOR_push(vec, val) VECTOR_add(vec, val) + +/* VECTOR_reserve(vec, size, shrink) + * change the capacity of a vector. If shrink is false (default), will only + * increase the capacity. + * RETURNS + * 0 success + * -1 failure (out of memory), vector is unchanged + */ +#define VECTOR_reserve(vec, size, ...) \ + reserve_raw_vector(&(vec), sizeof((vec).data[0]), size, RAW(__VA_ARGS__ + 0)) + +/* VECTOR_resize(vec, size, shrink) + * change the size of a vector. If shrink is false (default), will only + * increase the capacity. + * RETURNS + * 0 success + * -1 failure (out of memory), vector is unchanged + */ +#define VECTOR_resize(vec, sz, ...) \ + (VECTOR_reserve(vec, sz, __VA_ARGS__) ? 
-1 : ((vec).size = (sz), 0)) + +/* VECTOR_shrink_to_fit(vec) + * reduce capacity to match the size, releasing memory if possible + * RETURNS + * 0 success + * -1 failure (realloc failed to shrink?), vector is unchanged + */ +#define VECTOR_shrink_to_fit(vec) shrink_raw_vector(&(vec), sizeof((vec).data[0])) + +/* VECTOR_terminate(vec, val) + * ensure that capacity is greater than size, and store val after + * the end of the vector. + * RETURNS + * 0 success + * -1 failure (out of memory), vector is unchanged + */ +#define VECTOR_terminate(vec, val) \ + (((vec).size == (vec).capacity && expand_raw_vector(&(vec), sizeof((vec).data[0]))) \ + ? -1 \ + : ((vec).data[(vec).size] = (val), 0)) +#define VECTOR_top(vec) ((vec).data[(vec).size - 1]) + +#ifdef __cplusplus +extern "C" { +#endif +extern int erase_raw_vector(void *vec, size_t elsize, int idx, unsigned cnt); +extern int expand_raw_vector(void *vec, size_t elsize); +extern int init_raw_vector(void *vec, size_t elsize, int mincap); +extern int insert_raw_vector(void *vec, size_t elsize, int idx, unsigned cnt); +extern int reserve_raw_vector(void *vec, size_t elsize, int size, int shrink); +extern int shrink_raw_vector(void *vec, size_t elsize); +#ifdef __cplusplus +} +#endif + +#endif /* BACKENDS_TOFINO_BF_ASM_VECTOR_H_ */ diff --git a/backends/tofino/bf-asm/version.h b/backends/tofino/bf-asm/version.h new file mode 100644 index 00000000000..d4fd9442c71 --- /dev/null +++ b/backends/tofino/bf-asm/version.h @@ -0,0 +1,44 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once +#include + +namespace BFASM { +// Singleton class representing the assembler version +class Version { + public: + static const std::string getVersion() { + static Version v; + return std::to_string(v.major) + "." + std::to_string(v.minor) + "." + + std::to_string(v.patch); + } + + private: + static constexpr int major = 1; + static constexpr int minor = 0; + static constexpr int patch = 1; + + Version() {} + + public: + // disable any other constructors + Version(Version const &) = delete; + void operator=(Version const &) = delete; +}; + +} // namespace BFASM diff --git a/backends/tofino/bf-asm/walle/README.md b/backends/tofino/bf-asm/walle/README.md new file mode 100644 index 00000000000..2e9b6a4b16a --- /dev/null +++ b/backends/tofino/bf-asm/walle/README.md @@ -0,0 +1,263 @@ +Walle - JSON-to-binary cruncher tool +==================================================== + +Walle serves as a layer of abstraction between the Tofino compiler and chip, +presenting the chip's memory hierarchy to the compiler as a set of JSON +structures that contain register/memory names and their values, while +abstracting away the actual addresses of these registers and the methods by +which they are programmed (DMA/direct PCIe writes/indirect instruction lists). + +Walle stores the exact structure of the chip's memory hierarchy in a +"chip.schema" file, which has to be generated from raw register data whenever +the chip registers change, and is then used afterwards to crunch compiler output +into a binary config file. It can also be used to generate "template" JSON that +looks like compiler output with the hardware's default values for all fields (in +most cases, 0). 
These templates are used by the compiler to enforce the correct +structure on its output data, and also in general should be regenerated whenever +the chip's registers change. + +Using Walle +---------------------------------------------------- +### Basic usage +#### Generating a schema +First, generate a chip schema. Invoke Walle with the `--generate-schema` flag +followed by the directory containing raw CSV files output by csrCompiler. If +the bfnregs repo is cloned into ~/bfnregs, this would be: + + ./walle.py --generate-schema ~/bfnregs/modules/tofino_regs/module/csv/ + +This will generate a file named `chip.schema` in the current working directory, +which is where it will look for the chip schema by default. The +`--schema SCHEMA-FILE` flag can be used to point Walle to a different schema, +or a different location to output the schema it is generating. + +#### Crunching compiler output +The most common use case for Walle is taking multiple config JSONs and +crunching them into a binary file Tofino's drivers can read. Just invoke +Walle with the names of all relevant JSON files, and optionally the name of +the file to output: + + ./walle.py cfg1.json cfg2.json cfg3.json -o chip_config.bin + +If the compiler was set up to dump all of its config output into an otherwise +empty directory, shell wildcards can be used to shorten this command. If that +dir is called 'cfgs', this would look like: + + ./walle.py cfgs/*.json -o chip_config.bin + +#### Generating templates +Walle can be used to generate blank register templates to be filled in by the +compiler. These templates are the JSON files that Walle would expect to see +given the current chip schema, but with all of the data set to the corresponding +hardware register's power-on default (in most cases, 0). + +To do so, Walle must be fed a JSON file enumerating the Semifore addressmap +objects it should generate templates for. 
This file must take this structure: + + { + "generated": { + "memories":[ + // memory addressmap names + ], + "regs":[ + // register addressmap names + ] + }, + "ignored": { + "memories":[ + // memory addressmap names + ], + "regs":[ + // register addressmap names + ] + } + } + +Names under 'memories' keys refer to addressmaps included by the top-level +'pipe_top_level.csr' file, while names under 'regs' keys refer to those included +by 'tofino.csr'. + +Address maps listed under 'generated' will cause a JSON template file to be +generated. Wherever that address map appears elsewhere in the hierarchy will be +replaced with a string reference to said JSON file. + +Address maps listed under 'ignored' will be replaced with a 0 when they appear +elsewhere in the hierarchy, and no JSON template file will be generated. + +Use the `--generate-templates` flag followed by the path to a file of the +format just discussed to generate template JSONs in a directory called +`templates` (which will be created in the working directory if it doesn't +already exist): + + ./walle.py --generate-templates templates_file + +These files can then be copied to the compiler's source tree. + +The templates themselves end in the extension '.cfg.json'. Walle will also +generate an identical hierarchy containing the bit-widths of each field, and +these files end in the extension '.size.json'. + +### Advanced usage +#### Directing the crunch process +Walle crunches by first loading all provided JSON files and verifying them +against its chip schema, and then drilling down from specified "top-level" +points in that cloud of JSON data. By default, these points are called +`memories.top` and `regs.top` and represent the memory and register hierarchies +of the chip, respectively. + +The `--top NAME` flag can be used to manually specify the top-level points to +drill down from. Multiple `--top NAME` flags can be included, and if any are +present the default top-level names are not used. 
+ +This is equivalent to the default behavior: + + ./walle.py cfgs/*.json --top memories.top --top regs.top -o chip_config.bin + +One of them can be left out to only generate, say, only register configuration: + + ./walle.py cfgs/*.json --top regs.top -o chip_config.bin + +Walle calculates addresses relative to the top-level points specified, so it is +important that these points only ever refer to actual top-level points in the +Semifore register hierarchy. If it is desired to only generate, for example, +config data for the MAU or one pipe, the top-level JSON files should be +hand-tweaked to disable other parts of the configuration binary. See the +specification of the JSON config format for more details. + +#### Directing the template generation process +Walle generates a template file for each addressmap type specified in the +`template_objects` file which sits in the same folder as the Walle script. If +Walle encounters an instance of these addressmap types during template +generation, it leaves that tree of the JSON data unexpanded and replaces it +with a string indicating it expects a template to be plugged in to that +location. + +The type names of these addressmaps can be found by viewing the Semifore HTML +output of the reg and memory hierarchies and checking 'Header File Information' +at the top of the page. The 'Type Name' field that then appears within each +address map indicates the type which should be passed to Walle for +templatization. Semifore incorrectly capitlizes the first letter of the type +name - it should be all lowercase when specified to Walle. + +Note that the JSON fed to Walle does *not* have to follow the same template +structure as specified in the `template_objects` file - this templatization +control is just for convenience and reducing the file size of the generated +blank templates. 
+ +Configuration JSON format +---------------------------------------------------- +Walle consumes JSON files that specify values to be written registers named in +the chip's Semifore specification. The structure of these JSON files directly +mirrors the structure found in the chip's Semifore specification. + +Each JSON file contains a dictionary that represents one instance of a Semifore +addressmap. Addressmap dictionaries' keys represent the Semifore names of +registers and nested addressmaps, while the values are either: + + * Dictionaries representing those objects + * Lists of dictionaries, in the case the object in question is an array + * Lists of lists (of lists of lists of lists of...) in the case the object + in question is an N-dimensional array + +Register dictionaries have field names as keys and integers as values. They +follow the same rules for lists in the event of a field array. The outer-most +dictionary also has these special Walle keys: + + * `_type` : The full type name that this file provides values for, of the + form `section.semifore_type`. For example, the parser's memories are of + type `memories.prsr_mem_rspec`, while its registers are of type + `regs.prsr_reg_rspec` + * `_name` : A name used to reference this file and its data elsewhere in + the config JSON + * `_schema_hash`: The MD5 hash of the raw Semifore output used to generate + the chip schema from which this file's structure was derived, used + to ensure the chip schema and JSON input match + * `_reg_version`: The git tag of the bfnregs repo commit used to generate + the chip schema from which this file's structure was derived. This value + isn't used by Walle itself, but is useful to manually determine which + version of the compiler or model a given config JSON was created for. 
+ +At any point in the hierarchy, a register/addressmap value may be replaced +with: + + * A string containing the name of another JSON input file, which "stamps" + that other data down at this point in the memory hierarchy + * 0, indicating no write operation should be generated for the given object + +Fields cannot be "disabled with 0s" the way registers and addressmaps can, +since the register is the level of granularity at which the drivers write data. + +Config JSON can be hand-tweaked with 0's to produce a binary blob that +only writes to specific registers and leaves everything else alone, in order +to produce "initial boot" config blobs and then "soft reboot" config blobs. + +#### Error checking +Walle will fail to generate output if: + + * A field value ever exceeds the field's bit width as specified in the chip + schema + * A template is instantiated at a point in the hierarchy that does not + match the type expected by the chip schema (eg, naming an instance of + `memories.prsr_mem_rspec` in the top-level *register* JSON) + * A file's `_schema_hash` value does not match the hash stored in the chip + schema. This check can be suppressed with the flag + `--ignore-schema-mismatch`: + + ./walle.py cfgs/*.json --ignore-schema-mismatch -o chip_config.bin + + This flag is provided for development purposes, because even a small + change at one end of the register hierarchy (like correcting a typo in a + register *description*) will change the hash without actually affecting + the structure of the chip schema, and it would be a pain to have to + regenerate all templates and copy them over into the compiler source tree + just to get things working again. + + In the long run, however, this flag should not be used and schema hashes + should be consistent. 
+ +Binary blob format +---------------------------------------------------- +Walle generates a sequence of binary write instructions for the driver which +are of the following types: + + * Direct register write - For 32 bit registers that can be addressed + directly from the PCIe bus, a simple address-data pair of the form: + + 4 bytes: "\0\0\0R" + 4 bytes: 32-bit PCIe address + 4 bytes: Data + All fields little-endian + + * Indirect register write - For registers wider than 32 bits, or to compose + many direct register writes into one write list that can be transmitted + across PCIe as a single transaction. + + TODO: not actually implemented driver-or-Walle-side yet, since the model + doesn't currently support indirect reg addressing + + * DMA block write - Automatically chosen for arrays of registers larger + than 4 elements, a base address and block of data: + + 4 bytes: "\0\0\0D" + 8 bytes: 42-bit chip address + 4 bytes: Bit-length of word + 4 bytes: Number of words + Following: Data, in 32-bit word chunks + All fields little-endian + + TODO: currently only registers in the 'memories' half of the hierarchy + will get rolled into DMA blocks, again because the model doesn't + currently support indirect reg addressing. Eventually this won't + be a problem + +The driver should execute these instructions in the order they are read. The +binary blob has no header or structure aside from these write instructions, +so multiple binary files can be concatenated together or split into parts as +needed. + +Walle can be optionally instructed to generate a direct register write to +address 0xFFFFFFFF at the very end of the file to signify to the model the end +of configuration data. 
This is enabled with the flag `--append-sentinel`: + + ./walle.py cfgs/*.json --append-sentinel -o chip_config.bin + diff --git a/backends/tofino/bf-asm/walle/chip.py b/backends/tofino/bf-asm/walle/chip.py new file mode 100644 index 00000000000..8156a5ee62c --- /dev/null +++ b/backends/tofino/bf-asm/walle/chip.py @@ -0,0 +1,183 @@ +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 + +""" +TODO: document this file +""" +import struct +from copy import copy + + +class chip_object(object): + """ + TODO: docstring + """ + + def __init__(self, addr, src_key): + self.addr = addr + self.src_key = src_key + + def add_offset(self, offset): + self.addr += offset + + +class direct_reg(chip_object): + """ + A single register write operation, of the format: + + 4 bytes: "\0\0\0R" + 4 bytes: 32-bit PCIe address + 4 bytes: Data + + All fields little-endian + """ + + def __init__(self, addr, value, src_key=None): + chip_object.__init__(self, addr, src_key) + self.value = struct.pack("= 0: + byte_str += ( + "\0\0\0R" + + struct.pack("" + + def deepcopy(self): + new = copy(self) + new.values = new.values[:] + return new + + def bytes(self): + if self.width > 128: + # FIXME: this only works cleanly if width is a multiple of 128, can it be otherwise? 
+ if self.width % 128 != 0: + sys.stderr.write("ERROR: register width %d not a multiple of 128" % self.width) + sys.exit(1) + new_values = [] + for value in self.values: + for chunk in range(0, self.width // 8, 16): + new_values.append(value[chunk : chunk + 16].rjust(128 // 8, chr(0))) + self.values = new_values + self.width = 128 + + if self.is_reg: + op_type = "\0\0\0B" + else: + op_type = "\0\0\0D" + bytestr = ( + op_type + + struct.pack(" 0: + outfile.write(indent) + outfile.write("// ") + outfile.write(' ' * pfx) + line = line.lstrip() + pt = line.rfind(' ', 0, maxlen - pfx - len(indent)) + if len(line) + len(indent) + pfx > maxlen: + if pt > 0: + outfile.write(line[0:pt]) + line = line[pt + 1 :] + else: + # line is longer than maxlen, but has no spaces. So don't split it or + # subsequent lines (this is probably a wide table with columns) + maxlen = len(line) + len(indent) + pfx + outfile.write(line) + line = '' + else: + outfile.write(line) + line = '' + outfile.write("\n") + + +def indent_comment(indent, text): + if not text: + return text + if text[-2:-1] != '\n': + text = text + '\n' + return indent + text.replace('\n', indent + '\n') + + +######################################################################## +## Structures + + +class CsrException(Exception): + """ + An exception that occured while crunching malformed data according to the + given chip schema. An exception handler in walle.py will catch these + exceptions and then attempt to print a "traceback" recording where in the + chip schema the exception occured. + + This traceback is maintained by keeping a local variable called 'path' in + any scope where a CsrException may be raised. 'path' is a list of + traversal_history objects. + """ + + pass + + +class traversal_history(object): + """ + A class which records part of Walle's traversal through input JSON data. A + single traversal_history corresponds to the traversal of one JSON file. 
+ + Attributes: + @attr template_name + The value at the top level "_name" key of the file currently being + processed + @attr path + An ordered list of keys and list indices visited in the current + traversal of this file. + If we drill down into a dictionary at key "a", push "a" onto the list. + If we access elements of a list, push a tuple recording the index at + each dimension of the list and then the list name. Eg, + [(4,),"a"] to represent a[4] + [(1,2,3),"b"] to represent b[1][2][3] + """ + + def __init__(self, template_name): + self.template_name = template_name + self.path = [] + + +class binary_cache(object): + """ + A class used to store flat chip_obj lists, each corresponding to one JSON + file. The "_name" at the top of each JSON is used to index into the cache. + Requesting a JSON file from here will crunch it into binary if it hasn't + been already. + """ + + def __init__(self, schema): + self.schema = schema + self.templates = {} + self.binary_templates = {} + + def get_type(self, key): + """ + Get the addressmap name that the binary data at the given key + corresponds to, of the form "section_name.addressmap_name". + """ + return self.templates[key]["_type"] + + def get_data(self, key, path=None): + """ + Return a list of objects inheriting from chip.chip_obj, representing + the write operations that must be done in the hardware to program a + hardware object of the given JSON file's "_type" + + These lists are a deep copy of the one stored internally, so it is safe + to modify them + """ + if path == None: + path = [] + + if key not in self.binary_templates: + obj_section, obj_type = self.templates[key]["_type"].split(".") + obj_schema = self.schema[obj_section][obj_type] + # TODO: There used to be a first deepcopy here, before the one in the + # return statement. 
99% sure it was unnecessary, but if things + # seem broken revisit this + path.append(traversal_history(key)) + self.binary_templates[key] = obj_schema.generate_binary(self.templates[key], self, path) + path.pop() + + binary_data_copy = [] + for chip_obj in self.binary_templates[key]: + binary_data_copy.append(chip_obj.deepcopy()) + return binary_data_copy + + +class csr_object(object): + """ + Base class for objects in a Semifore register hierarchy + + A Semifore object array is still represented as one csr_object instance, + albeit with a "count" attribute expressing how many hardware objects this + Semifore node actually corresponds to. + + Since all objects in Semifore have names and can be arrays, all csr_objects + have name and count attributes. Since arrays can be multidimensional, + count is _always_ a tuple of array sizes, even if that tuple has only one + element. Single elements will have a count of (1,). + """ + + def __init__(self, name, count): + self.name = name + self.count = count + + def replicate(self, templatized_self): + if self.count != (1,): + last_dim_obj = templatized_self + for dim in reversed(self.count): + last_dim_obj = [copy.deepcopy(last_dim_obj) for _ in range(0, dim)] + return last_dim_obj + else: + return templatized_self + + def is_field(self): + return False + + def is_singleton(self): + return False + + def singleton_obj(self): + return self + + def contains_reference(self): + return False + + +class csr_composite_object(csr_object): + """ + Base class for composite (non-leaf) CSR objects. All such objects have one + or more children + """ + + def __init__(self, name, count): + csr_object.__init__(self, name, count) + + def children(self): + raise CsrException("Unimplemented abstract method for " + type(self)) + + def check_child_rewrite(self, child, args): + """ + Check to see if the child needs to be rewritten per something in the args, and, if + so, rewrite it and return it. 
We call this a fair amount with the same child (so + work is duplicated); if that is a problem we should memoize. + """ + if self.name not in args.rewrite: + return child + if self.name not in args.rewrite_used: + args.rewrite_used[self.name] = {} + rewrite = args.rewrite[self.name] + if child.name not in rewrite: + return child + args.rewrite_used[self.name][child.name] = True + rewrite = rewrite[child.name] + if rewrite[0] == 'delete': + return None + elif rewrite[0] == 'scan_chain': + description = '' + offset = child.offset + while not isinstance(child, reg): + if hasattr(child, 'description') and child.description: + description = description + child.description + if description[-2:-1] != '\n': + description = description + '\n' + if len(child.children()) != 1 or child.count != (1,): + raise CsrException( + "unknown rewrite '%s' for %s.%s" + % (rewrite[child.name][0], name, child.name) + ) + child = child.children()[0] + if hasattr(child, 'description') and child.description: + description = description + child.description + if description[-2:-1] != '\n': + description = description + '\n' + child = scanset_reg( + rewrite[1], tuple(rewrite[2]), offset, child.width, self, child.fields + ) + child.description = description + if len(child.fields) == 1: + child.fields = copy.copy(child.fields) + child.fields[0].name = rewrite[1] + if len(rewrite) > 3: + # import pdb; pdb.set_trace() + def find_scan_sel(obj, name, offset): + desc = '' + for ch in obj.children(): + pfx = len(ch.name) + 1 + if ch.name + "." 
== name[:pfx]: + return find_scan_sel(ch, name[pfx:], offset + ch.offset) + if ch.name == name: + offset = offset + ch.offset + desc_hdr = ch.name + ':\n' + if hasattr(ch, 'description') and ch.description: + desc = desc + desc_hdr + indent_comment(' ', ch.description) + desc_hdr = '' + if ( + len(ch.fields) == 1 + and hasattr(ch.fields[0], 'description') + and ch.fields[0].description + ): + desc = ( + desc + desc_hdr + indent_comment(' ', ch.fields[0].description) + ) + return offset, desc + return None, None + + offset, desc = find_scan_sel(self, rewrite[3], 0) + if offset is None: + raise CsrException( + "No " + rewrite[3] + " in " + self.name + " for scan selector" + ) + child.sel_offset = offset + child.description = child.description + desc + return child + else: + raise CsrException( + "unknown rewrite '%s' for %s.%s" % (rewrite[el.name][0], name, el.name) + ) + return None + + def contains_reference(self): + """ + return true if this object (directly or indirectly) contains a reference to + a top_level object + """ + if not hasattr(self, 'contains_reference_cache'): + self.contains_reference_cache = False + for a in self.children(): + if a.top_level() or a.contains_reference(): + self.contains_reference_cache = True + break + return self.contains_reference_cache + + def gen_method_declarator(self, outfile, args, rtype, classname, name, argdecls, suffix): + outfile.write("%s " % rtype) + if args.gen_decl == 'defn': + outfile.write("%s::" % classname) + outfile.write("%s(" % name) + first = True + for a in argdecls: + if not first: + outfile.write(", ") + if type(a) is tuple: + outfile.write(a[0]) + if args.gen_decl != 'defn': + outfile.write(" = " + a[1]) + else: + outfile.write(a) + first = False + outfile.write(")") + if suffix != '': + outfile.write(" %s" % suffix) + if args.gen_decl == 'decl': + outfile.write(";\n") + return True + outfile.write(" {\n") + return False + + def gen_emit_method(self, outfile, args, schema, classname, name, nameargs, 
indent): + outfile.write(indent) + argdecls = ["std::ostream &out"] + for idx, argtype in enumerate(nameargs): + argdecls.append("%sna%d" % (argtype, idx)) + if args.gen_decl == 'defn': + argdecls.append("indent_t indent") + else: + argdecls.append("indent_t indent = indent_t(1)") + if self.gen_method_declarator( + outfile, args, "void", classname, "emit_json", argdecls, "const" + ): + return + indent += " " + if args.enable_disable and not self.top_level(): + outfile.write("%sif (disabled_) {\n" % indent) + outfile.write('%s out << "0";\n' % indent) + outfile.write("%s return; }\n" % indent) + outfile.write("%sout << '{' << std::endl;\n" % indent) + first = True + if self.top_level(): + if len(nameargs) > 0: + tmplen = len(name) + len(nameargs) * 10 + 32 + outfile.write("%schar tmp[%d];\n" % (indent, tmplen)) + outfile.write('%ssnprintf(tmp, sizeof(tmp), "%s"' % (indent, name)) + for i in range(0, len(nameargs)): + outfile.write(", na%d" % i) + outfile.write(");\n") + outfile.write('%sout << indent << "\\"_name\\": \\"" << tmp << "\\"";\n' % indent) + else: + outfile.write('%sout << indent << "\\"_name\\": \\"%s\\"";\n' % (indent, name)) + outfile.write('%sout << ", \\n";\n' % indent) + outfile.write( + '%sout << indent << "\\"_reg_version\\": \\"%s\\"";\n' + % (indent, schema["_reg_version"]) + ) + outfile.write('%sout << ", \\n";\n' % indent) + outfile.write( + '%sout << indent << "\\"_schema_hash\\": \\"%s\\"";\n' + % (indent, schema["_schema_hash"]) + ) + outfile.write('%sout << ", \\n";\n' % indent) + outfile.write( + '%sout << indent << "\\"_section\\": \\"%s\\"";\n' % (indent, self.parent) + ) + outfile.write('%sout << ", \\n";\n' % indent) + outfile.write( + '%sout << indent << "\\"_type\\": \\"%s.%s\\"";\n' + % (indent, self.parent, self.name) + ) + outfile.write('%sout << ", \\n";\n' % indent) + outfile.write( + '%sout << indent << "\\"_walle_version\\": \\"%s\\"";\n' + % (indent, schema["_walle_version"]) + ) + first = False + for a in 
sorted(self.children(), key=lambda a: a.name): + a = self.check_child_rewrite(a, args) + if a is None: + continue + if not first: + outfile.write('%sout << ", \\n";\n' % indent) + outfile.write('%sout << indent << "\\"%s\\": ";\n' % (indent, a.name)) + if a.disabled() and not args.expand_disabled_vector: + outfile.write('%sout << "0";\n' % indent) + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if a.count != (1,): + for idx_num, idx in enumerate(a.count): + if args.enable_disable and args.checked_array and not a.disabled(): + outfile.write("%sif (%s" % (indent, field_name)) + for i in range(0, idx_num): + outfile.write("[i%d]" % i) + outfile.write(".disabled()) {\n") + outfile.write('%s out << "0";\n' % indent) + outfile.write("%s} else {\n" % indent) + indent += ' ' + outfile.write('%sout << "[\\n" << ++indent;\n' % indent) + outfile.write( + '%sfor (int i%d = 0; i%d < %d; i%d++) { \n' + % (indent, idx_num, idx_num, idx, idx_num) + ) + outfile.write('%s if (i%d) out << ", \\n" << indent;\n' % (indent, idx_num)) + indent += ' ' + single = a.singleton_obj() + if single != a: + outfile.write( + '%sout << "{\\n" << indent+1 << "\\"%s\\": " << %s' + % (indent, a.name, field_name) + ) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(" << '\\n';\n") + outfile.write("%sout << indent << '}';\n" % indent) + elif a.is_field() or a.top_level(): + outfile.write("%sout << %s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(";\n") + elif a.disabled(): + outfile.write('%sout << 0;\n' % indent) + else: + outfile.write("%s%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".emit_json(out, indent+1);\n") + if a.count != (1,): + for i in range(0, len(a.count)): + indent = indent[2:] + outfile.write("%s}\n" % indent) + 
outfile.write("%sout << '\\n' << --indent << ']';\n" % indent) + if args.enable_disable and args.checked_array and not a.disabled(): + indent = indent[2:] + outfile.write("%s}\n" % indent) + first = False + outfile.write("%sout << '\\n' << indent-1 << \"}\";\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_uint_conversion(self, outfile, args, classname, indent): + pass + + def gen_emit_binary_method(self, outfile, args, classname, indent): + def child_name(child): + name = child.name + if name in args.cpp_reserved: + name += '_' + return name + + def field_name(child): + name = child_name(child) + if child.count != (1,): + for i in range(0, len(child.count)): + name += "[j%d]" % i + return name + + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "void", + classname, + "emit_binary", + ["std::ostream &out", "uint64_t a"], + "const", + ): + return + indent += " " + if args.enable_disable: + outfile.write("%sif (disabled_) return;\n" % indent) + root_parent = self.parent + while type(root_parent) is not str: + root_parent = root_parent.parent + addr_decl = "auto " + for a in self.children(): + addr_var = "a" + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + if root_parent == "memories": + indirect = True + width_unit = 128 + address_unit = 16 + type_tag = 'D' + elif a.name in args.write_dma: + indirect = True + width_unit = 32 + address_unit = 1 + type_tag = 'B' + else: + indirect = False + width_unit = 32 + address_unit = 1 + type_tag = 'R' + if isinstance(a, scanset_reg): + a.output_binary(outfile, args, indent, address_unit, width_unit) + continue + if args.enable_disable: + outfile.write("%sif (!%s.disabled()) {\n" % (indent, child_name(a))) + indent += ' ' + if indirect and type(a) is reg: + outfile.write( + "%sout << binout::tag('%s') << binout::byte8" % (indent, type_tag) + + "(a + 0x%x) << binout::byte4(%d) << binout::byte4(%d);\n" + % ( + a.offset // 
address_unit, + width_unit, + product(a.count) * a.width // width_unit, + ) + ) + if a.count != (1,): + if args.enable_disable: + outfile.write("%sauto addr = a;\n" % indent) + else: + outfile.write("%s%saddr = a;\n" % (indent, addr_decl)) + addr_decl = "" + addr_var = "addr" + for idx_num, idx in enumerate(a.count): + outfile.write( + '%sfor (int j%d = 0; j%d < %d; j%d++) { \n' + % (indent, idx_num, idx_num, idx, idx_num) + ) + indent += ' ' + single = a.singleton_obj() + if not indirect and single != a: + # FIXME -- should check each element being written singly to see if its + # disabled and not write it if so? The generate_binary code does not + # do that, so we don't emit C++ code to do it either. + # Would it cause problems for register arrays that are actually wideregs + # under the hood? See 3.2.1.1 in the Tofino Switch Architecture doc. + outfile.write("%sif (!%s.disabled()) {\n" % (indent, field_name(a))) + indent += ' ' + if single.msb >= 64: + for w in ( + list(range(single.msb // 32, -1, -1)) + if args.reverse_write + else list(range(0, single.msb // 32 + 1)) + ): + outfile.write( + "%sout << binout::tag('R') << binout::byte4" % indent + + "(%s + 0x%x) << binout::byte4(%s.value.getrange(%d, 32));\n" + % (addr_var, a.offset // address_unit + 4, field_name(a), w * 32) + ) + else: + if not args.reverse_write: + outfile.write( + "%sout << binout::tag('R') << binout::byte4" % indent + + "(%s + 0x%x) << binout::byte4(%s);\n" + % (addr_var, a.offset // address_unit, field_name(a)) + ) + if single.msb >= 32: + outfile.write( + "%sout << binout::tag('R') << binout::byte4" % indent + + "(%s + 0x%x) << binout::byte4(%s >> 32);\n" + % (addr_var, a.offset // address_unit + 4, field_name(a)) + ) + if args.reverse_write: + outfile.write( + "%sout << binout::tag('R') << binout::byte4" % indent + + "(%s + 0x%x) << binout::byte4(%s);\n" + % (addr_var, a.offset // address_unit, field_name(a)) + ) + indent = indent[2:] + outfile.write("%s}\n" % indent) + else: + 
outfile.write(indent) + if a.top_level(): + outfile.write("if (%s)" % field_name(a)) + outfile.write(field_name(a)) + outfile.write("->" if a.top_level() else ".") + outfile.write( + "emit_binary(out, %s + 0x%x);\n" % (addr_var, a.offset // address_unit) + ) + if a.count != (1,): + outfile.write("%saddr += 0x%x;\n" % (indent, a.address_stride() // address_unit)) + for i in range(0, len(a.count)): + indent = indent[2:] + outfile.write("%s}\n" % indent) + if args.enable_disable: + indent = indent[2:] + outfile.write("%s}\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_input_binary_method(self, outfile, args, classname, indent): + def child_name(child): + name = child.name + if name in args.cpp_reserved: + name += '_' + return name + + def field_name(child): + name = child_name(child) + if child.count != (1,): + for i in range(0, len(child.count)): + name += "[i%d]" % i + return name + + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "void", + classname, + "input_binary", + ["uint64_t a", "char t", "uint32_t *d", "size_t l"], + "", + ): + return + indent += " " + root_parent = self.parent + while type(root_parent) is not str: + root_parent = root_parent.parent + if root_parent == "memories": + width_unit = 128 + address_unit = 16 + outfile.write("%sBUG_CHECK(t == 'D', \"'%%c' tag in memories\", t);\n" % indent) + else: + width_unit = 32 + address_unit = 1 + outfile.write( + "%sBUG_CHECK(t != 'D', \"'%%c' tag in %s\", t);\n" % (indent, root_parent) + ) + first = True + for a in sorted(self.children(), key=lambda a: -a.offset): + outfile.write( + '%s%sif (a >= 0x%x) {\n' + % (indent, '' if first else '} else ', a.offset // address_unit) + ) + indent += ' ' + t = a + a = self.check_child_rewrite(a, args) + if a is None: + outfile.write( + '%sstd::cerr << "Address in ignored reg " << ' % indent + + 'string_regname(this, this+1) << ".%s" << std::endl;\n' % t.name + ) + elif isinstance(a, scanset_reg): + 
a.input_binary(outfile, args, indent, address_unit, width_unit) + elif a.disabled(): + outfile.write( + '%sstd::cerr << "Address in disabled reg " << ' % indent + + 'string_regname(this, this+1) << ".%s" << std::endl;\n' % a.name + ) + else: + outfile.write('%sa -= 0x%x;\n' % (indent, a.offset // address_unit)) + idx_suffix = '' + if a.count != (1,): + outfile.write( + '%ssize_t idx = a / 0x%x;\n' % (indent, a.address_stride() // address_unit) + ) + for idx_num, idx in reversed(list(enumerate(a.count))): + outfile.write('%sint i%d = idx %% %d;\n' % (indent, idx_num, idx)) + if idx_num == 0: + outfile.write( + '%sBUG_CHECK(idx < %d, "Index too' % (indent, idx) + + ' large for %%s.%s[%%zd]",\n' % a.name + ) + outfile.write( + '%s ' % indent + + 'string_regname(this, this+1).c_str(), idx);\n' + ) + else: + outfile.write('%sidx /= %d;\n' % (indent, idx)) + idx_suffix = ('[i%d]' % idx_num) + idx_suffix + outfile.write( + '%sa -= 0x%x * %s' + % (indent, a.address_stride() // address_unit, '(' * (len(a.count) - 1)) + ) + for idx_num, idx in enumerate(a.count): + if idx_num != 0: + outfile.write('*%d + ' % idx) + outfile.write('i%d' % idx_num) + if idx_num != 0: + outfile.write(')') + outfile.write(';\n') + # outfile.write('%sstd::cout << string_regname(this, this+1) << ".%s' % + # (indent, a.name)) + # if a.count != (1,): + # for idx_num, idx in enumerate(a.count): + # outfile.write('[" << i%d << "]' % idx_num) + # outfile.write('" << std::endl;\n'); + access = '.' 
+ if a.top_level(): + outfile.write('%sif (!%s) {\n' % (indent, field_name(a))) + outfile.write( + '%s auto *n = new %s;\n' % (indent, a.canon_name(a.map.object_name)[0]) + ) + outfile.write('%s auto fn = string_regname(this, this+1);\n' % indent) + outfile.write('%s declare_registers(n, sizeof(*n),\n' % indent) + outfile.write( + '%s [=](std::ostream &out, const char *addr, ' % indent + + 'const void *end) {\n' + ) + outfile.write('%s out << fn << ".%s' % (indent, child_name(a))) + if a.count != (1,): + for idx_num, idx in enumerate(a.count): + outfile.write('[" << i%d << "]' % idx_num) + outfile.write('";\n') + outfile.write('%s n->emit_fieldname(out, addr, end); });\n' % indent) + outfile.write( + '%s %s.set("%s", n); }\n' % (indent, field_name(a), child_name(a)) + ) + access = '->' + single = a.singleton_obj() + if single != a: + outfile.write( + "%sBUG_CHECK(t == 'R' && l == 1, \"tag '%%c' " % indent + + 'input to singleton %s", t);\n' % field_name(a) + ) + if single.msb >= 64: + outfile.write( + '%sBUG("widereg singleton %s not implemented");' + % (indent, field_name(a)) + ) + elif single.msb >= 32: + outfile.write( + '%sBUG_CHECK((a|4) == 4, "invalid addr %%zd in ' % indent + + '%s", a);\n' % field_name(a) + ) + outfile.write('%s%s.set_subfield(*d, a*8, 32);\n' % (indent, field_name(a))) + else: + outfile.write('%s%s = *d;\n' % (indent, field_name(a))) + elif isinstance(a, reg) and a.count != (1,): + outfile.write( + '%sBUG_CHECK(a == 0 || l == 1, "%%" PRIu64 " off ' % indent + + 'start of %s", a);\n' % a.name + ) + if a.width % 32 != 0: + raise CsrException("Register %s width not a multiple of 32" % a.name) + size = a.width // 32 + outfile.write('%swhile (l > %d) {\n' % (indent, size)) + indent += ' ' + outfile.write( + '%s%s%sinput_binary(a, t, d, %d);\n' % (indent, field_name(a), access, size) + ) + outfile.write('%sd += %d; l -= %d;\n' % (indent, size, size)) + for idx_num, idx in reversed(list(enumerate(a.count))): + outfile.write('%sif (++i%d >= %d) 
{\n' % (indent, idx_num, idx)) + indent += ' ' + if idx_num != 0: + outfile.write('%si%d = 0;\n' % (indent, idx_num)) + outfile.write( + '%sBUG("Too much data for %s");%s\n' + % (indent, a.name, ' }' * (len(a.count) + 1)) + ) + indent = indent[2 * (len(a.count) + 1) :] + outfile.write( + '%s%s%sinput_binary(a, t, d, l);\n' % (indent, field_name(a), access) + ) + else: + outfile.write( + '%s%s%sinput_binary(a, t, d, l);\n' % (indent, field_name(a), access) + ) + indent = indent[2:] + first = False + outfile.write('%s}\n' % indent) + + indent = indent[2:] + outfile.write('%s}\n' % indent) + + def gen_binary_offset_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "uint64_t", + classname, + "binary_offset", + ["const void *addr", ("int *bit_offset", "0")], + "const", + ): + return + root_parent = self.parent + while type(root_parent) is not str: + root_parent = root_parent.parent + if root_parent == "memories": + width_unit = 128 + address_unit = 16 + else: + width_unit = 32 + address_unit = 1 + indent += " " + outfile.write("%suint64_t offset = 0;\n" % indent) + outfile.write("%sif (bit_offset) *bit_offset = 0;\n" % indent) + outfile.write("%sif (addr < this || addr >= this+1) " % indent) + if self.contains_reference(): + outfile.write("{\n") + indent += " " + for a in self.children(): + if a.disabled(): + continue + if not (a.top_level() or a.contains_reference()): + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if a.count != (1,): + for i, idx in enumerate(a.count): + outfile.write( + '%sfor (int i%d = 0; i%d < %d; i%d++) { \n' % (indent, i, i, idx, i) + ) + indent += ' ' + field_name += "[i%d]" % i + outfile.write( + "%sif ((offset = %s%sbinary_offset(addr, bit_offset)) != -1)\n" + % (indent, field_name, "->" if a.top_level() else ".") + ) + outfile.write("%s return offset + 0x%x" % (indent, a.offset // address_unit)) + if a.count != (1,): + for 
i, idx in enumerate(a.count): + stride = a.address_stride() // address_unit + for cnt in a.count[i + 1 :]: + stride = stride * cnt + outfile.write(" + i%d*0x%x" % (i, stride)) + outfile.write(";\n") + if a.count != (1,): + for i, idx in enumerate(a.count): + indent = indent[2:] + outfile.write("%s}\n" % indent) + + indent = indent[2:] + outfile.write("%s}\n" % indent) + else: + outfile.write("return -1;\n") + + first = True + for a in sorted(self.children(), key=lambda a: a.name, reverse=True): + if a.disabled(): + continue + if a.top_level(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + outfile.write(indent) + if first: + first = False + else: + outfile.write("} else ") + outfile.write("if (addr >= &%s) {\n" % field_name) + indent += " " + outfile.write("%soffset = 0x%x;\n" % (indent, a.offset // address_unit)) + if a.count != (1,): + for i, idx in enumerate(a.count): + outfile.write("%sif (addr < &%s[0]) return offset;\n" % (indent, field_name)) + outfile.write( + "%sauto i%d = ((char *)addr - (char *)&%s[0])/sizeof(%s[0]);\n" + % (indent, i, field_name, field_name) + ) + stride = a.address_stride() // address_unit + for cnt in a.count[i + 1 :]: + stride = stride * cnt + outfile.write("%soffset += i%d * 0x%x;\n" % (indent, i, stride)) + field_name += "[i%d]" % i + single = a.singleton_obj() + if not single.is_field() and not single.top_level(): + outfile.write( + "%soffset += %s.binary_offset(addr, bit_offset);\n" % (indent, field_name) + ) + indent = indent[2:] + if first: + outfile.write("%sreturn -1;\n" % indent) + else: + outfile.write("%s} else {\n" % indent) + outfile.write("%s return -1;\n" % indent) + outfile.write("%s}\n" % indent) + outfile.write("%sreturn offset;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_fieldname_method(self, outfile, args, classname, indent): + outfile.write(indent) + if 
self.gen_method_declarator( + outfile, + args, + "void", + classname, + "emit_fieldname", + ["std::ostream &out", "const char *addr", "const void *end"], + "const", + ): + return + indent += " " + if not self.is_singleton(): + outfile.write("%sif ((void *)addr == this && end == this+1) return;\n" % indent) + first = True + for a in sorted(self.children(), key=lambda a: a.name, reverse=True): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + outfile.write(indent) + if first: + first = False + else: + outfile.write("} else ") + outfile.write("if (addr >= (char *)&%s) {\n" % field_name) + indent += " " + outfile.write('%sout << ".%s";\n' % (indent, a.name)) + if a.count != (1,): + for i, idx in enumerate(a.count): + outfile.write( + "%sint i%d = (addr - (char *)&%s[0])/(int)sizeof(%s[0]);\n" + % (indent, i, field_name, field_name) + ) + if idx > 1: + outfile.write( + "%sif (i%d < 0 || (i%d == 0 && 1 + &%s" % (indent, i, i, field_name) + ) + outfile.write(" == end)) return;\n") + outfile.write("%sout << '[' << i%d << ']';\n" % (indent, i)) + field_name += "[i%d]" % i + single = a.singleton_obj() + if not single.is_field() and not single.top_level(): + outfile.write("%s%s.emit_fieldname(out, addr, end);\n" % (indent, field_name)) + indent = indent[2:] + if not first: + outfile.write("%s}\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_unpack_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, args, "int", classname, "unpack_json", ["json::obj *obj"], "" + ): + return + indent += " " + outfile.write("%sint rv = 0;\n" % indent) + outfile.write("%sjson::map *m = dynamic_cast(obj);\n" % indent) + outfile.write("%sif (!m) return -1;\n" % indent) + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = 
self.check_child_rewrite(a, args) + if a is None: + continue + index_num = 0 + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sif (json::vector *v%s = dynamic_cast(" + % (indent, index_num) + ) + indent += " " + if index_num > 0: + outfile.write("(*v%d)[i%d].get()" % (index_num - 1, index_num - 1)) + else: + outfile.write('(*m)["%s"].get()' % a.name) + outfile.write("))\n") + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + index_num = len(a.count) + single = a.singleton_obj() + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if single != a: + outfile.write("%sif (json::map *s = dynamic_cast(" % indent) + indent += " " + if index_num > 0: + outfile.write("(*v%d)[i%d].get()" % (index_num - 1, index_num - 1)) + else: + outfile.write('(*m)["%s"].get()' % a.name) + outfile.write("))\n") + outfile.write("%sif (json::number *n = dynamic_cast" % indent) + indent += " " + outfile.write('((*s)["%s"].get()))\n' % a.name) + outfile.write("%s%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(" = n->val;\n") + indent = indent[2:] + outfile.write("%selse rv = -1;\n" % indent) + indent = indent[2:] + outfile.write("%selse rv = -1;\n" % indent) + elif not a.is_field() and not a.top_level(): + outfile.write("%srv |= %s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".unpack_json(") + if index_num > 0: + outfile.write("(*v%d)[i%d].get()" % (index_num - 1, index_num - 1)) + else: + outfile.write('(*m)["%s"].get()' % a.name) + outfile.write(");\n") + else: + jtype = "json::number" + access = " = n->val" + if a.top_level(): + jtype = "json::string" + access = ".set(n->c_str(), nullptr)" + outfile.write("%sif (%s *n = dynamic_cast<%s *>(" % (indent, jtype, jtype)) + indent += " " + 
if index_num > 0: + outfile.write("(*v%d)[i%d].get()" % (index_num - 1, index_num - 1)) + else: + outfile.write('(*m)["%s"].get()' % a.name) + outfile.write(")) {\n") + outfile.write("%s%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write("%s;\n" % access) + if a.top_level(): + outfile.write( + "%s} else if (json::number *n = dynamic_cast(" % indent[2:] + ) + if index_num > 0: + outfile.write("(*v%d)[i%d].get()" % (index_num - 1, index_num - 1)) + else: + outfile.write('(*m)["%s"].get()' % a.name) + outfile.write(")) {\n") + outfile.write("%sif (n->val) rv = -1;\n" % indent) + indent = indent[2:] + outfile.write("%s} else rv = -1;\n" % indent) + for i in range(0, index_num): + indent = indent[4:] + outfile.write("%selse rv = -1;\n" % indent) + outfile.write("%sreturn rv;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_dump_unread_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "void", + classname, + "dump_unread", + ["std::ostream &out", "prefix *pfx"], + "const", + ): + return + indent += " " + need_lpfx = True + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not a.singleton_obj().is_field() and not a.top_level(): + if need_lpfx: + outfile.write("%sprefix lpfx(pfx, 0);\n" % indent) + need_lpfx = False + outfile.write('%slpfx.str = "%s' % (indent, a.name)) + if a.count != (1,): + for idx in a.count: + outfile.write("[%d]" % idx) + outfile.write('";\n') + outfile.write("%s%s" % (indent, field_name)) + if a.count != (1,): + for idx in a.count: + outfile.write("[0]") + outfile.write(".dump_unread(out, &lpfx);\n") + else: + outfile.write("%sif (!%s" % (indent, field_name)) + if a.count != (1,): + 
for idx in a.count: + outfile.write("[0]") + outfile.write('.read) out << pfx << ".%s' % a.name) + if a.count != (1,): + for idx in a.count: + outfile.write("[%d]" % idx) + outfile.write('" << std::endl;\n') + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_modified_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator(outfile, args, "bool", classname, "modified", [], "const"): + return + indent += " " + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%sif (%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".modified()) return true;\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write("%sif (%s.modified()) return true;\n" % (indent, field_name)) + outfile.write("%sreturn false;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_set_modified_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, args, "void", classname, "set_modified", [("bool v", "true")], "" + ): + return + indent += " " + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % 
(indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%s%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".set_modified(v);\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write("%s%s.set_modified(v);\n" % (indent, field_name)) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_disable_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator(outfile, args, "bool", classname, "disable", [], ""): + return + indent += " " + outfile.write("%sbool rv = true;\n" % indent) + outfile.write("%sif (modified()) {\n" % indent) + outfile.write('%s std::clog << "ERROR: Disabling modified record ";\n' % indent) + outfile.write("%s print_regname(std::clog, this, this+1);\n" % indent) + outfile.write("%s std::clog << std::endl; \n" % indent) + outfile.write("%s return false; }\n" % indent) + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%sif (%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".disable()) rv = true;\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write("%sif (%s.disable()) rv = true;\n" % (indent, field_name)) + outfile.write("%sif (rv) disabled_ = true;\n" % indent) + outfile.write("%sreturn rv;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_disable_if_reset_value_method(self, outfile, args, 
classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, args, "bool", classname, "disable_if_reset_value", [], "" + ): + return + indent += " " + outfile.write("%sbool rv = true;\n" % indent) + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%sif (!%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".disable_if_reset_value()) rv = false;\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write( + "%sif (!%s.disable_if_reset_value()) rv = false;\n" % (indent, field_name) + ) + outfile.write("%sif (rv) disabled_ = true;\n" % indent) + outfile.write("%sreturn rv;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_disable_if_unmodified_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, args, "bool", classname, "disable_if_unmodified", [], "" + ): + return + indent += " " + outfile.write("%sbool rv = true;\n" % indent) + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%sif (!%s" % (indent, 
field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".disable_if_unmodified()) rv = false;\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write( + "%sif (!%s.disable_if_unmodified()) rv = false;\n" % (indent, field_name) + ) + outfile.write("%sif (rv) disabled_ = true;\n" % indent) + outfile.write("%sreturn rv;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_disable_if_zero_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator(outfile, args, "bool", classname, "disable_if_zero", [], ""): + return + indent += " " + outfile.write("%sbool rv = true;\n" % indent) + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%sif (!%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".disable_if_zero()) rv = false;\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write("%sif (!%s.disable_if_zero()) rv = false;\n" % (indent, field_name)) + outfile.write("%sif (rv && modified()) {\n" % indent) + outfile.write('%s std::clog << "Disabling modified zero record ";\n' % indent) + outfile.write("%s print_regname(std::clog, this, this+1);\n" % indent) + outfile.write("%s std::clog << std::endl;\n" % indent) + outfile.write("%s rv = false; }\n" % indent) + outfile.write("%sif (rv) disabled_ = true;\n" % indent) + outfile.write("%sreturn rv;\n" % indent) + indent = indent[2:] + 
outfile.write("%s}\n" % indent) + + def gen_enable_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator(outfile, args, "void", classname, "enable", [], ""): + return + indent += " " + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + a = self.check_child_rewrite(a, args) + if a is None: + continue + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if not args.checked_array: + if a.count != (1,): + for index_num, idx in enumerate(a.count): + outfile.write( + "%sfor (int i%d = 0; i%d < %d; i%d++)\n" + % (indent, index_num, index_num, idx, index_num) + ) + indent += " " + outfile.write("%s%s" % (indent, field_name)) + if a.count != (1,): + for i in range(0, len(a.count)): + outfile.write("[i%d]" % i) + outfile.write(".enable();\n") + if a.count != (1,): + indent = indent[2 * len(a.count) :] + else: + outfile.write("%s%s.enable();\n" % (indent, field_name)) + outfile.write("%sdisabled_ = false;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def find_alias_arrays(self, args, classname): + self.alias_arrays = [] + potential_alias_arrays = {} + array_match = re.compile('^(\w+)_(\d+)$') + for el in self.children(): + el = self.check_child_rewrite(el, args) + if el is None: + continue + m = array_match.match(el.name) + if m: + base = m.group(1) + idx = int(m.group(2)) + typ = el.type_name(args, classname, "_" + el.name) + if base in potential_alias_arrays: + pot = potential_alias_arrays[base] + if typ != pot['type']: + pot['ok'] = False + if idx > pot['max']: + pot['max'] = idx + pot['mask'] |= 2**idx + else: + potential_alias_arrays[m.group(1)] = { + "ok": True, + "max": idx, + "mask": 2**idx, + "type": typ, + } + for base, pot in potential_alias_arrays.items(): + if pot['ok'] and pot['max'] > 0 and pot['mask'] == 2 ** (pot['max'] + 1) - 1: + self.alias_arrays.append((base, pot['type'], pot['max'] + 1)) + + def need_ctor(self): + 
if self.alias_arrays: + return True + for el in self.children(): + el = self.check_child_rewrite(el, args) + if el is None: + continue + s = el.singleton_obj() + if s.is_field() and s.default and s.default != 0: + if type(s.default) is tuple: + for v in s.default: + if v != 0: + return True + else: + return True + return False + + def gen_ctor(self, outfile, args, namestr, indent): + outfile.write("%s%s() : " % (indent, namestr)) + first = True + if args.enable_disable: + outfile.write("disabled_(false)") + first = False + for el in sorted(self.children(), key=lambda a: a.name): + if el.disabled(): + continue + el = self.check_child_rewrite(el, args) + if el is None: + continue + s = el.singleton_obj() + if s.is_field() and s.default and s.default != 0: + if type(s.default) is tuple: + ok = True + for v in s.default: + if v != 0: + ok = False + break + if ok: + continue + if first: + first = False + else: + outfile.write(", ") + outfile.write(el.name) + if el.name in args.cpp_reserved: + outfile.write('_') + if type(s.default) is tuple and len(s.default) > 1: + outfile.write("({ ") + for v in s.default: + outfile.write(str(v) + ", ") + outfile.write("})") + else: + outfile.write('(%d)' % s.default) + if hasattr(self, 'alias_arrays'): + for alias in self.alias_arrays: + outfile.write(",\n%s %s({" % (indent, alias[0])) + for idx in range(0, alias[2]): + if idx > 0: + outfile.write(",") + outfile.write(" &this->%s_%d" % (alias[0], idx)) + outfile.write(" })") + outfile.write(' {}\n') + + def canon_name(self, name): + namestr = '' + nameargs = [] + format = False + islong = False + for ch in name: + if format: + if ch in string.digits: + continue + elif ch == 'l': + islong = True + continue + elif ch == 'd' or ch == 'i' or ch == 'u' or ch == 'x': + nameargs.append('long ' if islong else 'int ') + elif ch == 'e' or ch == 'f' or ch == 'g': + nameargs.append('double ' if islong else 'float ') + elif ch == 's': + nameargs.append('const char *') + else: + raise 
CsrException("unknown conversion '%%%s' in name\n" % ch) + format = False + elif ch in string.ascii_letters or ch in string.digits or ch == '_': + namestr += ch + elif ch == '.': + namestr += '_' + elif ch == '%': + format = True + islong = False + else: + raise CsrException("invalid character '%s' in name\n" % ch) + return namestr, nameargs + + def type_name(self, args, parent, name): + namestr, nameargs = self.canon_name(name) + # FIXME -- should be checking for global names in args.global? + classname = parent + if classname != '': + classname += '::' + classname += namestr + rv = 'struct ' + classname + if self.count != (1,): + if args.checked_array: + for idx in self.count: + rv = "checked_array<%d, %s>" % (idx, rv) + else: + for idx in self.count: + rv = "%s[%d]" % (rv, idx) + return rv + + def gen_type(self, outfile, args, schema, parent, name, indent): + namestr, nameargs = self.canon_name(name) + classname = parent + if classname != '': + classname += '::' + classname += namestr + if args.alias_array and not hasattr(self, 'alias_arrays'): + self.find_alias_arrays(args, classname) + if args.gen_decl != 'defn': + indent += " " + outfile.write("struct %s {\n" % namestr) + if args.enable_disable: + outfile.write("%sbool disabled_;\n" % indent) + outfile.write("%sbool disabled() const { return disabled_; }\n" % indent) + if args.enable_disable or self.need_ctor(): + self.gen_ctor(outfile, args, namestr, indent) + if self.top_level(): + outfile.write( + '%sstatic constexpr const char *_reg_version = "%s";\n' + % (indent, schema['_reg_version']) + ) + outfile.write( + '%sstatic constexpr const char *_schema_hash = "%s";\n' + % (indent, schema['_schema_hash']) + ) + for el in sorted(self.children(), key=lambda a: a.name): + if el.disabled(): + continue + el = self.check_child_rewrite(el, args) + if el is None: + continue + typ = el.singleton_obj() + notclass = typ.is_field() or typ.top_level() + isglobal = el.name in args.global_types + if args.gen_decl != 'defn': 
+ if hasattr(el, 'description') and el.description: + format_comment(outfile, indent, el.description) + if typ != el and hasattr(typ, 'description') and typ.description: + format_comment(outfile, indent, typ.description) + outfile.write(indent) + if args.checked_array and notclass and el.count != (1,): + for idx in el.count: + outfile.write("checked_array<%d, " % idx) + eltypenamestr = el.name + if isglobal: + eltypenamestr = "::" + eltypenamestr + else: + eltypenamestr = "_" + eltypenamestr + if el.name == self.name: + # FIXME -- maybe should elide the element if it is the only one? + # sort of like singleton_obj but deal with arrays too + eltypenamestr = eltypenamestr + "_el" + typ.gen_type(outfile, args, schema, classname, eltypenamestr, indent) + if args.gen_decl != 'defn': + field_name = el.name + if field_name in args.cpp_reserved: + field_name += '_' + if args.checked_array and el.count != (1,): + if not notclass: + if not isglobal: + outfile.write(";\n%s" % indent) + for idx in el.count: + outfile.write("checked_array<%d, " % idx) + outfile.write(eltypenamestr) + if el.count != (1,): + for idx in el.count: + outfile.write(">") + outfile.write(" %s;\n" % field_name) + else: + outfile.write(" %s" % field_name) + if el.count != (1,): + for idx in el.count: + outfile.write("[%d]" % idx) + outfile.write(";\n") + if args.gen_decl != 'defn' and hasattr(self, 'alias_arrays'): + for alias in self.alias_arrays: + outfile.write( + "%salias_array<%d, %s> %s;\n" % (indent, alias[2], alias[1], alias[0]) + ) + if args.delete_copy and args.gen_decl != 'defn': + if not args.enable_disable and not self.need_ctor(): + outfile.write("%s%s() = default;\n" % (indent, namestr)) + outfile.write("%s%s(const %s &) = delete;\n" % (indent, namestr, namestr)) + outfile.write("%s%s(%s &&) = delete;\n" % (indent, namestr, namestr)) + if args.emit_json: + self.gen_emit_method(outfile, args, schema, classname, name, nameargs, indent) + if args.emit_binary: + 
self.gen_uint_conversion(outfile, args, classname, indent) + self.gen_emit_binary_method(outfile, args, classname, indent) + if args.input_binary: + self.gen_input_binary_method(outfile, args, classname, indent) + if args.binary_offset: + self.gen_binary_offset_method(outfile, args, classname, indent) + if args.emit_fieldname: + self.gen_fieldname_method(outfile, args, classname, indent) + if args.unpack_json: + self.gen_unpack_method(outfile, args, classname, indent) + if args.dump_unread: + self.gen_dump_unread_method(outfile, args, classname, indent) + if args.enable_disable: + self.gen_modified_method(outfile, args, classname, indent) + self.gen_set_modified_method(outfile, args, classname, indent) + self.gen_disable_method(outfile, args, classname, indent) + self.gen_disable_if_reset_value_method(outfile, args, classname, indent) + self.gen_disable_if_unmodified_method(outfile, args, classname, indent) + self.gen_disable_if_zero_method(outfile, args, classname, indent) + self.gen_enable_method(outfile, args, classname, indent) + if args.gen_decl != 'defn': + indent = indent[2:] + outfile.write("%s}" % indent) + + def gen_global_types(self, outfile, args, schema): + for a in sorted(self.children(), key=lambda a: a.name): + if a.disabled(): + continue + if not a.is_field() and not a.top_level(): + a.gen_global_types(outfile, args, schema) + if a.name in args.global_types: + if a.name in args.global_types_generated: + if args.global_types_generated[a.name] != a: + raise CsrException("Inconsistent definition of type " + a.name) + else: + args.global_types_generated[a.name] = a + a.gen_type(outfile, args, schema, "", a.name, "") + outfile.write(";\n") + + +class address_map(csr_composite_object): + """ + A Semifore addressmap. Contains registers and instances of other + addressmaps. + + In practice, the count of an address_map is always (1,) and it is the + instances of the addressmap that are actually arrays. 
+ + @attr templatization_behavior + Controls how this address map gets used during template generation: + - If None, it is expanded as a dictionary wherever it appears in the + register hierarchy + - If "top_level", it is split off into its own JSON file and + replaced wherever it appears in the register hierarchy with a + string reference to that JSON file + - If "disabled", it is replaced wherever it appears in the register + hierarchy with a 0 (indicating "don't write"). No JSON file for + the address map is generated + @attr objs + An ordered list of objects contained in the addressmap - either regs, + groups, or address_map_instances + + @attr parent + A string indicating which parent of the chip hierarchy the addressmap + falls under ("memories" or "regs) + """ + + def __init__(self, name, count, parent): + csr_composite_object.__init__(self, name, count) + + self.templatization_behavior = None + self.objs = [] + self.parent = parent + + def min_width(self, round_to_power_of_2=True): + """ + Some addressmap arrays have an explicit "stride" specifying how much + address space each element takes up. When they don't, we calculate the + stride by summing up the widths of all contained objects and rounding + to the next highest power of 2. + + Whether an addressmap has a stride or not is up to the programmer of + the original Semifore CSR and, as far as Walle is conserned, arbitrary. 
+ """ + width = 0 + for obj in self.objs: + obj_end = obj.offset * 8 + if type(obj) is reg: + obj_end += obj.width * product(obj.count) + elif type(obj) is address_map_instance or type(obj) is group: + try: + multiplier = product(obj.count) * obj.stride + except: + multiplier = 1 + obj_end += obj.min_width() * multiplier + else: + raise CsrException("Unrecognized object in address map ('" + type(obj) + "')") + if obj_end > width: + width = obj_end + + width //= 8 + + if round_to_power_of_2: + # Round stride up to the next largest power of 2 + round_width = 1 + while round_width < width: + round_width *= 2 + return round_width + else: + return width + + def generate_binary(self, data, cache, path): + if data == 0: + # No-op + return {} + elif isinstance(data, basestring): + # Refernce to template + + type_name = self.parent + "." + self.name + + if data not in cache.templates: + raise CsrException("Could not find template with name '" + data + "'") + + if cache.get_type(data) != type_name: + raise CsrException( + "Expected type of instantiated object '" + + data + + "' to be '" + + type_name + + "', found '" + + cache.get_type(data) + + "'" + ) + + return cache.get_data(data, path) + elif type(data) is dict: + # Actual data + reg_values = [] + for obj in self.objs: + if obj.name not in data: + raise CsrException("Could not find key '" + obj.name + "'") + if data[obj.name] != 0 and not isinstance(data[obj.name], basestring): + if obj.count == (1,): + if type(data[obj.name]) is not dict: + raise CsrException("Expected dictionary at key '" + obj.name + "'") + else: + # TODO: check all dimensions are the right size, maybe if a 'strict errors' flag is used + if type(data[obj.name]) is not list or len(data[obj.name]) != obj.count[0]: + array_size = "x".join(map(str, obj.count)) + raise CsrException( + "Expected " + + array_size + + " element array of dictionaries at key '" + + obj.name + + "'" + ) + + if type(obj) is reg: + if obj.count == (1,): + chip_obj = 
obj.generate_binary(data[obj.name], cache, path) + reset_value = obj.get_reset_value() + + if chip_obj is not None and reset_value is not None: + # Check if a non-zero value to put into the binary file is the same as the reset (initial) value. + # (We will continue to write zero values, for caution's sake. + # Would block writes work if leave out subsection?) + # If the non-zero value is the reset value, do not output it in the binary file. + # We are having too many problems where the driver is clearing things (like interrupt enables) + # before the binary file is loaded, and then the binary file re-enables them. + if reset_value == chip_obj.orig_value: + # print "Skipping setting %s, because it has the same value (%s) as its reset value of %s." % (obj.name, str(chip_obj), hex(reset_value)) + continue + + if chip_obj != None: + reg_values.append(chip_obj) + elif data[obj.name] != 0: + # TODO: we should be able to DMA anything into the chip, + # so this count > 4 heuristic should work well + # + # but right now the model doesn't implement chip- + # side register addresses, so we have to force + # direct register writes for the regs part of + # the schema and DMA for the mem part. lame. + # use this count heuristic once the model is fixed. + # + # if product(obj.count) > 4: + root_parent = obj.parent + while type(root_parent) is not str: + root_parent = root_parent.parent + # Force the mapram_config register programming + # on the DMA block write path to avoid a race during + # chip init where the map ram is being written and the + # ECC mode is also being configured. Since the map ram + # is written with block writes, forcing this register + # configuration on the same path removes the race. 
+ + registers_to_write_with_dma = [ + "mapram_config", + "imem_dark_subword16", + "imem_dark_subword32", + "imem_dark_subword8", + "imem_mocha_subword16", + "imem_mocha_subword32", + "imem_mocha_subword8", + "imem_subword16", + "imem_subword32", + "imem_subword8", + "galois_field_matrix", + ] + if product(obj.count) > 4 and ( + root_parent == "memories" or obj.name in registers_to_write_with_dma + ): + mem = chip.dma_block( + obj.offset, + obj.width, + src_key=obj.name, + is_reg=root_parent == "regs", + ) + + def mem_loop(sub_data, context): + path[-1].path.append(context) + for idx in range(0, obj.count[-1]): + context[-1] = idx + obj.generate_binary(sub_data[idx], cache, path, mem) + path[-1].path.pop() + + nd_array_loop(obj.count, data[obj.name], mem_loop) + reg_values.append(mem) + else: + offset = [0] + width = obj.width // 8 # TODO: warn if not power of (8 or 32 or w/e)? + + def reg_loop(sub_data, context): + path[-1].path.append(context) + for idx in range(0, obj.count[-1]): + context[-1] = idx + chip_obj = obj.generate_binary(sub_data[idx], cache, path) + if chip_obj != None: + chip_obj.add_offset(offset[0]) + reg_values.append(chip_obj) + offset[0] += width + path[-1].path.pop() + + nd_array_loop(obj.count, data[obj.name], reg_loop) + + elif type(obj) is address_map_instance or type(obj) is group: + if obj.count == (1,): + sub_chip_objs = obj.generate_binary(data[obj.name], cache, path) + for sub_chip_obj in sub_chip_objs: + sub_chip_obj.add_offset(obj.offset) + reg_values.append(sub_chip_obj) + elif data[obj.name] != 0: + offset = [0] + + def addr_map_loop(sub_data, context): + path[-1].path.append(context) + for idx in range(0, obj.count[-1]): + context[-1] = idx + sub_chip_objs = obj.generate_binary(sub_data[idx], cache, path) + for sub_chip_obj in sub_chip_objs: + sub_chip_obj.add_offset(obj.offset + offset[0]) + reg_values.append(sub_chip_obj) + offset[0] += obj.stride + path[-1].path.pop() + + nd_array_loop(obj.count, data[obj.name], addr_map_loop) + 
else: + raise CsrException("Unrecognized object in address map ('" + type(obj) + "')") + + return reg_values + else: + raise CsrException( + "Expected dictionary at addressmap node '%s' but found value of type %s" + % (self.name, type(data).__name__) + ) + + def generate_template(self, inject_size): + self_dict = {} + if self.templatization_behavior == "disabled": + return None + if self.templatization_behavior == "top_level": + self_dict["_type"] = self.parent + "." + self.name + self_dict["_name"] = "template(" + self_dict["_type"] + ")" + for obj in self.objs: + self_dict[obj.name] = obj.generate_template(inject_size) + return self.replicate(self_dict) + + def children(self): + return self.objs + + def is_singleton(self): + return len(self.objs) == 1 and self.objs[0].count == (1,) + + def disabled(self): + return self.templatization_behavior == "disabled" + + def top_level(self): + return self.templatization_behavior == "top_level" + + def generate_cpp(self, outfile, args, schema): + try: + name = args.name + except AttributeError: + name = self.name + self.gen_type(outfile, args, schema, '', name, '') + all_used = True + for obj in args.rewrite: + if obj not in args.rewrite_used: + sys.stderr.write("Rewrite object %s not found\n" % obj) + all_used = False + else: + for child in args.rewrite[obj]: + if child not in args.rewrite_used[obj]: + sys.stderr.write("Rewrite child %s.%s not found\n" % (obj, child)) + all_used = False + if not all_used: + raise CsrException("Unused rewrite clauses in templates") + + def print_as_text(self, indent): + if self.templatization_behavior != "disabled": + print("%saddress_map %s%s:" % (indent, self.name, str(self.count))) + for ch in self.objs: + ch.print_as_text(indent + " ") + + +class address_map_instance(csr_composite_object): + """ + TODO: docstring + @attr offset + offset from the start of the containing address_map (instance) + @attr map + address_map object that is an instance of + @attr stride + If @count is not (1,), 
this is the offset from each instance in the array to + the next. If @count is (1,) this should be null + """ + + def __init__(self, name, count, offset, addrmap, stride): + csr_composite_object.__init__(self, name, count) + + self.offset = offset + self.map = addrmap + self.stride = stride + + def min_width(self): + return self.map.min_width() + + def generate_binary(self, data, cache, path): + path[-1].path.append(self.name) + binary = self.map.generate_binary(data, cache, path) + path[-1].path.pop() + return binary + + def generate_template(self, inject_size): + if self.map.templatization_behavior == "top_level": + return self.replicate(self.map.name + "_object") + elif self.map.templatization_behavior == "disabled": + return self.replicate(0) + else: + return self.replicate(self.map.generate_template(inject_size)) + + def children(self): + return self.map.objs + + def is_singleton(self): + return len(self.map.objs) == 1 and self.map.objs[0].count == (1,) + + def disabled(self): + return self.map.templatization_behavior == "disabled" + + def top_level(self): + return self.map.templatization_behavior == "top_level" + + def address_stride(self): + return self.stride + + def type_name(self, args, parent, name): + self.map.type_name(args, parent, name) + + def gen_type(self, outfile, args, schema, parent, name, indent): + if self.map.templatization_behavior == "disabled": + raise CsrException("disabled address_map hit in gen_type") + elif self.map.templatization_behavior == "top_level": + if args.gen_decl != 'defn': + tname = self.map.object_name + if tname is None: + tname = self.map.parent + '.' 
+ self.map.name + outfile.write("register_reference" % self.canon_name(tname)[0]) + else: + self.map.gen_type(outfile, args, schema, parent, name, indent) + + def print_as_text(self, indent): + print( + "%saddress_map_instance %s%s: offset=0x%x%s" + % ( + indent, + self.name, + str(self.count), + self.offset, + " stride=0x%x" % self.stride if self.stride else "", + ) + ) + if self.map.templatization_behavior == "top_level": + print( + "%s address_map %s%s: (top level %s)" + % (indent, self.name, str(self.count), self.map.name) + ) + else: + self.map.print_as_text(indent + " ") + + +class group(address_map): + """ + TODO: docstring + @attr stride + If @count is not (1,) this the offset from each element to the next + If @count is (1,) this should be null + @attr offset + offset from the start of the containing addres_map + """ + + def __init__(self, name, count, offset, parent, stride): + address_map.__init__(self, name, count, parent) + self.stride = stride + self.offset = offset + + def generate_binary(self, data, cache, path): + path[-1].path.append(self.name) + binary = address_map.generate_binary(self, data, cache, path) + path[-1].path.pop() + return binary + + def min_width(self): + """ + A group array's stride, unlike addressmap instance arrays, is not + rounded up to a power of two if it has to be calculated. 
+ """ + if self.stride != None: + return self.stride + else: + return address_map.min_width(self, round_to_power_of_2=False) + + def address_stride(self): + return self.stride + + def print_as_text(self, indent): + print( + "%sgroup %s%s: offset=0x%x%s" + % ( + indent, + self.name, + str(self.count), + self.offset, + " stride=0x%x" % self.stride if self.stride else "", + ) + ) + for ch in self.objs: + ch.print_as_text(indent + " ") + + +class reg(csr_composite_object): + """ + TODO: docstring + @attr parent + Containing address_map object + @attr offset + Offset from the start of the containing address_map_instance + @attr width + width in bits + @attr fields + vector of fields in the register + """ + + def __init__(self, name, count, offset, width, parent): + csr_composite_object.__init__(self, name, count) + + self.parent = parent + self.offset = offset + self.width = width + self.fields = [] + + def __str__(self): + f = "(" + sep = "" + for x in self.fields: + f += sep + str(x) + sep = ", " + f += ")" + return "reg %s fields:%s" % (self.name, f) + + def get_reset_value(self): + rv = 0 + for f in self.fields: + rv |= f.default[0] << f.lsb + return rv + + def generate_binary(self, data, cache, path, mem=None): + if data == 0: + # No-op + return None + elif isinstance(data, basestring): + # Refernce to template + + path[-1].path.append(self.name) + + type_name = self.parent + "." 
+ self.name + + if data not in cache.templates: + raise CsrException("Could not find template with name '" + data + "'") + + if cache.get_type(data) != type_name: + raise CsrException( + "Expected type of instantiated object '" + + data + + "' to be '" + + type_name + + "', found '" + + cache.get_type(data) + + "'" + ) + + cached_data = cache.get_data(data, path) + path[-1].path.pop() + + if mem: + mem.add_word(cached_data) + else: + return cached_data + elif type(data) is dict: + path[-1].path.append(self.name) + + reg_value = [0] + # TODO: put field names in path histories + for field in self.fields: + + if field.name not in data: + raise CsrException("Could not find key '" + field.name + "'") + + width = field.msb - field.lsb + 1 + if field.count == (1,): + value = data[field.name] + if type(value) is not int: + raise CsrException( + "Expected integer value for field '%s.%s' but found value of type %s" + % (self.name, field.name, type(value).__name__) + ) + elif value < 0: + raise CsrException( + "Value for field '%s.%s' is negative (%i)" + % (self.name, field.name, value) + ) + elif value <= pow(2, width): + reg_value[0] |= value << field.lsb + else: + raise CsrException( + "Width of field '%s.%s' (%i bits) not large enough to hold value (%i)" + % (self.name, field.name, width, value) + ) + else: + offset = [0] + + def field_loop(sub_data, context): + path[-1].path.append(context) + for idx in range(0, field.count[-1]): + context[-1] = idx + value = sub_data[idx] + if type(value) is not int: + raise CsrException( + "Expected integer value for field '%s.%s%s' but found value of type %s" + % ( + self.name, + field.name, + array_str(context), + type(value).__name__, + ) + ) + elif value < 0: + raise CsrException( + "Value for field '%s.%s%s' is negative (%i)" + % (self.name, field.name, array_str(context), value) + ) + elif value <= pow(2, width): + reg_value[0] |= value << field.lsb + offset[0] + offset[0] += width + else: + raise CsrException( + "Width of field 
'%s.%s%s' (%i bits) not large enough to hold value (%i)" + % (self.name, field.name, array_str(context), width, value) + ) + path[-1].path.pop() + + # TODO: check all dimension sizes + if ( + type(data[field.name]) is not list + or len(data[field.name]) != field.count[0] + ): + array_size = "x".join(map(str, field.count)) + raise CsrException( + "Expected " + + array_size + + " element array of integers at key '" + + field.name + + "'" + ) + + nd_array_loop(field.count, data[field.name], field_loop) + + path[-1].path.pop() + + if mem: + mem.add_word(reg_value[0]) + elif self.width <= 32: + return chip.direct_reg(self.offset, reg_value[0], src_key=self.name) + else: + return chip.indirect_reg(self.offset, reg_value[0], self.width, src_key=self.name) + else: + raise CsrException( + "Expected dictionary at register node '%s' but found value of type %s" + % (self.name, type(data).__name__) + ) + + def generate_template(self, inject_size): + self_dict = {} + for field in self.fields: + self_dict[field.name] = field.generate_template(inject_size) + return self.replicate(self_dict) + + def children(self): + return self.fields + + def is_singleton(self): + return len(self.fields) == 1 and self.fields[0].count == (1,) + + def singleton_obj(self): + if self.is_singleton() and self.fields[0].name == self.name: + return self.fields[0] + return self + + def address_stride(self): + return self.width // 8 + + def disabled(self): + return False + + def top_level(self): + return False + + def gen_word_expressions(self, args, prefix): + """ + generate expressions to calculate the value of each word of the register + """ + + class context: + shift = 0 + words = [] + + context.words = [None] * ((self.width + 31) // 32) + for a in self.fields: + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + if prefix: + if self.name == a.name and len(self.fields) == 1: + field_name = prefix + else: + field_name = prefix + "." 
+ field_name + context.shift = a.lsb + + def emit_field_slice(field, word, shift): + if context.words[word] is None: + context.words[word] = '' + else: + context.words[word] += " + " + if shift != 0: + context.words[word] += "(" + context.words[word] += field + if shift > 0: + context.words[word] += " << %d)" % shift + elif shift < 0: + context.words[word] += " >> %d)" % -shift + + def emit_ubits_field(index_list): + word = context.shift // 32 + shift = context.shift % 32 + name = field_name + array_str(index_list) + emit_field_slice(name, word, shift) + if shift + a.msb - a.lsb >= 32: + emit_field_slice(name, word + 1, shift - 32) + if shift + a.msb - a.lsb >= 64: + emit_field_slice(name, word + 2, shift - 64) + context.shift = context.shift + a.msb - a.lsb + 1 + + def emit_widereg_field(index_list): + word = context.shift // 32 + shift = context.shift % 32 + name = field_name + array_str(index_list) + ".value.getrange(" + emit_field_slice(name + "0, %d)" % (32 - shift), word, shift) + shift = 32 - shift + while shift < a.msb - a.lsb + 1: + word += 1 + emit_field_slice(name + "%d, 32)" % shift, word, 0) + shift += 32 + + if a.count != (1,): + if a.msb - a.lsb + 1 > 64: + count_array_loop(a.count, emit_widereg_field) + else: + count_array_loop(a.count, emit_ubits_field) + else: + if a.msb - a.lsb + 1 > 64: + emit_widereg_field(None) + else: + emit_ubits_field(None) + return context.words + + def gen_uint_conversion(self, outfile, args, classname, indent): + if self.width > 32: + return + outfile.write(indent) + if self.gen_method_declarator( + outfile, args, "", classname, "operator uint32_t", [], "const" + ): + return + outfile.write("%s return " % indent) + outfile.write("%s;\n" % self.gen_word_expressions(args, None)[0]) + outfile.write("%s}\n" % indent) + + def gen_emit_binary_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "void", + classname, + "emit_binary", + ["std::ostream &out", 
"uint64_t a"], + "const", + ): + return + indent += " " + if self.count != (1,): + pass + indirect = (self.parent.parent == "memories") or (self.name in args.write_dma) + if not indirect: + outfile.write("%sif (!disabled_) {\n" % indent) + indent += " " + pairs = enumerate(self.gen_word_expressions(args, None)) + if not indirect and args.reverse_write: + # DANGER -- certain registers must be written in reverse order (higher + # address then lower), so we reverse the order of register writes here. + # block writes must be in order (lowest to highest) as they are a block + pairs = reversed(list(pairs)) + for idx, val in pairs: + if val is None: + val = '0' + outfile.write("%sout << " % indent) + if not indirect: + outfile.write("binout::tag('R') << binout::byte4(a + %d)\n" % (idx * 4)) + outfile.write("%s << " % indent) + outfile.write("binout::byte4(%s);\n" % val) + if not indirect: + indent = indent[2:] + outfile.write("%s}\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def gen_input_binary_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "void", + classname, + "input_binary", + ["uint64_t a", "char t", "uint32_t *d", "size_t l"], + "", + ): + return + indent += ' ' + words = (self.width + 31) // 32 + indirect = self.parent.parent == "memories" or (self.name in args.write_dma) or words == 1 + # words == 1 is not really indirect, but we don't need to figure out which word is + # being written, so we can use the simpler code + zero_default = True + for a in self.fields: + if isinstance(a.default, tuple): + if reduce(ior, a.default, 0) != 0: + zero_default = False + break + elif a.default is None or a.default != 0: + zero_default = False + break + if indirect: + outfile.write( + '%sBUG_CHECK(l == %d, "expecting %d words, got %%zd in %s", l);\n' + % (indent, words, words, self.name) + ) + if zero_default: + outfile.write('%sif ((d[0]' % indent) + for i in range(1, 
words): + outfile.write('|d[%d]' % i) + outfile.write(') == 0) return;\n') + else: + outfile.write( + '%sBUG_CHECK(t == \'R\' && l == 1, "expecting direct in %s");\n' + % (indent, self.name) + ) + if zero_default: + outfile.write('%sif (d[0] == 0) return;\n' % indent) + outfile.write('%sa /= 4;\n' % indent) + for a in self.fields: + field_name = a.name + if field_name in args.cpp_reserved: + field_name += '_' + lsb = a.lsb + size = a.msb - a.lsb + 1 + + def input_ubits_field(index_list): + nonlocal lsb + outfile.write(indent) + if indirect: + word = lsb // 32 + aop = '=' + else: + outfile.write('if (a == %d) ' % (lsb // 32)) + word = 0 + aop = '|=' + outfile.write('%s%s %s ' % (field_name, array_str(index_list), aop)) + if lsb % 32 + size < 32: + outfile.write('(d[%d] >> %d) & 0x%x;\n' % (word, lsb % 32, (1 << size) - 1)) + elif lsb % 32 + size == 32: + outfile.write('d[%d] >> %d;\n' % (word, lsb % 32)) + else: + outfile.write('(d[%d] >> %d)' % (word, lsb % 32)) + if indirect: + outfile.write(' | ') + else: + outfile.write(';\n') + msb = lsb + size - 1 + for i in range(lsb // 32 + 1, msb // 32): + if indirect: + outfile.write('((uint64_t)d[%d] << %d) | ' % (i, i * 32 - lsb)) + else: + outfile.write( + '%sif (a == %d) %s%s |= (uint64_t)d[0] << %d;\n' + % (indent, i, field_name, array_str(index_list), i * 32 - lsb) + ) + if indirect: + outfile.write( + '(((uint64_t)d[%d] & 0x%x) << %d);\n' + % (msb // 32, (1 << (msb % 32 + 1)) - 1, msb // 32 * 32 - lsb) + ) + else: + outfile.write( + '%sif (a == %d) %s%s |= ((uint64_t)d[0] & 0x%x) << %d;\n' + % ( + indent, + msb // 32, + field_name, + array_str(index_list), + (1 << (msb % 32 + 1)) - 1, + msb // 32 * 32 - lsb, + ) + ) + lsb += size + + def input_widereg_field(index_list): + nonlocal lsb + outfile.write('%sBUG("widereg input not implemented");\n' % indent) + lsb += size + + if a.count != (1,): + if a.msb - a.lsb + 1 > 64: + count_array_loop(a.count, input_widereg_field) + else: + count_array_loop(a.count, 
input_ubits_field) + else: + if a.msb - a.lsb + 1 > 64: + input_widereg_field(None) + else: + input_ubits_field(None) + + indent = indent[2:] + outfile.write('%s}\n' % indent) + + def gen_binary_offset_method(self, outfile, args, classname, indent): + outfile.write(indent) + if self.gen_method_declarator( + outfile, + args, + "uint64_t", + classname, + "binary_offset", + ["const void *addr", ("int *bit_offset", "0")], + "const", + ): + return + indent += " " + outfile.write("%sif (bit_offset) {" % indent) + indent += " " + outfile.write("%s/* TDB */\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + outfile.write("%sreturn 0;\n" % indent) + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def print_as_text(self, indent): + print( + "%sreg %s%s: offset=0x%x width=%d" + % (indent, self.name, str(self.count), self.offset, self.width) + ) + for ch in self.fields: + ch.print_as_text(indent + " ") + + +class scanset_reg(reg): + """ + A register that needs to be written multiple times (same address) to hold an array + @attr parent + Containing address_map object + @attr offset + Offset from the start of the containing address_map_instance + @attr sel_offset + offset from the start of the containing address_map_instance for the selector reg + @attr width + width in bits + @attr fields + vector of fields in the register + """ + + def __init__(self, name, count, offset, width, parent, fields): + reg.__init__(self, name, count, offset, width, parent) + if isinstance(fields, list): + self.fields = fields + else: + self.fields = [fields] + + def __str__(self): + f = "(" + sep = "" + for x in self.fields: + f += sep + str(x) + sep = ", " + f += ")" + return "scanset %s%s fields:%s" % (self.name, str(self.count), f) + + def output_binary(self, outfile, args, indent, address_unit, width_unit): + # import pdb; pdb.set_trace() + name = self.name + if name in args.cpp_reserved: + name += '_' + if self.count == (1,): + raise CsrException("invalid count in 
scanset_reg") + if args.enable_disable: + outfile.write("%sif (!%s.disabled()) {\n" % (indent, name)) + indent += ' ' + if not hasattr(self, 'sel_offset'): + if args.enable_disable: + outfile.write("%sif (!%s.disabled()) {\n" % (indent, name)) + indent += ' ' + outfile.write( + "%sout << binout::tag('S') << binout::byte8(0)" % indent + + " << binout::byte4(0)\n%s " % indent + + " << binout::byte8(a + 0x%x)" % (self.offset // address_unit) + + " << binout::byte4(32) << binout::byte4(%d);\n" + % (product(self.count) * self.width // width_unit) + ) + for idx_num, idx in enumerate(self.count): + outfile.write( + '%sfor (int j%d = 0; j%d < %d; j%d++) { \n' + % (indent, idx_num, idx_num, idx, idx_num) + ) + name = name + "[j%d]" % idx_num + if hasattr(self, 'sel_offset') and idx_num == 0: + if args.enable_disable: + outfile.write("%sif (!%s.disabled()) {\n" % (indent, name)) + indent += ' ' + outfile.write( + "%sout << binout::tag('S') << binout::byte8" % indent + + "(a + 0x%x)" % (self.sel_offset // address_unit) + + " << binout::byte4(j0)\n%s " % indent + + " << binout::byte8(a + 0x%x)" % (self.offset // address_unit) + + " << binout::byte4(32) << binout::byte4(%d);\n" + % (product(self.count[1:]) * self.width // width_unit) + ) + indent += ' ' + pairs = enumerate(self.gen_word_expressions(args, name)) + for idx, val in pairs: + if val is None: + val = '0' + outfile.write("%sout << binout::byte4(%s);\n" % (indent, val)) + for i in range(0, len(self.count) + (2 if args.enable_disable else 0)): + indent = indent[2:] + outfile.write("%s}\n" % indent) + + def input_binary(self, outfile, args, indent, address_unit, width_unit): + raise CsrException("scanset_reg.input_binary not implemented") + + +class field(csr_object): + """ + TODO: docstring + @attr default + default (reset-init) value + @attr parent + containing register object + @attr msb, lsb + Range of bits in containing register for this field. 
If @count is not (1,), this + is just the first element; the second will be at lsb = msb+1 etc + """ + + def __init__(self, name, count, msb, lsb, default, parent): + csr_object.__init__(self, name, count) + + self.default = default + self.parent = parent + self.msb = msb + self.lsb = lsb + + def __str__(self): + return "%s[%d:%d]" % (self.name, self.msb, self.lsb) + + def generate_template(self, inject_size): + if inject_size: + return self.replicate(self.msb - self.lsb + 1) + else: + if self.count == (1,): + return self.default[0] + else: + return self.default + + def is_field(self): + return True + + def disabled(self): + return False + + def top_level(self): + return False + + def type_name(self, args, parent, name): + size = self.msb - self.lsb + 1 + if size > 64: + rv = "widereg<%d>" % size + else: + rv = "ubits<%d>" % size + if self.count != (1,): + if args.checked_array: + for idx in self.count: + rv = "checked_array<%d, %s>" % (idx, rv) + else: + for idx in self.count: + rv = "%s[%d]" % (rv, idx) + return rv + + def gen_type(self, outfile, args, schema, parent, name, indent): + size = self.msb - self.lsb + 1 + if args.gen_decl != 'defn': + if size > 64: + outfile.write("widereg<%d>" % size) + else: + outfile.write("ubits<%d>" % size) + + def print_as_text(self, indent): + print( + "%sfield %s%s: [%d:%d]%s" + % ( + indent, + self.name, + str(self.count), + self.msb, + self.lsb, + " default=" + str(self.default) if self.default else "", + ) + ) + + +######################################################################## +## Utility functions + + +def parse_resets(reset_str): + """ + Turn a reset value from a Semifore CSV string into a tuple of ints + + Semifore CSV formats most reset values as hex integers of the from 0x___ + Arrays of fields, however, result in comma-separated lists: + [0x__, 0x__, ...] + + If the array is 1D this function will still output the size as a 1-element + tuple, just for consistency of iterability. 
+ """ + reset_strs = reset_str.replace("[", "").replace("]", "").split(",") + resets = [int(x, 0) for x in reset_strs] + return tuple(resets) + + +def parse_array_size(size_str): + """ + Turn an array size from a Semifore CSV string into a tuple of ints + + Semifore CSV formats the size of an array as an int in square brackets. + Multidimensional arrays are just a lot of these concatenated together: + [i] + [i][j][k] + ... + + If the array is 1D this function will still output the size as a 1-element + tuple, just for consistency of iterability. + """ + size_strs = size_str.replace("]", "").split("[")[1:] + sizes = list(map(int, size_strs)) + if len(sizes) > 0: + return tuple(sizes) + else: + return (1,) + + +def parse_csrcompiler_csv(filename, section_name): + """ + Given a Semifore CSV file, parse it into a bunch of csr_object instances. + Since the chip hierarchy is contained across multiple CSV files, each one + has a unique "section name". + + @param filename The filename of the CSV file to parse + @param section_name A string meaningfully describing the contents of + the CSV (eg, "memories" and "regs") + @return A list of all addressmaps parsed out of the file. 
+ """ + + csv_field_types = { + "configuration", + "userdefined configuration", + "constant", + "counter", + "status", + "hierarchicalInterrupt", + "interrupt", + } + + csv_addressmap_types = { + "addressmap", + "userdefined addressmap", + } + + csv_register_types = { + "register", + "wide register", + "userdefined register", + "userdefined wide register", + } + + csv_group_types = { + "group", + "userdefined group", + } + + addr_maps = {} + active_addr_map = None + active_group = [] + active_reg = None + active_reg_default = 0 + + with open(filename, "rt", encoding='utf-8', errors='ignore') as csv_file: + csv_reader = csv.DictReader(csv_file) + row_num = 0 + for row in csv_reader: + array_size = parse_array_size(row["Array"]) + + active_object = None + if len(active_group) > 0: + active_container = active_group[-1] + else: + active_container = active_addr_map + + if row["Type"] in csv_addressmap_types: + addr_maps[row["Identifier"]] = address_map( + row["Identifier"], array_size, section_name + ) + active_addr_map = addr_maps[row["Identifier"]] + elif row["Type"] in csv_register_types: + reg_width = int(row["Register Size"].replace(" bits", ""), 0) + active_container.objs.append( + reg( + row["Identifier"], + array_size, + int(row["Offset"], 0), + reg_width, + active_addr_map, + ) + ) + active_reg = active_container.objs[-1] + active_object = active_reg + if row["Reset Value"] == "": + active_reg_default = 0 + else: + active_reg_default = int(row["Reset Value"], 0) + elif row["Type"] in csv_field_types: + if len(array_size) > 1: + raise CsrException( + "Multi-dimensional field arrays not currently supported (in CSV file '" + + filename + + "' line " + + str(row_num) + + ")" + ) + range_tokens = row["Position"].replace("[", "").replace("]", "").split(":") + msb = int(range_tokens[0]) + if len(range_tokens) == 1: + lsb = msb + else: + lsb = int(range_tokens[1]) + if row["Reset Value"] == "": + default = [] + elem_width = msb - lsb + 1 + elem_mask = 2**elem_width - 1 
+ for elem_idx in range(array_size[0]): + default_offset = lsb + elem_width * elem_idx + default_val = (active_reg_default >> default_offset) & elem_mask + default.append(default_val) + default = tuple(default) + else: + default = parse_resets(row["Reset Value"]) + if array_size != (1,): + if len(default) == 1: + default = default * array_size[0] + elif len(default) != array_size[0]: + raise CsrException( + "Field reset value list is not the same length as the field array itself (in CSV file '" + + filename + + "' line " + + str(row_num) + + ")" + ) + + active_reg.fields.append( + field(row["Identifier"], array_size, msb, lsb, default, active_reg) + ) + active_object = active_reg.fields[-1] + elif row["Type"] == "addressmap instance": + try: + stride = int(row["Stride"].replace(" bytes", ""), 0) + except: + stride = addr_maps[row["Type Name"]].min_width() + + active_container.objs.append( + address_map_instance( + row["Identifier"], + array_size, + int(row["Offset"], 0), + addr_maps[row["Type Name"]], + None if array_size == (1,) else stride, + ) + ) + elif row["Type"] in csv_group_types: + try: + stride = int(row["Stride"].replace(" bytes", ""), 0) + except: + stride = None + + active_container.objs.append( + group( + row["Identifier"], + array_size, + int(row["Offset"], 0), + section_name, + None if array_size == (1,) else stride, + ) + ) + active_group.append(active_container.objs[-1]) + active_object = active_group[-1] + elif row["Type"] == "endgroup": + popped_group = active_group.pop() + if popped_group.stride == None: + popped_group.stride = popped_group.min_width() + elif row["Type"] == "userdefined memory": + # ignore for now? + pass + elif row["Type"] == "reserved": + # ignore for now? + pass + elif row["Type"] == "unknown": + # ignore for now? 
+ pass + else: + raise CsrException( + "Unrecognized type '" + + row["Type"] + + "' in CSV file '" + + filename + + "' line " + + str(row_num) + ) + + if "Description" in row and row["Description"] and active_object: + active_object.description = row["Description"] + + row_num += 1 + + return addr_maps + + +def build_schema(dir, walle_version): + """ + Build a chip schema based on the top-level CSV files from Semifore + + The schema is a dictionary of dictionaries. The top-level keys are the + "sections" of the chip's interface and metadata: + - memories: Memories and large register arrays. Things like the parser + TCAM are found here. Taken from the _mem Semifore + hierarchy, in byte-granularity chip addresses + - regs: Registers, like statistics counters and MAU crossbars. + Taken from the Semifore hierarchy, in 32-bit PCIe + addresses + - _schema_hash: An MD5 hash of the CSV file contents used to generate + the rest of the schema + + The non-metadata entries contain a dictionary of all of that hierarchy's + addressmaps, mapping from addressmap name to addrses_map objects. + + @param dir A string pointing to the directory containing (a copy of) the + bfnregs repo subdir "modules/_reg" generated by Semifore + using csr_config.css + @return A new schema object + """ + new_schema = {} + schema_hash = 0 + hasher = hashlib.md5() + + version_file = os.path.join(dir, "..", "..", "VERSION") + csv_files = os.path.join(dir, "module", "csv") + if not os.path.isdir(csv_files): + csv_files = dir + + if not os.path.isfile(version_file) or not os.path.isdir(csv_files): + raise Exception( + "Directory '" + + os.path.abspath(dir) + + "' could not be opened, " + + "does not exist, or does not appear to be a valid bfnregs " + + "chip module." 
+ ) + + for filename in os.listdir(csv_files): + if filename.endswith(".csv"): + key = os.path.splitext(filename)[0] + if key == "pipe_top_level": + key = "memories" + elif key == "tofino": + key = "regs" + elif key.endswith("_mem"): + key = "memories" + elif key.endswith("_reg"): + key = "regs" + filename = os.path.join(csv_files, filename) + + new_schema[key] = parse_csrcompiler_csv(filename, key) + + with open(filename, "rb") as csv_file: + hasher.update(csv_file.read()) + + if len(new_schema) == 0: + raise Exception("No csv files found under '" + os.path.abspath(csv_files) + "'") + + with open(version_file, "r") as version_file_handle: + reg_version = version_file_handle.read() + hasher.update(reg_version.encode('utf-8')) + + new_schema["_reg_version"] = reg_version + new_schema["_walle_version"] = walle_version + new_schema["_schema_hash"] = hasher.hexdigest() + + return new_schema + + +# Unit tests +if __name__ == "__main__": + y = reg("bar", (1,), 0, 32, None) + y.fields.append(field("y1", (1,), 7, 0, y)) + y.fields.append(field("y2", (1,), 15, 8, y)) + y.fields.append(field("y3", (1,), 23, 16, y)) + y.fields.append(field("y4", (1,), 31, 24, y)) + data = { + "y1": 0x30, + "y2": 0x32, + "y3": 0x34, + "y4": 0x36, + } + z = y.generate_binary(data, None, [traversal_history("root")]) + if z.value != "0246": + print("ERROR: Expected 32-bit object to have string value '6420'") + print(" 32 bit value was " + z.value) + + y = reg("baz", (1,), 0, 40, None) + y.fields.append(field("y1", (1,), 7, 0, y)) + y.fields.append(field("y2", (1,), 15, 8, y)) + y.fields.append(field("y3", (1,), 23, 16, y)) + y.fields.append(field("y4", (1,), 31, 24, y)) + y.fields.append(field("y5", (1,), 39, 32, y)) + data = { + "y1": 0x30, + "y2": 0x32, + "y3": 0x34, + "y4": 0x36, + "y5": 0x38, + } + z = y.generate_binary(data, None, [traversal_history("root")]) + if z.value != "02468": + print("Expected 40-bit field to have string value '86420'") + print(" 40 bit value was " + z.value) diff 
--git a/backends/tofino/bf-asm/walle/walle.py b/backends/tofino/bf-asm/walle/walle.py new file mode 100755 index 00000000000..ec2f3ebef53 --- /dev/null +++ b/backends/tofino/bf-asm/walle/walle.py @@ -0,0 +1,830 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# +# SPDX-License-Identifier: Apache-2.0 +""" +Walle - JSON-to-binary cruncher tool +See README.md for usage instructions + +The code is organized into three main modules: + - walle: Command-line interface and glue code + - csr: Code dealing with compiler-facing JSON files and raw Semifore CSV + files + - chip: Code dealing with driver-facing binary config files + +The main program flow is, on a first run of Walle: + - The csr module is used to parse Semifore CSV files into the classes + that inherit from csr_object, each of which being a Python representation + of a Semifore object. + - These objects are pickled into a file called "chip.schema" so the raw CSV + does not have to be used again or distributed with the toolchain. + +Thereafter, Walle operates on JSON files that mirror the structure of the +Semifore hierarchy and assign integer values to register fields from the +schema. The details of this format are specified in the README.md fileself. + - To generate blank JSON, the csr module will recursively call the + generate_template() methods of the csr_objects in the schema. 
+ - To crunch JSON into binary, the csr module will recursively call + generate_binary() methods of the csr_objects in the schema, passing + along the relevent tree of JSON data. These methods create a flat list of + objects that represent driver write operations, all of which are classes + from the chip module that inherit from chip_obj. + - The flat list of chip objects is looped over, calling each one's bytes() + method which returns the actual binary string to be passed to the driver. + These bytes are concatenated onto the binary file being output. + The address of this write may be manipulated, since Semifore addresses + are auto-generated and may need to be operated on before they appear as + the chip expects (for instance, chip memories are word-addressed while + Semifore addresses are byte-addressed) + +It's important to note that all addresses calculated within one JSON file are +relative, so to produce correct chip addresses the binary __must__ be +calculated starting at a top-level addressmap in the Semifore hierarchy. When +expanding a JSON config to contain data instead of references to other JSON +files, Walle will alter the addresses calculated for the _included_ JSON +to be relative to the addresses in the _including_ JSON. +""" +import argparse +import copy +import json +import os +import pickle +import subprocess +import sys + +import chip +import csr +import yaml + +__version__ = '0.4.13' + +######################################################################## +## Utility functions + + +class CsrUnpickler(pickle.Unpickler): + """ + This module is a hacky fix for a bug that sometimes shows up when using a + chip.schema file generated across different systems. 
+ + Specifically: + On system A, walle is a globally installed package via setup.py and can + be accessed from the terminal with the command 'walle' + On system B, walle is NOT globally installed and instead accessed locally + by directly pointing to walle.py + + The chip.schema files generated on A and B will encode Python classes + from two different module paths. A will have classes from "walle.csr" while + B will have classes from "csr". If we move a schema from one system to + another, the module lookup will fail. + + We fix this by looking for references to classes from the 'csr' module and + directly retrieving them from the csr module that this top-level script has + already imported. + """ + + def find_class(self, module, name): + if module == "csr" or module[-4:] == ".csr": + return getattr(csr, name) + else: + return pickle.Unpickler.find_class(self, module, name) + + +def annotate_names(obj, threshold, path=""): + if type(obj) is list: + if threshold > 0 and len(obj) > threshold: + for idx, elem in enumerate(obj): + if type(elem) is dict: + elem["_absolute_name"] = path + "_" + str(idx) + annotate_names(elem, threshold, path) + else: + for elem in obj: + annotate_names(elem, threshold, path) + elif type(obj) is dict: + for key, elem in list(obj.items()): + annotate_names(elem, threshold, (path + "_" if len(path) > 0 else "") + key) + + +def print_schema_info(schema_file, schema): + hierarchies = [] + sys.stdout.write("file: " + schema_file + "\n") + for key in schema: + if key[0] == "_": + sys.stdout.write(key[1:] + ": " + str(schema[key]) + "\n") + else: + hierarchies.append(key) + sys.stdout.write("hierarchies: " + ", ".join(hierarchies) + "\n") + + +def parse_template_args(args, params): + """ + Extend argparse.Namespace with additional arguments for cpp generation + from templates.yaml that may not exist as command line arguments + FIXME -- should be a way to do this with ArgParse? 
+ """ + + def bool_arg(args, attr, val): + if not type(val) is bool: + raise Exception("Attribute " + attr + " requires bool argument, got " + str(val)) + setattr(args, attr, val) + + def str_arg(args, attr, val): + if not type(val) is str: + raise Exception("Attribute " + attr + " requires string argument, got " + str(val)) + setattr(args, attr, val) + + def add_list_arg(args, attr, val): + getattr(args, attr).append(val) + + def add_set_arg(args, attr, val): + getattr(args, attr).add(val) + + def set_decl(args, attr, val): + args.gen_decl = 'decl' + + def set_defn(args, attr, val): + args.gen_decl = 'defn' + + def no_arg(args, attr, val): + pass + + options = { + 'alias_array': (True, bool_arg), + 'binary_offset': (False, bool_arg), + 'checked_array': (True, bool_arg), + 'decl': (None, set_decl), + 'delete_copy': (False, bool_arg), + 'defn': (None, set_defn), + 'dump_unread': (False, bool_arg), + 'emit_binary': (False, bool_arg), + 'emit_fieldname': (False, bool_arg), + 'emit_json': (False, bool_arg), + 'enable_disable': (False, bool_arg), + 'expand_disabled_vector': (False, bool_arg), + 'gen_decl': ('both', str_arg), + 'global_types': (set(), add_set_arg), + 'global': (None, lambda args, attr, val: add_set_arg(args, 'global_types', val)), + 'include': ([], add_list_arg), + 'input_binary': (False, bool_arg), + 'name': (None, str_arg), + 'namespace': (False, str_arg), + 'reverse_write': (False, bool_arg), + 'rewrite': ({}, no_arg), + 'rewrite_used': ({}, no_arg), + 'unpack_json': (False, bool_arg), + 'widereg': (False, bool_arg), + 'write_dma': (set(), add_set_arg), + } + + if not hasattr(args, 'cpp_reserved'): + args.cpp_reserved = set( + [ + "and", + "asm", + "auto", + "break", + "case", + "catch", + "char", + "class", + "const", + "continue", + "default", + "delete", + "do", + "double", + "else", + "enum", + "extern", + "float", + "for", + "friend", + "goto", + "if", + "inline", + "int", + "long", + "new", + "not", + "or", + "operator", + "private", + 
"protected", + "public", + "register", + "return", + "short", + "signed", + "sizeof", + "static", + "struct", + "switch", + "template", + "this", + "throw", + "try", + "typedef", + "union", + "unsigned", + "virtual", + "void", + "volatile", + "while", + "xor", + ] + ) + for opt in options: + if options[opt][0] is not None: + if hasattr(args, opt): + setattr(args, opt, copy.copy(getattr(args, opt))) + else: + setattr(args, opt, copy.copy(options[opt][0])) + for p in params: + s = p.split('=', 1) + if p in options: + options[p][1](args, p, True) + elif p[0] == '-' and p[1:] in options: + options[p[1:]][1](args, p[1:], False) + elif s[0] in options: + options[s[0]][1](args, s[0], s[1]) + elif p[:2] == "-I": + args.include.append(p[2:]) + else: + sys.stderr.write("Unknown parameter %s\n" % str(p)) + + if args.enable_disable: + args.cpp_reserved = args.cpp_reserved.copy() + args.cpp_reserved.update( + [ + "disable", + "disabled", + "disable_if_unmodified", + "disable_if_zero", + "enable", + "modified", + "set_modified", + ] + ) + + +def read_template_file(template_file, args, schema): + with open(template_file, "rb") as template_objects_file: + templatization_cfg = yaml.load(template_objects_file, Loader=yaml.SafeLoader) + top_level_objs = templatization_cfg["generate"] + disabled_objs = templatization_cfg["ignore"] + if "global" in templatization_cfg: + parse_template_args(args, templatization_cfg["global"]) + for section_name, section in list(schema.items()): + if section_name not in top_level_objs: + if section_name[0] != "_": + sys.stderr.write("no template cfg for " + section_name + ", ignoring\n") + continue + for obj in top_level_objs[section_name]: + section[obj].templatization_behavior = "top_level" + section[obj].object_name = None + if top_level_objs[section_name][obj] is None: + continue + for fname, params in list(top_level_objs[section_name][obj].items()): + for p in params: + if p[:5] == 'name=': + section[obj].object_name = p[5:] + break + for obj in 
disabled_objs[section_name]: + if section[obj].templatization_behavior != None: + raise Exception(obj + " cannot be both templatized and ignored") + section[obj].templatization_behavior = "disabled" + return top_level_objs + + +def generate_templates(args, schema): + if args.o == None: + args.o = "templates" + if not os.path.exists(args.o): + os.makedirs(args.o) + + top_level_objs = read_template_file(args.generate_templates, args, schema) + for section_name, section in list(schema.items()): + if section_name not in top_level_objs: + continue + for top_level_obj in top_level_objs[section_name]: + template = section[top_level_obj].generate_template(False) + sizes = section[top_level_obj].generate_template(True) + + if args.template_indices != None: + annotate_names(template, args.template_indices) + annotate_names(sizes, args.template_indices) + + # Copy in schema metadata + schema_metadata = [key for key in list(schema.keys()) if key[0] == "_"] + for metadata in schema_metadata: + template[metadata] = schema[metadata] + sizes[metadata] = schema[metadata] + template["_section"] = section_name + sizes["_section"] = section_name + + cfg_name = section_name + "." + top_level_obj + ".cfg.json" + with open(os.path.join(args.o, cfg_name), "wb") as outfile: + json.dump(template, outfile, indent=4, sort_keys=True) + size_name = section_name + "." + top_level_obj + ".size.json" + with open(os.path.join(args.o, size_name), "wb") as outfile: + json.dump(sizes, outfile, indent=4, sort_keys=True) + + +def arbitrary_ASCII_text_to_52digit_decimal_hash(input): + import hashlib + + # 52 characters left in 63 after the prefix "IDENTIFIER_", + # and math.log2(10**52) => 172.74026093414284, + # and math.log2(10**52)/8 => 21.592532616767855, + # so going to request 22 bytes of hash digest. + # the next line of commented-out code is _fantastic_ in/on Python 3.8.10, + # but fails in/on Python 3.5.2 [as present in/on the Jarvis image on my old BXDSW VM as of Sept. 
7 2022 1:40am NY time] + ### hash_digest_as_bytes = hashlib.shake_256( bytes(input, "ASCII") ).digest(22) + hash_digest_as_bytes = hashlib.sha224(bytes(input, "ASCII")).digest() + # sha224 => 28 bytes of digest, the closest match that is >= 22 bytes and available in/on Python 3.5.2 + + hash_digest_as_int = int.from_bytes(hash_digest_as_bytes, "big") + return ("%052d" % hash_digest_as_int)[:52] + + +def arbitrary_text_to_valid_C_identifier( + input_iterable_of_characters, dry_run_to_get_hash_input=False +): + """Takes a single input, which must be an iterable of characters for correct behavior to be + promised. When given valid input, returns a string that is a valid C and C++ identifier, + regardless of what characters are used in the input. + + _Intentionally_ *not* considering [ASCII] underscores as OK to copy untranslated as-is, + since _both_ leading underscores _and_ 2-or-more underscores in a row are considered as + ''reserved'' by the ISO C++ standard [and probably also by the ISO C standard]. + + _Only_ ASCII alphanumerics are ''OK as is''. + + Quoting : + + "The C standard requires only that the first 63 be significant" + + In other words, the first 63 characters are definitely going to be "paid attention to", + and the rest may be handled as "comments". I think we are probably safe with shifting + our upper bound to 200 or 999 characters. + + Using a decimal hash to almost-guarantee uniqueness in the first 63 characters.""" + + if (not input_iterable_of_characters) or (len(input_iterable_of_characters) < 1): + raise ValueError("This function requires an input of positive length.") + + INCLUSIVE_MAX_OUTPUT_LENGTH = 255 # D. R. Y. 
+ + temp_ASCIIonly_string = "" + for char in input_iterable_of_characters: + if ( + len(temp_ASCIIonly_string) > 999 + ): # there`s not much good in letting it go on for an arbitrarily-long time + break + if ( + '/' == char + ): # {part 1 of 3} of a kludge so that we can include path separators in the hash input + temp_ASCIIonly_string += char + if char.isalnum() and (ord(char) >= 32) and (ord(char) <= 126): + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + # "char.isascii()" does _not_ always work + temp_ASCIIonly_string += char + elif not (temp_ASCIIonly_string.endswith('_') or temp_ASCIIonly_string.endswith('/')): + # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + # {part 2 of 3} of a kludge so that we can include path separators in the hash input + temp_ASCIIonly_string += '_' + + if dry_run_to_get_hash_input: + return temp_ASCIIonly_string + + result = "IDENTIFIER_" + arbitrary_ASCII_text_to_52digit_decimal_hash(temp_ASCIIonly_string) + if not temp_ASCIIonly_string.startswith('_'): + result += '_' + result += temp_ASCIIonly_string + + result = result[:INCLUSIVE_MAX_OUTPUT_LENGTH] + result = result.replace( + '/', '_' + ) # {part 3 of 3} of a kludge so that we can include path separators in the hash input + return result + + +def pathname_to_valid_C_identifier(file_pathname, dry_run_to_get_hash_input=False): + """This makes the assumption that the input is a string + [or at least "string-like object"] + with the data in a format along the lines of "/a/b/c/d/e/file" + """ + assert len(file_pathname) > 0 + + first_char_upper_case = lambda x: "" if (len(x) < 1) else x[0].upper() + x[1:].lower() + + # somewhat hackish... does anybody want to propose an "elegant" alternative for the next 3 lines? 
+ if file_pathname.endswith(".cpp"): + file_pathname = file_pathname[:-4] + if file_pathname.endswith(".hpp"): + file_pathname = file_pathname[:-4] + if file_pathname.endswith(".h"): + file_pathname = file_pathname[:-2] + + split = file_pathname.split('/') # POSIXism warning re '/' + file = first_char_upper_case(split[-1]) + last_4_dirs_if_possible = [ + first_char_upper_case(x) for x in split[-5:-1] + ] # worst-case scenario, this is an empty list + + return arbitrary_text_to_valid_C_identifier( + '/'.join(last_4_dirs_if_possible + [file]), dry_run_to_get_hash_input + ) + + +def generate_cPlusPlus_file(outfile, top_level, args, schema, file_basename): + outfile.write( + "/* Autogenerated from %s and %s -- DO NOT EDIT */\n" % (args.schema, args.generate_cpp) + ) + + fake_pathname = args.o + '/' + top_level.name + '/' + file_basename + + synthetic_identifier = pathname_to_valid_C_identifier(fake_pathname) + outfile.write("/* --- vvv --- DEBUG --- vvv ---\n") + outfile.write("DEBUG: args.o = ''%s''\n" % args.o) + outfile.write("DEBUG: file_basename = ''%s''\n" % file_basename) + outfile.write("\n") + outfile.write("DEBUG: args.schema = ''%s''\n" % args.schema) + outfile.write("DEBUG: top_level.name = ''%s''\n" % top_level.name) + outfile.write("DEBUG: top_level.parent = ''%s''\n" % top_level.parent) + outfile.write("\n") + outfile.write("DEBUG: fake_pathname = ''%s''\n" % fake_pathname) + outfile.write("\n") + outfile.write( + "DEBUG: input to hash algo.: ''%s''\n" + % pathname_to_valid_C_identifier(fake_pathname, dry_run_to_get_hash_input=True) + ) + outfile.write(" --- ^^^ --- DEBUG --- ^^^ --- */\n") + del fake_pathname + + if args.gen_decl == 'decl': + outfile.write('#ifndef %s\n' % synthetic_identifier) + outfile.write('#define %s 1\n\n' % synthetic_identifier) + + for incl in args.include: + outfile.write('#include "%s"\n' % incl) + if args.emit_json or args.emit_fieldname or args.dump_unread: + outfile.write('#include "lib/indent.h"\n') + if args.unpack_json: 
+ outfile.write('#include "backends/tofino/bf-asm/json.h"\n') + if args.alias_array: + outfile.write('#include "backends/tofino/bf-asm/alias_array.h"\n') + if args.checked_array: + outfile.write('#include "backends/tofino/bf-asm/checked_array.h"\n') + if args.emit_binary: + outfile.write('#include "backends/tofino/bf-asm/binary_output.h"\n') + outfile.write('#include "backends/tofino/bf-asm/ubits.h"\n') + outfile.write('#include "backends/tofino/bf-asm/register_reference.h"\n') + if args.widereg: + outfile.write('#include "backends/tofino/bf-asm/widereg.h"\n') + outfile.write('\n') + outfile.write("using namespace P4;") + if len(args.global_types) > 0: + args.global_types_generated = {} + top_level.gen_global_types(outfile, args, schema) + if args.namespace: + outfile.write('namespace %s {\n\n' % args.namespace) + top_level.generate_cpp(outfile, args, schema) + outfile.write(";\n") + if args.namespace: + outfile.write('\n} // end namespace %s\n\n' % args.namespace) + if args.gen_decl == 'decl': + outfile.write('\n#endif /* end of "ifndef %s" */\n' % synthetic_identifier) + + +def extend_args(args, params): + """ + parse additional template arguments into a copy of 'args' + """ + args = copy.copy(args) + parse_template_args(args, params) + return args + + +def generate_cpp(args, schema): + if args.o == None: + args.o = "gen" + if not os.path.exists(args.o): + os.makedirs(args.o) + + top_level_objs = read_template_file(args.generate_cpp, args, schema) + global_args = args + for section_name, section in list(schema.items()): + if section_name not in top_level_objs: + continue + for top_level_obj, files in list(top_level_objs[section_name].items()): + if files is None: + continue + args = global_args + if 'args' in files: + args = extend_args(args, files['args']) + if 'rewrite' in files: + args = copy.copy(args) + args.rewrite = files['rewrite'] + for generate_file, params in list(files.items()): + if generate_file == 'args': + continue + if generate_file == 'rewrite': 
+ continue + if ( + (("DEBUG" in globals().keys()) and globals()["DEBUG"]) + or ("DEBUG" in locals().keys()) + and locals()["DEBUG"] + ): + print("===vvv=== DEBUG ===vvv===") + print("globals:", globals()) + print("locals:", locals()) + print("===^^^=== DEBUG ===^^^===") + generate_cPlusPlus_file( + open(os.path.join(args.o, generate_file), "w"), + section[top_level_obj], + extend_args(args, params), + schema, + generate_file, + ) + + +def print_schema_text(args, schema): + def do_print(indent, obj): + for key, val in list(obj.items()): + if type(val) is str: + print("%s%s: %s" % (indent, key, val)) + elif type(val) is dict: + print("%s%s:" % (indent, key)) + do_print(indent + " ", val) + elif val.templatization_behavior == "top_level": + print("%s%s:" % (indent, key)) + val.print_as_text(indent + " ") + + read_template_file(args.print_schema, args, schema) + do_print("", schema) + + +def build_binary_cache(args, schema): + cache = csr.binary_cache(schema) + try: + for config_filename in args.configs: + with open(config_filename, "rb") as configfile: + try: + template = json.load(configfile) + except: + sys.stderr.write( + "ERROR: Input file '" + + config_filename + + "' could not be decoded as JSON.\n" + ) + sys.exit(1) + + if type(template) is not dict or "_name" not in template or "_type" not in template: + sys.stderr.write( + "ERROR: Input file '" + + config_filename + + "' does not appear to be valid Walle configuration JSON.\n" + ) + sys.exit(1) + + if ( + "_schema_hash" not in template + or template["_schema_hash"] != schema["_schema_hash"] + ): + sys.stderr.write( + "ERROR: Input file '" + + config_filename + + "' does not match the current chip schema.\n" + ) + if not args.ignore_schema_mismatch: + sys.exit(1) + + cache.templates[template["_name"]] = template + except IOError as e: + sys.stderr.write( + "ERROR: Could not open '%s' for reading: %s (errno %i).\n" + % (config_filename, e[1], e[0]) + ) + sys.exit(e[0]) + + return cache + + +def 
dump_binary(args, binary_cache, out_file): + addr_func = { + # Memories are ram-word addressed, not byte addressed + "memories": lambda addr: addr >> 4, + # TODO: use actual func once model+indirect writes are fixed + "regs": lambda addr: addr, + # # Regs are give in 32-bit PCIe address space and need to be + # # converted to 42-bit chip address space + # "regs": lambda addr: ((addr&0x0FF80000)<<14)|(addr&0x0007FFFF) + } + + for template in args.top: + try: + path = [] + data = binary_cache.get_data(template, path=path) + data_type = binary_cache.get_type(template) + except csr.CsrException as e: + # TODO: decompose: + sys.stderr.write("ERROR: " + str(e) + "\n") + tb = [] + for frame in path: + tb.append("{" + frame.template_name + "}") + arr_subscript = None + for node in frame.path: + if type(node) is str: + tb.append(node) + if arr_subscript != None: + tb[-1] += arr_subscript + arr_subscript = None + elif type(node) is list: + arr_subscript = csr.array_str(node) + else: + tb.append(str(node)) + sys.stderr.write("Traceback: " + ".".join(tb) + "\n") + sys.exit(1) + + template_section = data_type.split(".")[0] + for chip_obj in data: + chip_obj.addr = addr_func[template_section](chip_obj.addr) + out_file.write(chip_obj.bytes()) + + if args.append_sentinel: + out_file.write(chip.direct_reg(0xFFFFFFFF, 0).bytes()) + + +def walle_process(parser, args=None): + if len(args.top) == 0: + args.top = ["memories.top", "regs.top"] + + if args.generate_schema != None: + schema = csr.build_schema(args.generate_schema, __version__) + with open(args.schema, "wb") as outfile: + pickle.dump(schema, outfile, protocol=2) + print("Schema generated from:\n") + cmd = 'echo | git -C %s log -1' % args.generate_schema + output = subprocess.check_output(cmd, shell=True) + print(output.decode('utf-8')) + outfile.write(b'\n\n' + output) + + if args.generate_templates != None: + if not os.path.isfile(args.schema): + sys.stderr.write( + "ERROR: Schema file '" + + os.path.abspath(args.schema) + 
+ "' could not be opened or does not exist.\n" + ) + sys.exit(1) + generate_templates(args, schema) + else: + + if not os.path.isfile(args.schema): + sys.stderr.write( + "ERROR: Schema file '" + + os.path.abspath(args.schema) + + "' could not be opened or does not exist.\n" + ) + sys.exit(1) + + with open(args.schema, "rb") as infile: + schema = CsrUnpickler(infile).load() + + if args.schema_info: + print_schema_info(os.path.abspath(args.schema), schema) + elif args.dump_schema: + print(yaml.dump(schema)) + elif args.print_schema: + print_schema_text(args, schema) + elif args.generate_templates != None: + generate_templates(args, schema) + elif args.generate_cpp != None: + generate_cpp(args, schema) + else: + if len(args.configs) == 0: + parser.print_help() + else: + if args.o == None: + args.o = "a.out" + cache = build_binary_cache(args, schema) + with open(args.o, "wb") as binfile: + dump_binary(args, cache, binfile) + + sys.stdout.write("Binary '" + args.o + "' generated successfully.\n") + + +def main(): + """ + The main entry point for the script + """ + + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__) + parser.add_argument( + "--schema", + '-s', + metavar='SCHEMA-FILE', + help="The chip schema to use", + type=str, + default="chip.schema", + ) + parser.add_argument( + "--schema-info", + action='store_true', + help="Print metadata stored in the selected chip schema and exit", + ) + parser.add_argument("--target", "-t", help="The chip target", type=str, default="tofino") + parser.add_argument("--dump-schema", action='store_true', help="Dump chip schema as yaml") + parser.add_argument( + "--print-schema", + metavar='TOP-LEVEL-OBJS-FILE', + type=str, + help="Dump chip schema as (readable?) 
text", + ) + parser.add_argument( + "--generate-schema", + metavar='BFNREGS-TARGET-DIR', + type=str, + help="Generate a chip schema from the bfnregs target regs directory", + default=None, + ) + parser.add_argument( + "--ignore-schema-mismatch", + action='store_true', + help="Attempt to crunch input files, even if they do not match the current chip schema", + ) + parser.add_argument( + "--generate-templates", + metavar='TOP-LEVEL-OBJS-FILE', + type=str, + help="Generate an 'all-0s' template for each addressmap listed in the given top-level objects file", + ) + parser.add_argument( + "--generate-cpp", + metavar='TOP-LEVEL-OBJS-FILE', + type=str, + help="Generate C++ code for each addressmap listed in the given top-level objects file", + ) + parser.add_argument( + "--template-indices", + metavar='THRESHOLD', + help="Include human-readable index keys for register arrays greater than the specified threshold size", + type=int, + default=None, + ) + parser.add_argument( + "--append-sentinel", + action='store_true', + help="Append a direct register write to address 0xFFFFFFFF to the end of the binary output", + ) + parser.add_argument( + '--top', + metavar='IDENTIFIER', + type=str, + action='append', + default=[], + help='Identifier of a template to generate binary config data for', + ) + parser.add_argument( + '-o', + metavar='FILE', + type=str, + default=None, + help='Name of file to write binary config data into (or directory to write templates into)', + ) + parser.add_argument( + 'configs', + metavar='CONFIG-FILE', + type=str, + nargs='*', + help='A JSON configuration file to process', + ) + + args = parser.parse_args() + if getattr(sys, 'frozen', False): + # running as a bundle: look for the schema in the bundled directory + args.schema = os.path.join(sys._MEIPASS, 'lib', args.target, 'chip.schema') + walle_process(parser, args) + + +######################################################################## +## Frontend logic + +if __name__ == "__main__": + main() diff 
--git a/backends/tofino/bf-asm/widereg.cpp b/backends/tofino/bf-asm/widereg.cpp new file mode 100644 index 00000000000..f6b223423c0 --- /dev/null +++ b/backends/tofino/bf-asm/widereg.cpp @@ -0,0 +1,29 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. + * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "widereg.h" + +#include +#include + +#include "lib/log.h" + +void widereg_base::log(const char *op, bitvec v) const { + std::ostringstream tmp; + LOG1(this << ' ' << op << ' ' << v + << (v != value ? tmp << " (now " << value << ")", tmp : tmp).str()); +} diff --git a/backends/tofino/bf-asm/widereg.h b/backends/tofino/bf-asm/widereg.h new file mode 100644 index 00000000000..f6f8e37a05e --- /dev/null +++ b/backends/tofino/bf-asm/widereg.h @@ -0,0 +1,170 @@ +/** + * Copyright (C) 2024 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef BACKENDS_TOFINO_BF_ASM_WIDEREG_H_ +#define BACKENDS_TOFINO_BF_ASM_WIDEREG_H_ + +#include + +#include +#include +#include + +#include "lib/bitvec.h" +#include "lib/log.h" + +using namespace P4; + +void print_regname(std::ostream &out, const void *addr, const void *end); + +struct widereg_base; + +struct widereg_base { + bitvec value, reset_value; + mutable bool read, write; + mutable bool disabled_; + + widereg_base() : read(false), write(false), disabled_(false) {} + explicit widereg_base(bitvec v) + : value(v), reset_value(v), read(false), write(false), disabled_(false) {} + explicit widereg_base(uintptr_t v) + : value(v), reset_value(v), read(false), write(false), disabled_(false) {} +#if __WORDSIZE == 64 + // For 32-bit systems intptr_t is defined as int + explicit widereg_base(intptr_t v) + : value(v), reset_value(v), read(false), write(false), disabled_(false) {} +#endif + explicit widereg_base(int v) + : value(v), reset_value(v), read(false), write(false), disabled_(false) {} + operator bitvec() const { + read = true; + return value; + } + bool modified() const { return write; } + void set_modified(bool v = true) { write = v; } + bool disabled() const { return disabled_; } + bool disable_if_unmodified() { return write ? false : (disabled_ = true); } + bool disable_if_zero() const { return value.empty() && !write; } + bool disable_if_reset_value() { return value == reset_value ? 
(disabled_ = true) : false; } + bool disable() const { + if (write) { + LOG1("ERROR: Disabling modified register in " << this); + return false; + } + disabled_ = true; + return disabled_; + } + void enable() const { disabled_ = false; } + void rewrite() { write = false; } + virtual bitvec operator=(bitvec v) = 0; + virtual unsigned size() = 0; + void log(const char *op, bitvec v) const; +}; + +inline static unsigned int to_unsigned(const bitvec &v) { + std::stringstream ss; + ss << v; + std::string str(ss.str()); + unsigned int rv = std::strtoul(str.c_str(), 0, 16); + return rv; +} + +inline std::ostream &operator<<(std::ostream &out, const widereg_base *u) { + print_regname(out, u, u + 1); + return out; +} +inline std::ostream &operator<<(std::ostream &out, const widereg_base &u) { + return out << to_unsigned(u.value); +} + +template +struct widereg : widereg_base { + widereg() : widereg_base() {} + const widereg &check() { + if (value.max().index() >= N) { + LOG1("ERROR: out of range for " << N << " bits in " << this); + value.clrrange(N, value.max().index() - N + 1); + } + return *this; + } + explicit widereg(bitvec v) : widereg_base(v) { check(); } + explicit widereg(uintptr_t v) : widereg_base(v) { check(); } +#if __WORDSIZE == 64 + // For 32-bit systems intptr_t is defined as int + explicit widereg(intptr_t v) : widereg_base(v) { check(); } +#endif + explicit widereg(int v) : widereg_base(v) { check(); } + widereg(const widereg &) = delete; + widereg(widereg &&) = default; + bitvec operator=(bitvec v) { + if (disabled_) LOG1("ERROR: Writing disabled register value in " << this); + if (write) LOG1("WARNING: Overwriting " << value << " with " << v << " in " << this); + value = v; + write = true; + log("=", v); + check(); + return v; + } + uintptr_t operator=(uintptr_t v) { + *this = bitvec(v); + return v; + } + intptr_t operator=(intptr_t v) { + *this = bitvec(v); + return v; + } + const widereg &operator=(const widereg &v) { + *this = v.value; + v.read = true; 
+ return v; + } + const widereg_base &operator=(const widereg_base &v) { + *this = v.value; + v.read = true; + return v; + } + unsigned size() { return N; } + const widereg &operator|=(bitvec v) { + if (disabled_) LOG1("ERROR: Writing disabled register value in " << this); + if (write) + LOG1("WARNING: Overwriting " << value << " with " << (v | value) << " in " << this); + value |= v; + write = true; + log("|=", v); + return check(); + } + const widereg &set_subfield(uintptr_t v, unsigned bit, unsigned size) { + if (disabled_) LOG1("ERROR: Writing disabled register value in " << this); + if (bit + size > N) { + LOG1("ERROR: subfield " << bit << ".." << (bit + size - 1) << " out of range in " + << this); + } else if (auto o = value.getrange(bit, size)) { + if (write) + LOG1((o != v ? "ERROR" : "WARNING") + << ": Overwriting subfield(" << bit << ".." << (bit + size - 1) << ") value " + << o << " with " << v << " in " << this); + } + if (v >= (1U << size)) + LOG1("ERROR: Subfield value " << v << " too large for " << size << " bits in " << this); + value.putrange(bit, size, v); + write = true; + log("|=", bitvec(v) << bit); + return check(); + } +}; + +#endif /* BACKENDS_TOFINO_BF_ASM_WIDEREG_H_ */ diff --git a/backends/tofino/bf-p4c/CMakeLists.txt b/backends/tofino/bf-p4c/CMakeLists.txt index 0a5410a6d3e..412b5f3818b 100644 --- a/backends/tofino/bf-p4c/CMakeLists.txt +++ b/backends/tofino/bf-p4c/CMakeLists.txt @@ -558,9 +558,6 @@ target_compile_definitions(bfp4c INTERFACE "-DCONFIG_PREFIX=\"${CMAKE_INSTALL_PREFIX}\"" INTERFACE "-DCONFIG_PKGDATADIR=\"${CMAKE_INSTALL_PREFIX}/${P4C_ARTIFACTS_OUTPUT_DIRECTORY}\"" ) -target_compile_definitions(bfp4c - INTERFACE "-DHAVE_JBAY=1" -) if (ENABLE_BAREFOOT_INTERNAL) target_compile_definitions(bfp4c INTERFACE "-DBAREFOOT_INTERNAL=1" diff --git a/backends/tofino/bf-p4c/arch/psa/psa.cpp b/backends/tofino/bf-p4c/arch/psa/psa.cpp index dd430a6a501..c93f63e561c 100644 --- a/backends/tofino/bf-p4c/arch/psa/psa.cpp +++ 
b/backends/tofino/bf-p4c/arch/psa/psa.cpp @@ -900,13 +900,10 @@ class LoadTargetArchitecture : public Inspector { if (Device::currentDevice() == Device::TOFINO) { filenames.push_back("tofino1_specs.p4"); filenames.push_back("tofino1_base.p4"); - } -#if HAVE_JBAY - else { + } else { filenames.push_back("tofino2_specs.p4"); filenames.push_back("tofino2_base.p4"); } -#endif // HAVE_JBAY filenames.push_back("tofino/stratum.p4"); filenames.push_back("tofino/p4_14_prim.p4"); diff --git a/backends/tofino/bf-p4c/arch/v1model.cpp b/backends/tofino/bf-p4c/arch/v1model.cpp index b02b32930f4..9811778738b 100644 --- a/backends/tofino/bf-p4c/arch/v1model.cpp +++ b/backends/tofino/bf-p4c/arch/v1model.cpp @@ -170,7 +170,6 @@ class LoadTargetArchitecture : public Inspector { structure->addMetadata(EGRESS, MetadataField{"standard_metadata"_cs, "egress_rid"_cs, 16}, MetadataField{"eg_intr_md"_cs, "egress_rid"_cs, 16}); -#ifdef HAVE_JBAY structure->addMetadata(INGRESS, MetadataField{"ig_intr_md_for_mb"_cs, "mirror_io_select"_cs, 1}, MetadataField{"ig_intr_md_for_dprsr"_cs, "mirror_io_select"_cs, 1}); @@ -225,7 +224,6 @@ class LoadTargetArchitecture : public Inspector { structure->addMetadata( EGRESS, MetadataField{"eg_intr_md_for_mb"_cs, "mirror_coalesce_length"_cs, 8}, MetadataField{"eg_intr_md_for_dprsr"_cs, "mirror_coalesce_length"_cs, 8}); -#endif } void analyzeTofinoModel() { @@ -313,14 +311,10 @@ class LoadTargetArchitecture : public Inspector { if (Device::currentDevice() == Device::TOFINO) { filenames.push_back("tofino1_specs.p4"); filenames.push_back("tofino1_base.p4"); - } -#if HAVE_JBAY - else if (Device::currentDevice() == Device::JBAY) { + } else if (Device::currentDevice() == Device::JBAY) { filenames.push_back("tofino2_specs.p4"); filenames.push_back("tofino2_base.p4"); - } -#endif // HAVE_JBAY - else + } else BUG("Unsupported device id %s", Device::currentDevice()); filenames.push_back("tofino/stratum.p4"); filenames.push_back("tofino/p4_14_prim.p4"); diff --git 
a/backends/tofino/bf-p4c/driver/barefoot.py b/backends/tofino/bf-p4c/driver/barefoot.py index 23233325fa5..a0af756b1f1 100755 --- a/backends/tofino/bf-p4c/driver/barefoot.py +++ b/backends/tofino/bf-p4c/driver/barefoot.py @@ -179,12 +179,6 @@ def add_command_line_options(self): default=False, help="Add source outputs to the archive.", ) - self._argGroup.add_argument( - "--enable-bf-asm", - action="store_true", - default=False, - help="Use the assembler to generate a binary.", - ) self._argGroup.add_argument( "--bf-rt-schema", action="store", @@ -487,19 +481,18 @@ def process_command_line_options(self, opts): """! Main parsing or command line options @param opts Object holding set arguments """ - # Add assembler options if they are available. - if opts.enable_bf_asm or os.getenv("ENABLE_BF_ASM"): - if os.environ['P4C_BUILD_TYPE'] == "DEVELOPER": - bfas = find_file('bf-asm', 'bfas') - else: - bfas = find_file(os.environ['P4C_BIN_DIR'], 'bfas') - - bfrt_schema = find_file(os.environ['P4C_BIN_DIR'], 'bfrt_schema.py') - p4c_gen_conf = find_file(os.environ['P4C_BIN_DIR'], 'p4c-gen-conf') - self.add_command('assembler', bfas) - self.add_command('bf-rt-verifier', bfrt_schema) - self.add_command('p4c-gen-conf', p4c_gen_conf) - self._commandsEnabled.append('assembler') + # Add assembler options. 
+ if os.environ['P4C_BUILD_TYPE'] == "DEVELOPER": + bfas = find_file('.', 'bfas') + else: + bfas = find_file(os.environ['P4C_BIN_DIR'], 'bfas') + + bfrt_schema = find_file(os.environ['P4C_BIN_DIR'], 'bfrt_schema.py') + p4c_gen_conf = find_file(os.environ['P4C_BIN_DIR'], 'p4c-gen-conf') + self.add_command('assembler', bfas) + self.add_command('bf-rt-verifier', bfrt_schema) + self.add_command('p4c-gen-conf', p4c_gen_conf) + self._commandsEnabled.append('assembler') BackendDriver.process_command_line_options(self, opts) diff --git a/backends/tofino/bf-p4c/driver/p4c.tofino.cfg b/backends/tofino/bf-p4c/driver/p4c.tofino.cfg index 09fbd5bfc24..05e840437b8 100644 --- a/backends/tofino/bf-p4c/driver/p4c.tofino.cfg +++ b/backends/tofino/bf-p4c/driver/p4c.tofino.cfg @@ -27,8 +27,7 @@ class TofinoBackend(bfn.BarefootBackend): self.config_compiler("__TARGET_TOFINO__=1") def process_command_line_options(self, opts): - if opts.enable_bf_asm or os.getenv("ENABLE_BF_ASM"): - self.config_assembler("tofino") + self.config_assembler("tofino") bfn.BarefootBackend.process_command_line_options(self, opts) diff --git a/backends/tofino/bf-p4c/driver/p4c.tofino2.cfg b/backends/tofino/bf-p4c/driver/p4c.tofino2.cfg index 1a8001e26fe..f452bdea69e 100644 --- a/backends/tofino/bf-p4c/driver/p4c.tofino2.cfg +++ b/backends/tofino/bf-p4c/driver/p4c.tofino2.cfg @@ -37,8 +37,7 @@ class Tofino2Backend(bfn.BarefootBackend): self.config_compiler("__TOFINO2_VARIANT__={}".format(Tofino2Variants[target])) def process_command_line_options(self, opts): - if opts.enable_bf_asm or os.getenv("ENABLE_BF_ASM"): - self.config_assembler(self._target) + self.config_assembler(self._target) bfn.BarefootBackend.process_command_line_options(self, opts) for t in Tofino2Variants.keys(): diff --git a/backends/tofino/bf-p4c/mau/action_analysis.cpp b/backends/tofino/bf-p4c/mau/action_analysis.cpp index 3ec4dc731b3..3697c4846b9 100644 --- a/backends/tofino/bf-p4c/mau/action_analysis.cpp +++ 
b/backends/tofino/bf-p4c/mau/action_analysis.cpp @@ -2417,9 +2417,7 @@ void ActionAnalysis::check_constant_to_actiondata(ContainerAction &cont_action, // 16 and 20, the range for instruction constants is different between architectures. // For Tofino it is -8..7 but for JBay it is -4..7 int const_src_min = CONST_SRC_MAX; -#ifdef HAVE_JBAY if (Device::currentDevice() == Device::JBAY) const_src_min = JBAY_CONST_SRC_MIN; -#endif /* HAVE_JBAY */ if (cont_action.convert_instr_to_bitmasked_set || cont_action.convert_instr_to_byte_rotate_merge) { diff --git a/backends/tofino/bf-p4c/mau/asm_output.cpp b/backends/tofino/bf-p4c/mau/asm_output.cpp index 5e19bef1cdd..040a9f3ad40 100644 --- a/backends/tofino/bf-p4c/mau/asm_output.cpp +++ b/backends/tofino/bf-p4c/mau/asm_output.cpp @@ -1053,9 +1053,7 @@ void MauAsmOutput::emit_table_format(std::ostream &out, indent_t indent, fmt_state fmt; out << indent << "format: {"; int group = (ternary || gateway) ? -1 : 0; -#ifdef HAVE_JBAY if (Device::currentDevice() == Device::JBAY && gateway) group = 0; -#endif for (auto match_group : use.match_groups) { int type; diff --git a/backends/tofino/bf-p4c/mau/gateway.cpp b/backends/tofino/bf-p4c/mau/gateway.cpp index 727e570d8cb..559cba36b27 100644 --- a/backends/tofino/bf-p4c/mau/gateway.cpp +++ b/backends/tofino/bf-p4c/mau/gateway.cpp @@ -43,7 +43,6 @@ const Device::GatewaySpec &TofinoDevice::getGatewaySpec() const { }; return spec; } -#if HAVE_JBAY const Device::GatewaySpec &JBayDevice::getGatewaySpec() const { static const Device::GatewaySpec spec = { /* .PhvBytes = */ 4, @@ -59,7 +58,6 @@ const Device::GatewaySpec &JBayDevice::getGatewaySpec() const { }; return spec; } -#endif class CanonGatewayExpr::NeedNegate : public Inspector { bool rv = false; diff --git a/backends/tofino/bf-p4c/mau/stateful_alu.cpp b/backends/tofino/bf-p4c/mau/stateful_alu.cpp index bb419a004e6..e2a6ae97804 100644 --- a/backends/tofino/bf-p4c/mau/stateful_alu.cpp +++ b/backends/tofino/bf-p4c/mau/stateful_alu.cpp @@ 
-46,7 +46,6 @@ const Device::StatefulAluSpec &TofinoDevice::getStatefulAluSpec() const { return spec; } -#if HAVE_JBAY const Device::StatefulAluSpec &JBayDevice::getStatefulAluSpec() const { static const Device::StatefulAluSpec spec = { /* .CmpMask = */ true, @@ -64,7 +63,6 @@ const Device::StatefulAluSpec &JBayDevice::getStatefulAluSpec() const { /* .MaxRegfileRows = */ 4}; return spec; } -#endif /** * @brief This class detects a following pattern: @@ -2283,13 +2281,11 @@ std::map, std::vectorleft_to_place(); diff --git a/backends/tofino/bf-p4c/mau/walk_power_graph.cpp b/backends/tofino/bf-p4c/mau/walk_power_graph.cpp index 7a727149c5d..5a495c12dfe 100644 --- a/backends/tofino/bf-p4c/mau/walk_power_graph.cpp +++ b/backends/tofino/bf-p4c/mau/walk_power_graph.cpp @@ -444,10 +444,8 @@ double WalkPowerGraph::estimate_power() { always_powered_on_.clear(); if (Device::currentDevice() == Device::TOFINO) { return estimate_power_tofino(); -#if HAVE_JBAY } else if (Device::currentDevice() == Device::JBAY) { return estimate_power_non_tofino(); -#endif /* HAVE_JBAY */ } else { BUG("estimate_power -- invalid device %d", Device::currentDevice()); } diff --git a/backends/tofino/bf-p4c/midend.cpp b/backends/tofino/bf-p4c/midend.cpp index 85467f960be..56552c9a28a 100644 --- a/backends/tofino/bf-p4c/midend.cpp +++ b/backends/tofino/bf-p4c/midend.cpp @@ -328,14 +328,12 @@ bool skipFlexibleHeader(const Visitor::Context *, const IR::Type_StructLike *e) */ class CompileTimeOperations : public P4::CompileTimeOperations { bool preorder(const IR::Declaration_Instance *di) { -#ifdef HAVE_JBAY // JBay supports (limited) div/mod in RegisterAction if (Device::currentDevice() == Device::JBAY) { if (auto st = di->type->to()) { if (st->baseType->path->name.name.endsWith("Action")) return false; } } -#endif return true; } }; diff --git a/backends/tofino/bf-p4c/midend/parser_enforce_depth_req.cpp b/backends/tofino/bf-p4c/midend/parser_enforce_depth_req.cpp index 5e9a7922c27..203bd3edd66 100644 --- 
a/backends/tofino/bf-p4c/midend/parser_enforce_depth_req.cpp +++ b/backends/tofino/bf-p4c/midend/parser_enforce_depth_req.cpp @@ -507,9 +507,7 @@ class AddParserPad : public Modifier { // Tofino1-like architectures std::set tofArch = { "tna"_cs, -#if HAVE_JBAY "t2na"_cs, -#endif /* HAVE_JBAY */ }; /** diff --git a/backends/tofino/bf-p4c/phv/phv_fields.cpp b/backends/tofino/bf-p4c/phv/phv_fields.cpp index 9d2f42115e3..45a616126fe 100644 --- a/backends/tofino/bf-p4c/phv/phv_fields.cpp +++ b/backends/tofino/bf-p4c/phv/phv_fields.cpp @@ -1568,9 +1568,9 @@ struct ComputeFieldAlignments : public Inspector { // For non-set instructions accessing an AttachedOutput if ((instr->operands.size() == 3) && (instr->name != "set")) { - int op_id = 0; + bool first = true; for (auto op_f : instr->operands) { - if (!op_id) { + if (first) { // Keep destination field that may need alignment setting dst_f = phv.field(op_f); } else { @@ -1592,7 +1592,7 @@ struct ComputeFieldAlignments : public Inspector { } } } - op_id++; + first = false; } } return false; diff --git a/backends/tofino/cmake/spdlog.cmake b/backends/tofino/cmake/spdlog.cmake index c4d8fdce85e..361da20dd29 100644 --- a/backends/tofino/cmake/spdlog.cmake +++ b/backends/tofino/cmake/spdlog.cmake @@ -1,21 +1,33 @@ message(STATUS "Fetching spdlog") -include(FetchContent) - # Preserve previous FETCHCONTENT_QUIET setting set(FETCHCONTENT_QUIET_PREV ${FETCHCONTENT_QUIET}) set(FETCHCONTENT_QUIET OFF) +set(SPDLOG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/spdlog) + +# Check if the source directory exists. +if(EXISTS ${SPDLOG_SOURCE_DIR}/CMakeLists.txt) + # If it exists but wasn't built before, manually add it. + set(FETCHCONTENT_SOURCE_DIR_SPDLOG ${SPDLOG_SOURCE_DIR}) + # Avoid fetching again. 
+ set(FETCHCONTENT_UPDATES_DISCONNECTED_SPDLOG ON) +endif() + FetchContent_Declare( spdlog GIT_REPOSITORY https://github.com/gabime/spdlog.git GIT_TAG v1.8.3 - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/spdlog + SOURCE_DIR ${SPDLOG_SOURCE_DIR} USES_TERMINAL_DOWNLOAD TRUE GIT_PROGRESS TRUE ) -FetchContent_MakeAvailable(spdlog) +FetchContent_GetProperties(spdlog) +if(NOT spdlog_POPULATED) + FetchContent_Populate(spdlog) + add_subdirectory(${SPDLOG_SOURCE_DIR} ${CMAKE_BINARY_DIR}/spdlog) +endif() # Restore FETCHCONTENT_QUIET setting set(FETCHCONTENT_QUIET ${FETCHCONTENT_QUIET_PREV}) diff --git a/backends/tofino/compiler_interfaces/schemas/mau_schema.py b/backends/tofino/compiler_interfaces/schemas/mau_schema.py index 72a64ef051d..39b0a53dfa8 100644 --- a/backends/tofino/compiler_interfaces/schemas/mau_schema.py +++ b/backends/tofino/compiler_interfaces/schemas/mau_schema.py @@ -233,7 +233,6 @@ class StageMatchMemoryDetails(StageMemoryDetailsWithEntryWidthAndIdeal): class MatchTables(jsl.Document): - class StageDetails(jsl.Document): title = "StageDetails" description = "Information about packing and resource usage on a per-stage basis." diff --git a/backends/tofino/compiler_interfaces/schemas/power_schema.py b/backends/tofino/compiler_interfaces/schemas/power_schema.py index 04e5cddc7e9..386c6da7242 100644 --- a/backends/tofino/compiler_interfaces/schemas/power_schema.py +++ b/backends/tofino/compiler_interfaces/schemas/power_schema.py @@ -111,7 +111,6 @@ class Features(jsl.Document): class MatchTables(jsl.Document): - class StageDetails(jsl.Document): title = "StageDetails" description = "Information about table power usage on a per-stage basis." 
diff --git a/backends/tofino/compiler_interfaces/tools/create_mau_characterize.py b/backends/tofino/compiler_interfaces/tools/create_mau_characterize.py index e2d117eaf3e..ed276579d82 100755 --- a/backends/tofino/compiler_interfaces/tools/create_mau_characterize.py +++ b/backends/tofino/compiler_interfaces/tools/create_mau_characterize.py @@ -766,9 +766,12 @@ def log_match_and_action_formats(all_match_and_action_formats): for table_name, stage in keys: if table_name not in tbl_to_info: tbl_to_info[table_name] = OrderedDict() - match_format_json, actual_match_entries, action_formats_json, actual_action_entries = ( - all_match_and_action_formats[(table_name, stage)] - ) + ( + match_format_json, + actual_match_entries, + action_formats_json, + actual_action_entries, + ) = all_match_and_action_formats[(table_name, stage)] tbl_to_info[table_name][stage] = ( match_format_json, actual_match_entries, @@ -789,9 +792,12 @@ def log_match_and_action_formats(all_match_and_action_formats): all_match_formats = [] all_action_formats = [] for stage in tbl_to_info[table_name]: - match_format_json, actual_match_entries, action_formats_json, actual_action_entries = ( - tbl_to_info[table_name][stage] - ) + ( + match_format_json, + actual_match_entries, + action_formats_json, + actual_action_entries, + ) = tbl_to_info[table_name][stage] all_match_formats.append((stage, match_format_json, actual_match_entries)) all_action_formats.append((stage, action_formats_json, actual_action_entries)) @@ -844,9 +850,12 @@ def produce_mau_characterize(source, output): log.info("%s\n" % box) # Populate table summary information - table_info, sram_summary, all_overhead_structures, all_match_and_action_formats = ( - _parse_mau_json(context) - ) + ( + table_info, + sram_summary, + all_overhead_structures, + all_match_and_action_formats, + ) = _parse_mau_json(context) # Output summary table in log file diff --git a/backends/tofino/compiler_interfaces/tools/create_mau_json.py 
b/backends/tofino/compiler_interfaces/tools/create_mau_json.py index d121e48f011..6874e88f50d 100755 --- a/backends/tofino/compiler_interfaces/tools/create_mau_json.py +++ b/backends/tofino/compiler_interfaces/tools/create_mau_json.py @@ -579,10 +579,11 @@ def get_match_memory(match_stage_table, match_table, context, entries_so_far): mem_elem["entry_bit_width_requested"] = ideal_entry_bits mem_elem["entry_bit_width_allocated"] = allocated_match_bits - mem_elem["ideal_entries_per_table_word"], mem_elem["ideal_table_word_bit_width"] = ( - get_ideal_match_entries( - match_stage_table, match_table, mem_elem["imm_bit_width_in_overhead_requested"] - ) + ( + mem_elem["ideal_entries_per_table_word"], + mem_elem["ideal_table_word_bit_width"], + ) = get_ideal_match_entries( + match_stage_table, match_table, mem_elem["imm_bit_width_in_overhead_requested"] ) memories.append(mem_elem) diff --git a/ir/json_loader.h b/ir/json_loader.h index 335a256630a..34fa571185c 100644 --- a/ir/json_loader.h +++ b/ir/json_loader.h @@ -92,11 +92,17 @@ class JSONLoader { const IR::Node *get_node() { if (!json || !json->is()) return nullptr; // invalid json exception? int id; - load("Node_ID", id); + auto success = load("Node_ID", id); + if (!success) { + return nullptr; + } if (id >= 0) { if (node_refs.find(id) == node_refs.end()) { cstring type; - load("Node_Type", type); + auto success = load("Node_Type", type); + if (!success) { + return nullptr; + } if (auto fn = get(IR::unpacker_table, type)) { node_refs[id] = fn(*this); // Creating JsonObject from source_info read from jsonFile