diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/Makefile b/RT-AK/rt_ai_tools/platforms/drivers/dpu/Makefile
new file mode 100644
index 00000000..67f14cbf
--- /dev/null
+++ b/RT-AK/rt_ai_tools/platforms/drivers/dpu/Makefile
@@ -0,0 +1,54 @@
+# Out-of-tree build of the DPU driver against a Linux kernel tree; generates dpu.ko
+#
+# a typical command to build the driver for Zynq 7000:
+#   make ARCH=arm CROSS_COMPILE=arm-linux-gnueabihf- KERNELDIR=/path/to/your/kernel
+#
+# a typical command to build the driver for UltraScale+:
+#   make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- KERNELDIR=/path/to/your/kernel
+#
+# DPU_TARGET=1.1 selects -DCONFIG_DPU_v1_1_X; any other value selects
+# -DCONFIG_DPU_v1_3_0. A user-supplied KCFLAGS is appended to the DPU flags.
+#
+modname  := dpu
+obj-m    := $(modname).o
+dpu-objs := dpucore.o
+
+# Flags for Kbuild are collected in a private variable and handed over as
+# KCFLAGS="..." on the sub-make command line, so a KCFLAGS coming from the
+# user can never end up as the first word of the recipe.
+ifeq ($(DPU_TARGET),1.1)
+dpu_kcflags := -DCONFIG_DPU_v1_1_X
+else
+dpu_kcflags := -DCONFIG_DPU_v1_3_0
+endif
+
+ifeq ($(ARCH),arm)
+dpu_kcflags += -DSIG_BASE_ADDR=0X4FF00000 -DCACHE_OFF
+endif
+ifeq ($(ARCH),arm64)
+dpu_kcflags += -DSIG_BASE_ADDR=0X8FF00000
+endif
+
+# check the compiler version: the date-time warning options are only added
+# for GCC >= 4.9. The compiler is queried once and compared with shell
+# arithmetic (no dependency on bc); a version without a minor part counts
+# as minor 0.
+dpu_gcc_version := $(subst ., ,$(shell $(CROSS_COMPILE)gcc -dumpversion 2>/dev/null))
+dpu_gcc_major   := $(word 1,$(dpu_gcc_version))
+dpu_gcc_minor   := $(or $(word 2,$(dpu_gcc_version)),0)
+ifneq ($(dpu_gcc_major),)
+dpu_date_time_ok := $(shell test $$(( $(dpu_gcc_major) * 100 + $(dpu_gcc_minor) )) -ge 409 && echo 1)
+ifeq ($(dpu_date_time_ok),1)
+dpu_kcflags += -Wno-error=date-time -Wno-date-time
+endif
+endif
+
+all:
+	$(if $(strip $(KERNELDIR)),,$(error KERNELDIR is not set; pass KERNELDIR=/path/to/your/kernel))
+	$(MAKE) -C $(KERNELDIR) M=$(CURDIR) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) KCFLAGS="$(strip $(dpu_kcflags) $(KCFLAGS))" modules
+
+clean:
+	$(RM) -r $(modname).ko *.o *mod* .*cmd *odule* .tmp_versions
+
+.PHONY: all clean
+
diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/README.md b/RT-AK/rt_ai_tools/platforms/drivers/dpu/README.md
new file mode 100644
index 00000000..e12cecd7
--- /dev/null
+++ b/RT-AK/rt_ai_tools/platforms/drivers/dpu/README.md
@@ -0,0 +1,675 @@
+# 基于Zynq UltraScale+ MPSoC的DPU在RT-Thread Smart下的移植
+
+## 项目介绍
+
+Vitis AI开发环境可在赛灵思硬件平台上加速AI推理,该开发环境已将底层可编程逻辑的复杂细节抽象化,从而帮助不具备FPGA硬件专业知识的软件开发者开发AI应用。我们以Xilinx Vitis AI v1.2版本为基础,结合Xilinx官方文档等资料,首先收集并梳理Vitis AI开发流程与软件的内部运行机制:
+
+1. 硬件平台基于黑金AXU2CGB,其芯片类型为Zynq UltraScale+ MPSoC,可利用PS端ARM核与PL端可编程逻辑实现Vitis AI的深度学习处理器(DPU)的部署,与AI应用的加速推理;
+2. 梳理基于Vitis AI的DPU硬件加速平台的基本概念,了解PL端DPU硬件与PS端如何通信与控制;梳理网络模型的量化、编译等处理是如何进行的,梳理经处理的网络模型以何种方式被DPU硬件所加速的;
+3. 实现Vitis AI应用在开发板的全流程部署,并总结了文字资料,包括:
+   1. CNN网络模型的量化、编译;
+   2. 基于Zynq UltraScale+ MPSoC的DPU硬件(DPUCZDX8G)的开发流程与集成至定制开发板;
+   3. 借助交叉编译工具链,完成边缘端应用程序的编写与编译;
+   4. 部署并运行模型,借助DPU硬件,实现AI应用的加速推理;
+4. 基于Vitis AI v3.0及配套软件,梳理通过Vivado工作流与Petalinux,将DPU硬件集成到定制开发板上,并构建Linux系统映像的开发流程;
+5. 分析Vitis AI v1.2与v3.0软件平台的架构,梳理Vitis AI Library、VART运行时的运行机制。分析移植工作的软件层面为了实现控制DPU的所需的部件,从而裁剪出移植至RT Smart上所需的最少组件;
+
+在此之后,我们总结移植到RT Smart系统上所必需开展的工作,并尝试:
+
+1. 整合开发板启动时所需的相关文件,包含启动文件及必要的BSP、外设驱动;
+2. 整合RT Smart的源码,与驱动DPU的最小组件源代码;
+3. 使用特定的交叉编译工具链,编译源码至aarch64平台上;
+4. 上板测试,验证能否通过软件控制DPU硬件,从而验证DPU移植是否成功;
+
+以下将分为几个部分:
+
+1. [基于Vivado 2022.2工作流集成适用于ZynqMP的DPU IP](#基于vivado-20222工作流集成适用于zynqmp的dpu-ip)
+2. [基于Petalinux 2022.2创建并配置系统映像](#基于petalinux-20222创建并配置系统映像)
+3. [基于黑金提供的示例的流程梳理](#基于黑金提供的示例的流程梳理)
+4. 
[项目遇到的问题](#项目遇到的问题) + +## 基于Vivado 2022.2工作流集成适用于ZynqMP的DPU IP + +### 环境及软件 + +Linux/Windows均可。Vivado 2022.2,DPUCZDX8G V4.1,DPU IP及软件版本兼容性如[IP and Tool Version Compatibility](https://xilinx.github.io/Vitis-AI/3.0/html/docs/reference/version_compatibility.html#version-compatibility)所示: + +
ZynqMP_DPU-IP-1-IPandToolVersionCompatibility
+ +DPU V4.1参考设计及IP下载:[Vitis™ AI DPU IP and Reference Designs](https://github.com/Xilinx/Vitis-AI/blob/3.0/dpu/README.md) + +### 创建工程及添加Zynq IP核 + +1. 打开Vivado 2022.2,创建新工程,设置工程名、工程目录。如下图所示,工程将建立在 `~/WORK/ospp/workspace/axu2cgb_hw_dpu` 目录下,同时生成 `axu2cgb_hw_dpu.xpr` 工程文件。 + +
ZynqMP_DPU-IP-2-NewProject
+ +2. “Project Type“选择RTL Project,“Add Sources“、“Add Constraints“跳过,“Default Part“下,搜索FPGA型号xczu2cg-sfvc784-1-e,选中,最后完成工程的创建。 + +
ZynqMP_DPU-IP-3-DefaultType
+ +3. 点击左侧“Flow Navigator“栏目下“IP INTEGRATOR“内“Create Block Design”,创建一个图形化框图设计,“Design name”可修改,其余默认即可。 + +
ZynqMP_DPU-IP-4-CreateBlockDesign
+ +4. 点击加号,添加Zynq UltraScale+ MPSoC IP,添加到Diagram后,双击该IP核,进行参数配置。 + +
ZynqMP_DPU-IP-5-AddZynqIP
+ +### 配置Zynq IP核 + +#### I/O Configuration + +1. 上方“MIO Voltage Standard”内,BANK0~2均设置为LVCMOS18、BANK3设置为LVCMOS33。“Low Speed”下勾选QSPI,设置Single、x4、勾选Feedback Clk。 + +
ZynqMP_DPU-IP-6-BANKandQSPI
+ +2. 下方,勾选SD0,配置eMMC,选择MIO13..22、“Slot Type”选择eMMC、8bit,勾选Reset。 + +
ZynqMP_DPU-IP-7-SD0
+ +3. 勾选SD1,配置SD卡。**先将**Slot Type选择为SD2.0,**再选择**MIO46..51,4bit,勾选CD。 + +
ZynqMP_DPU-IP-8-SD1
+ +4. I/O Peripherals下,I2C勾选I2C1,选择MIO32..33;UART勾选UART1,选择MIO24..25; + +
ZynqMP_DPU-IP-9-I2CandUART
+ +5. Processing Unit下,TTC勾选TTC0~3。 + +
ZynqMP_DPU-IP-10-TTC
+ +6. 继续在“High Speed”下,勾选GEM3,配置以太网,勾选MDIO3。 + +
ZynqMP_DPU-IP-11-GEM
+ +7. USB下,勾选USB0,勾选USB3.0,选择 GT Lane1。“USB Reset”下,改成Shared MIO Pin,勾选USB0,选择MIO 44。 + +
ZynqMP_DPU-IP-12-USB
+ +8. 勾选PCIe,之后勾选左上角“Switch To Advanced Mode”,进入“PCIe Configuration”。 + +#### PCIe Configuration + +1. 修改如图所示的几个参数。 + +
ZynqMP_DPU-IP-13-PCIe
+ +2. 回到“I/O Configuration”,PCIe下“Rootport Mode Reset”选择MIO 37。勾选Display Port,“DPAUX”选择MIO27..30。 + +
ZynqMP_DPU-IP-14-DP
+ +#### Clock Configuration + +1. “Input Clocks”栏目下,PSS_REF_CLK默认33.333MHz,PCIe选择Ref Clk0,Display Port选择Ref Clk2,USB0选择Ref Clk1。 + +
ZynqMP_DPU-IP-15-InputClocks
+ +2. “Output Clocks”栏目下,“Low Power Domain Clocks”的前两项内,从CPU_R5开始到GEM_TSU的时钟均改为IOPLL。 + +
ZynqMP_DPU-IP-16-OutputClocks1
+ +3. 继续,“Full Power Domain Clocks”下,修改DP与最下方Interconnect相关的几个时钟\*。 + +
ZynqMP_DPU-IP-17-OutputClocks2
+ +#### DDR Configuration + +1. “Load DDR Presets”选择DDR4_MICRON_MT40A256M16GE_083E。 + +
ZynqMP_DPU-IP-18-DDR
+ +#### DPU IP + +##### 配置DPU + +1. 添加DPU IP至IP Catalog。点击左侧“Project Manager“下”IP Catalog“,如下图所示。右键单击并选中Add Repository,然后选择 DPUCZDX8G IP的位置。例如,下载的DPU IP的文件为“DPUCZDX8G_VAI_v3.0” + +
ZynqMP_DPU-IP-19-AddDPUIP
+ +
ZynqMP_DPU-IP-20-AddDPUIP2
+ +2. 在Diagram内,将DPUCZDX8G IP添加到设计中。配置DPU IP,如下图所示。 + +
ZynqMP_DPU-IP-21-DPUConfig1
+ +
ZynqMP_DPU-IP-22-DPUConfig2
+ + 此时,DPU IP的端口如下图所示。 + +
ZynqMP_DPU-IP-23-DPUConfig3
+ +##### Zynq设置中断及AXI + +1. 配置Zynq IP核的中断、AXI总线。每个DPUCZDX8G都有3个主接口,1个用于指令提取,其余2个用于数据访问。配置Zynq IP,在”PS-PL Configuration”下,打开PL-PS的IRQ0中断。 + +
ZynqMP_DPU-IP-24-ZynqIRQ0
+ +2. 开启Zynq的AXI。在“PS-PL Interfaces”下Master端,开启AXI HPM0 LPD,位宽32,将DPU的从端口连接至此。Slave端,开启AXI HP0/1/2 FPD,位宽128,用于与DPU的数据传输;开启AXI LPD,位宽32,用于指令提取。 + +
ZynqMP_DPU-IP-25-ZynqAXI
+ + 此时,Zynq IP如图所示。 + +
ZynqMP_DPU-IP-26-ZynqIPAdded
+ +3. 添加时钟。点击Diagram上方加号,或者右键“Add IP”,添加Clock Wizard IP,并配置如下:“Clock Options”下,勾选Auto、Frequency Synthesis; + +
ZynqMP_DPU-IP-30-ClockWizard4
+ + “Output Clocks”下,勾选clk_out1与clk_out2,分别命名clk_dsp、clk_dpu,频率分别为650、325MHz,拉到右侧,将clk_dsp的“Drives”修改为Buffer with CE;两个时钟均勾选“Matched Routing“。下方,勾选reset、locked、Active Low与LATENCY。 + +
ZynqMP_DPU-IP-27-ClockWizard1
+ +
ZynqMP_DPU-IP-28-ClockWizard2
+ +
ZynqMP_DPU-IP-29-ClockWizard3
+ +4. 为整个系统添加三个复位IP,添加Processor System Reset IP,分别命名rst_gen_clk、rst_gen_reg、rst_gen_clk_dsp。 + +5. 添加一个Concat IP,“Number of Ports”设置为2,In0与In1的宽度均为1。 + +6. 此时: + +
ZynqMP_DPU-IP-31-AddResets
+ +#### 系统连线 + +
ZynqMP_DPU-IP-32-BlocksConnection
+ +1. 复位IP连线: + 1. rst_gen_reg + - slowest_sync_clk连接至时钟IP的clk_in1 + - ext_reset_in连接至Zynq的pl_resetn0 + - dcm_locked连接至时钟IP的locked + - peripheral_aresetn连接至时钟IP的resetn + 2. rst_gen_clk + - slowest_sync_clk连接至时钟IP的clk_dpu + - ext_reset_in连接至时钟IP的resetn + - dcm_locked连接至时钟IP的locked + - peripheral_aresetn连接至DPU IP的m_axi_dpu_aresetn + 3. rst_gen_clk_dsp + - slowest_sync_clk连接至时钟IP的clk_dsp + - ext_reset_in连接至时钟IP的resetn + - dcm_locked连接至时钟IP的locked + - peripheral_aresetn连接至DPU IP的dpu_2x_resetn +2. DPU IP + - S_AXI连接Zynq M_AXI_HPM0_LPD + - s_axi_aclk连接Zynq pl_clk0 + - …… + +3. 其他连线参见图片 + +#### 分配地址 + +1. Diagram连线完毕后,切换到“Address Editor“下,先选中zynq_ultra_ps_e_0下的S_AXI,右键“Assign”。 + +
ZynqMP_DPU-IP-33-AddressAssign
+ +2. 待S_AXI分配好后,点击上方“Assign All”,为其他总线分配地址。 + +
ZynqMP_DPU-IP-34-AddressAssign2
+ +3. 回到Diagram下,点击上方“Validate Design”对设计进行检查,无误会提示检查成功。 + +
ZynqMP_DPU-IP-36-ValidateDesign
+ +### 生成顶层文件直到导出 `.xsa` + +1. 左侧“Source”下,选中这个 `.bd` 设计,右键选择“Generate Output Products”,“Synthesis Options”选择”Out of context per IP”。 + +
ZynqMP_DPU-IP-37-GenOutputProduct
+ +2. 同样的步骤,选择“Create HDL Wrapper”,之后默认选项,这将生成一个顶层的 `.v` 文件。 + +
ZynqMP_DPU-IP-38-CreateHDLWrapper
+ +3. 左侧“Flow Navigator”,点击最下方“Generate Bitstream”,会显示目前没有Implementation,点击Yes,默认设置继续。Vivado将自动完成综合、布局布线、生成比特流。 + + *也可以在左侧“Flow Navigator”,分步完成“Run Synthesis”、“Run Implementation”、“Generate Bitstream”* + +
ZynqMP_DPU-IP-39-GenBitstream
+ +4. 软件左上角File->Export->Export Hardware,在“Output”页面选择“Include bitstream”,然后设置XSA文件名及导出路径,之后则可成功导出 `.xsa` 文件。 + +
ZynqMP_DPU-IP-40-ExportXSA
+ +## 基于Petalinux 2022.2创建并配置系统映像 + +### 环境及软件 + +1. Petalinux 2022.2,建议Ubuntu 20.04/22.04 + + *注意:系统glibc版本过高会导致Petalinux构建失败,Petalinux 2022.2的建议是glibc 2.34,可使用 `ldd --version` 查看glibc版本。构建Petalinux需要良好的网络连接。* + +2. DPUCZDX8G_VAI_v3.0(其中DPU v4.1)。DPU IP及软件版本兼容性参见上一章节[环境及软件](#环境及软件)。 + +### [创建Petalinux工程](https://github.com/Xilinx/Vitis-AI/blob/3.0/dpu/ref_design_docs/README_DPUCZ_Vivado_sw.md) + +1. 设置Petalinux工作区环境: + + ```shell + source <petalinux-install-dir>/settings.sh + ``` + +2. 创建并配置工程,这样将在当前目录下创建名为 `PROJECT` 的工程目录。 + + ```shell + petalinux-create -t project -n <PROJECT> --template zynqMP + ``` + + ![1-创建petalinux工程](images/1-创建petalinux工程.png) + + 之后进入该目录,并以 `xsa` 文件配置工程 + + ```shell + cd axu2cgb_petalinux + petalinux-config --get-hw-description=<path-to-xsa> --silentconfig + ``` + + ![2-xsa配置工程](images/2-xsa配置工程.png) + +3. 由于是使用Vivado集成DPU的方式,需要复制 DPU TRD目录下,`$TRD_HOME/prj/Vivado/sw/meta-vitis/recipes-kernel` 至 `<petalinux-project>/project-spec/meta-user` 下,这其中包含DPU驱动补丁及DPU配置文件。 + +4. 输入 `petalinux-config -c kernel` ,进入menuconfig配置页面,找到如下选项并开启,保存并退出。此时在kernel内启用了DPU驱动。 + + ```shell + Device Drivers --> + Misc devices --> + <*> Xilinx Deep learning Processing Unit (DPU) Driver + ``` + +5. 使用recipes-vitis-ai,从而在构建Petalinux时将vitis ai library一并构建。 + + 1. 将vitis ai library添加到rootfs:复制 `$TRD_HOME/prj/Vivado/sw/meta-vitis/recipes-vitis-ai` 到 `<petalinux-project>/project-spec/meta-user` 下 + + 2. 将以下几行内容添加到 `<petalinux-project>/project-spec/meta-user/conf/user-rootfsconfig` 中 + + ```shell + CONFIG_vitis-ai-library + CONFIG_vitis-ai-library-dev + CONFIG_vitis-ai-library-dbg + ``` + + 之后,运行 `petalinux-config -c rootfs` ,找到vitis-ai-library并选中启用。 + + ```shell + Select user packages ---> + Select [*] vitis-ai-library + ``` + +6. 生成EXT4 rootfs:输入 `petalinux-config` ,选择root的文件系统为EXT4;同时将 `mmcblk0p2` 改为 `mmcblk1p2` ,因为AXU2CGB开发板SD1才为外接SD卡。 + + ```shell + Image Packaging Configuration ---> + Root filesystem type (EXT4 (SD/eMMC/SATA/USB)) + (/dev/mmcblk1p2) Device node of SD device + ``` + +7. 
修改设备树。进入到 `/project-spec/meta-user/recipes-bsp/device-tree/files` 目录下,修改 `system-user.dtsi` 为: + + ```dts + /include/ "system-conf.dtsi" + /{ + }; + /* SD */ + &sdhci1 { + disable-wp; + no-1-8-v; + }; + /* USB */ + &dwc3_0 { + status = "okay"; + dr_mode = "host"; + }; + ``` + +8. 编译。输入 `petalinux-build` 。编译完成后,终端如下图所示。 + + ![Build成功](images/3-build成功.png) + +9. 创建启动映像、rootfs等文件。 + + ```shell + cd images/linux + petalinux-package --boot --fsbl zynqmp_fsbl.elf --u-boot u-boot.elf --pmufw pmufw.elf --fpga system.bit --force + ``` + +10. 将生成的 `BOOT.BIN`、`boot.scr`、`image.ub` 复制到SD卡 `/boot` FAT32分区,第二个分区存放根文件系统,EXT4,将 `rootfs.tar.gz` 解压后放入该分区。 + +11. 设置开发板为SD模式启动,上电。 + +## 基于黑金提供的示例的流程梳理 + +### 环境 + +Vitis AI v1.2、docker(并确保用户位于docker用户组内)、petalinux sdk 2020.1,黑金提供的 `SD_card.img` + +参考:[UG1414](https://docs.xilinx.com/r/1.2-English/ug1414-vitis-ai) + +1. [Vitis AI v1.2](https://github.com/Xilinx/Vitis-AI/tree/v1.2) + + ```shell + git clone -b v1.2 --recurse-submodules https://github.com/Xilinx/Vitis-AI.git + ``` + +2. 拉取Vitis AI docker,注意指定版本 `1.2.82` + + ```shell + docker pull xilinx/vitis-ai:1.2.82 + ``` + +3. 安装交叉编译工具 Petalinux sdk 2020.1。 + + 1. 下载 [sdk-2020.1.0.0.sh](https://www.xilinx.com/bin/public/openDownload?filename=sdk-2020.1.0.0.sh),并安装。之后通过 `source` 可启动该环境 + + ```shell + bash ./sdk-2020.1.0.0.sh + + bash /environment-setup-aarch64-xilinx-linux + ``` + + 2. 下载 [vitis_ai_2020.1-r1.2.0.tar.gz](https://www.xilinx.com/bin/public/openDownload?filename=vitis_ai_2020.1-r1.2.0.tar.gz),解压并安装到SDK内。 + + ```shell + tar -xzvf vitis_ai_2020.1-r1.2.x.tar.gz -C ~/petalinux_sdk/sysroots/aarch64-xilinx-linux + ``` + + 在此之后,对于Vitis AI的应用,使用该交叉编译工具完成在x86_64主机上对aarch64程序的编译。 + + 3. 要验证交叉编译工具是否安装成功,可尝试编译Vitis AI Library内的应用程序,例如: + + ```shell + cd ./Vitis-AI/Vitis-AI-Library/overview/demo/yolov3 + bash -x build.sh + ``` + + 如果没有报错,则成功。 + +### 应用示例 + +1. 
模型下载。Vitis AI应用使用的模型,需要经过量化、结合DPU硬件规格信息( `.json`、`.dcf` )后编译生成 `.elf` 模型文件以供应用调用DPU进行加速。 + + 以使用 [tf_resnetv1_50_imagenet_224_224_6.97G_1.2](https://www.xilinx.com/bin/public/openDownload?filename=tf_resnetv1_50_imagenet_224_224_6.97G_1.2.zip) 模型、运行 `Vitis-AI/VART/samples/resnet50/src/main.cc` 图片分类任务为例。模型文件参考 `./Vitis-AI/AI-Model-Zoo/README.md` 内的说明,该模型对应编号47。 + + 下载后,目录 `tf_resnetv1_50_imagenet_224_224_6.97G_1.2/quantized` 下 `deploy_model.pb` 则为量化后的模型。 + +2. 模型编译。 + + 1. 准备DPU的架构文件。将黑金的 `AXU2CGB_DPU_B1152` 文件夹复制到 `Vitis-AI/AI-Model-Zoo` 目录下(或者任意位置),其中含有 `.json` 与 `.dcf`。并修改 `AXU2CGB_DPU_B1152.json` 文件中的dcf路径: + + ```json + { + "target" : "DPUCZDX8G", + "dcf" : "./AI-Model-Zoo/AXU2CGB_DPU_B1152/AXU2CGB_DPU_B1152.dcf", + "cpu_arch" : "arm64" + } + ``` + + 2. 在Vitis AI目录下,进入docker环境 + + ```shell + ./docker_run.sh xilinx/vitis-ai:1.2.82 + ``` + + 在docker内,激活conda环境,并编译模型,指定量化后的模型 `.pb`、模型名称、DPU架构文件 + + ```shell + $ conda activate vitis-ai-tensorflow + $ vai_c_tensorflow --frozen_pb ./AI-Model-Zoo/tf_resnetv1_50_imagenet_224_224_6.97G_1.2/quantized/deploy_model.pb --net_name resnet50_tf --arch ./AI-Model-Zoo/AXU2CGB_DPU_B1152/AXU2CGB_DPU_B1152.json --output_dir ./compiled_model --quant_info + ``` + + 3. 终端将输出: + + ```bash + ************************************************** + * VITIS_AI Compilation - Xilinx Inc. + ************************************************** + [VAI_C][Warning] layer [resnet_v1_50_SpatialSqueeze] (type: Squeeze) is not supported in DPU, deploy it in CPU instead. + [VAI_C][Warning] layer [resnet_v1_50_predictions_Softmax] (type: Softmax) is not supported in DPU, deploy it in CPU instead. 
+ + Kernel topology "resnet50_tf_kernel_graph.jpg" for network "resnet50_tf" + kernel list info for network "resnet50_tf" + Kernel ID : Name + 0 : resnet50_tf_0 + 1 : resnet50_tf_1 + + Kernel Name : resnet50_tf_0 + -------------------------------------------------------------------------------- + Kernel Type : DPUKernel + Code Size : 1.00MB + Param Size : 24.35MB + Workload MACs : 6964.51MOPS + IO Memory Space : 2.25MB + Mean Value : 0, 0, 0, + Total Tensor Count : 59 + Boundary Input Tensor(s) (H*W*C) + input:0(0) : 224*224*3 + + Boundary Output Tensor(s) (H*W*C) + resnet_v1_50_logits_Conv2D:0(0) : 1*1*1000 + + Total Node Count : 58 + Input Node(s) (H*W*C) + resnet_v1_50_conv1_Conv2D(0) : 224*224*3 + + Output Node(s) (H*W*C) + resnet_v1_50_logits_Conv2D(0) : 1*1*1000 + + Kernel Name : resnet50_tf_1 + -------------------------------------------------------------------------------- + Kernel Type : CPUKernel + Boundary Input Tensor(s) (H*W*C) + resnet_v1_50_SpatialSqueeze:0(0) : 1*1*1000 + + Boundary Output Tensor(s) (H*W*C) + resnet_v1_50_predictions_Softmax:0(0) : 1*1*1000 + + Input Node(s) (H*W*C) + resnet_v1_50_SpatialSqueeze : 1*1*1000 + + Output Node(s) (H*W*C) + resnet_v1_50_predictions_Softmax : 1*1*1000 + ``` + + 4. 在 `compiled_model` 目录下,有 `.gv` 与 `.elf` 文件,其中 `dpu_resnet50_tf_0.elf` 为编译后的模型,该文件将在应用程序中调用。 + +3. 应用程序编译。启动交叉编译工具链,进入 `./Vitis-AI/VART/samples/resnet50` 目录: + + ```shell + bash /environment-setup-aarch64-xilinx-linux + cd Vitis-AI/VART/samples/resnet50 + bash -x build.sh + ``` + + 最终生成可执行文件 `resnet50` 。 + +### 上机 + +1. 使用Balena Etcher等SD卡烧录工具,将黑金提供的 `SD_card.img` 映像烧录至SD卡。之后,SD卡会有 `boot` 分区(FAT32)与 `rootfs` 分区(EXT4)。对于EXT4分区,其大小可以根据应用的资源需要,选择扩容。 + +2. 资源准备。 + + 1. 
将VART rpm包提前复制到SD卡rootfs分区内。(下述文件准备可使用scp)下载 [vitis-ai-runtime-1.2.x.tar.gz](https://www.xilinx.com/bin/public/openDownload?filename=vitis-ai-runtime-1.2.0.tar.gz) + + 参考文档:[ug1354 v1.2/Installing-AI-Library-Package](https://docs.xilinx.com/r/1.2-English/ug1354-xilinx-ai-sdkStep-3-Installing-AI-Library-Package) + + ```shell + tar -xzvf vitis-ai-runtime-1.2.x.tar.gz + sudo cp -r vitis-ai-runtime-1.2.x/aarch64/centos /home/root/ + ``` + + 2. 下载 [vitis_ai_runtime_r1.2.0_image_video.tar.gz](https://www.xilinx.com/bin/public/openDownload?filename=vitis_ai_runtime_r1.2.0_image_video.tar.gz) ,解压后为一个 `samples` 目录,我们的应用示例仅会用到 `images` 目录下的图片,但需要保持这个目录层级(可在应用程序的相应部分修改、重新编译)。新建 `samples/resnet50` 目录,并将可执行文件 `resnet50` 与 `word.txt` 均复制到该目录下。同时将 `dpu_resnet50_tf_0.elf` 模型文件复制到 `samples` 目录下。最后将 `samples` 复制到rootfs分区内。 + + ```shell + sudo cp -r samples /home/root/Vitis-AI/VART/ + ``` + +3. 将开发板调成SD启动模式,插入SD卡,启动。 + +4. 安装VART。 + + ```shell + cd ~/centos + rpm -ivh --force libunilog-1.2.0-r10.aarch64.rpm + rpm -ivh --force libxir-1.2.0-r12.aarch64.rpm + rpm -ivh --force libtarget-factory-1.2.0-r10.aarch64.rpm + rpm -ivh --force libvart-1.2.0-r16.aarch64.rpm + ``` + +5. 修改 `vart.conf`。安装完VART后,`/etc/vart.conf` 文件将指向 `dpu.xclbin`,需修改路径,使其能找到 `dpu.xclbin`。 + + ```shell + echo "firmware: /media/sd-mmcblk1p1/dpu.xclbin" > /etc/vart.conf + ``` + + *注意是 `sd-mmcblk1p1` + +6. 
运行程序,并指定 `.elf` 模型文件的路径,程序将自动读取位于 `../images` 下的图片,并根据 `./words.txt` 的类别给出分类结果。 + + ````shell + cd ~/Vitis-AI/VART/samples/resnet50 + ./resnet50 /dpu_resnet50_tf_0.elf + ```` + + 例如: + + ```bash + root@zynqmp-common-2020_1:~/Vitis-AI/samples/resnet50# ./resnet50 ../dpu_resnet50_tf_0.elf + WARNING: Logging before InitGoogleLogging() is written to STDERR + I0528 14:11:07.966706 713 main.cc:288] create running for subgraph: resnet50_tf_0 + [ 73.177126] [drm] Pid 713 opened device + [ 73.180999] [drm] Pid 713 closed device + [ 73.184937] [drm] Pid 713 opened device + [ 73.188780] [drm] Pid 713 closed device + [ 73.267527] [drm] Pid 713 opened device + [ 73.271410] [drm] Pid 713 closed device + [ 73.281589] [drm] Pid 713 opened device + [ 73.285520] [drm] Pid 713 closed device + [ 73.330933] [drm] Pid 713 opened device + [ 73.334880] [drm] Finding IP_LAYOUT section header + [ 73.334882] [drm] Section IP_LAYOUT details: + [ 73.339673] [drm] offset = 0x54fd18 + [ 73.343938] [drm] size = 0x58 + [ 73.347599] [drm] Finding DEBUG_IP_LAYOUT section header + [ 73.350734] [drm] AXLF section DEBUG_IP_LAYOUT header not found + [ 73.356039] [drm] Finding CONNECTIVITY section header + [ 73.361948] [drm] Section CONNECTIVITY details: + [ 73.366992] [drm] offset = 0x54fd70 + [ 73.371513] [drm] size = 0x7c + [ 73.375174] [drm] Finding MEM_TOPOLOGY section header + [ 73.378310] [drm] Section MEM_TOPOLOGY details: + [ 73.383354] [drm] offset = 0x54fbf8 + [ 73.387875] [drm] size = 0x120 + [ 73.391546] [drm] Download new XCLBIN C5C1F682-9C76-40F5-AA5C-7994437F69E9 done. + [ 73.394774] [drm] zocl_xclbin_read_axlf c5c1f682-9c76-40f5-aa5c-7994437f69e9 ret: 0. 
+ [ 73.409898] [drm] -> Hold xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, from ref=0 + [ 73.417637] [drm] <- Hold xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, to ref=1 + [ 73.425160] [drm] No ERT scheduler on MPSoC, using KDS + [ 73.437648] [drm] scheduler config ert(0) + [ 73.437650] [drm] cus(1) + [ 73.441651] [drm] slots(16) + [ 73.444356] [drm] num_cu_masks(1) + [ 73.447317] [drm] cu_shift(16) + [ 73.450796] [drm] cu_base(0x80000000) + [ 73.454017] [drm] polling(0) + [ 73.457868] [drm] -> Release xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, from ref=1 + [ 73.460911] [drm] now xclbin can be changed + [ 73.468656] [drm] <- Release xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, to ref=0 + [ 73.473175] [drm] Pid 713 opened device + [ 73.484635] [drm] -> Hold xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, from ref=0 + + Image : 001.jpg + top[0] prob = 0.493322 name = lampshade, lamp shade + top[1] prob = 0.110075 name = sea anemone, anemone + top[2] prob = 0.051996 name = wig + top[3] prob = 0.040494 name = coil, spiral, volute, whorl, helix + top[4] prob = 0.031537 name = vase + + (Classification of ResNet50:713): Gtk-[1;33mWARNING[0m **: [34m14:11:08.525[0m: cannot open display: + [ 73.484638] [drm] <- Hold xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, to ref=1 + [ 73.694025] [drm] -> Release xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, from ref=1 + [ 73.701329] [drm] now xclbin can be changed + [ 73.709066] [drm] <- Release xclbin C5C1F682-9C76-40F5-AA5C-7994437F69E9, to ref=0 + [ 73.713247] [drm] Pid 713 closed device + [ 73.724863] [drm] Pid 713 closed device + ``` + +### 项目遇到的问题 + +#### 基于Vitis AI v3.0流程系统映像无法启动 + +Vitis AI版本迭代快速。相较于之前的版本,新版在开发流程的难易程度、稳定性、适用性上更胜一筹。适用于Zynq UltraScale+ MPSoC开发板的最新版本为v3.0,相较于v1.2,新版对我们项目的好处在于: + +| v3.0 | v1.2 | +| :--------------------------------------: | :------------------------------: | +| 流程更新,部分流程更简单 | 流程较老,部分流程繁琐 | +| 参考文档更全面、易于概念的理解 | 参考文档较少,概念与新版存在差异 | +| 功能上,适配PyTorch、TF2等更为常用的框架 | 仅适配TF1、Caffe等框架 | +| 功能更加成熟,移植新版的适用性更佳 | 
功能不全,移植老版的价值更低 | + +因此,我们首先基于Vitis AI v3.0展开研究与开发工作。基于Vitis Ai v3.0与Vivado 2022.2,实现了DPU硬件的配置与集成,并通过Petalinux将其编译至系统内,最终生成了系统映像。该部分工作参见上述已完成工作。 + +在部署至开发板并启动的过程中,我们遇到了严重的问题。这导致开发板无法正常启动,因此无法进行后续模型的部署与最终的应用推理。对此,我们对全开发流程进行了仔细的排查,包括: + +1. Vivado 2022.2下,开发板硬件(包括DPU)的配置与集成; +2. 使用Petalinux 2022.2,配置系统时的设置; +3. 最终生成的系统映像,其启动文件、设备树等设置是否正确; + +同时,我们将错误复现及一些有用的日志文件等信息记录了下来,将问题反馈至开发板厂商,同时在赛灵思官方论坛上进行求助,该问题参见:[Stuck when booting Petalinux image from SD card with custom zynqmp board and DPU IP in Vivado Integration](https://support.xilinx.com/s/feed/0D54U00007K4jRMSAZ?language=en_US) + +受限于项目的时间规划,由于对该问题的解决无明显进展,因此,我们最终放弃了基于Vitis AI v3.0的开发方案。转向使用参考资料较多的Vitis AI v1.2版本进行开发与移植。 + +#### VART/DNNDK源代码及版本差异 + +鉴于使用v3.0可以实现DPU硬件集成,但开发板无法启动。因此,我们使用黑金提供的基于Vitis AI v1.2可用的系统映像,其内已集成了特定配置的DPU硬件,因此我们可以简化研究步骤,在此基础上直接研究VART运行时等开发流程与架构之间的联系。 + +在我们需要获取与DPU关联的软件运行时时,我们发现赛灵思在[Vitis AI v1.2仓库](https://github.com/Xilinx/Vitis-AI/tree/1.2)下未开源其VART源代码,因此我们无法获取该部分源代码,并整合至RT Smart内,从而编译成最终系统。 + +解决方案:尝试切换至[v1.3版本](https://github.com/Xilinx/Vitis-AI/tree/v1.3/tools/Vitis-AI-Runtime/VART),VART v1.3版本包含如下4个模块: + +1. unilog +2. xir +3. target_factory +4. vart + +模块间耦合较多,相互存在依赖关系,因此难以拆分。若拆分需要修改大多数源代码,成本较高,且编译阶段会出现难以预测的问题。将v1.3与v1.2版本的开发流程进行比较,两版本的VART所承担的功能差异不明显,因此可尝试在黑金提供的v1.2系统映像中,将应用程序用v1.3版本的VART进行交叉编译并运行。 + +尝试该方案,原运行在v1.2运行时上的应用程序亦可在v1.3版本运行时上正常推理。因此,可以采用VART v1.3版本的源代码进行移植工作。 + +#### 移植DPU驱动至RT Smart + +待确定移植的VART版本后,我们对源码进行了分析。发现其依赖的库多且复杂,具体存在如下几方面的困难阻碍我们将其移植到RT Smart: + +1. 模块间耦合较多,相互存在依赖关系。例如,编译 `vart` ,依赖 `unilog` 、 `target_factory` ; +2. 依赖第三方库,例如 `unilog` 内部依赖 `glog` ,`target_fatory` 依赖 `protobuf` 等; +3. RT Smart使用的交叉编译工具链aarch64-linux-musleabi_for_x86_64-pc-linux-gnu缺少部分头文件,需要额外找齐; +4. 
RT Smart使用scons组织目录与管理编译,而VART内使用CMake管理编译,难以用scons完全替代。 + +限于时间规划,放弃完整移植VART。转而仅移植DPU驱动相关的源代码,若能将该部分代码使用aarch64-linux-musleabi_for_x86_64-pc-linux-gnu编译成动态库,后续即可加载至RT Smart系统内,并实现对DPU硬件的控制。 + +经分析,v1.3版本的Vitis AI,依旧使用DNNDK(深度神经网络开发套件)运行时框架驱动DPU。在DNNDK之上,是C++/Python的编程接口,其内部负责DPU的加载、调度、追踪等。其中,与DPU驱动相关的源代码包括:`dpucore.c`、`dpucore.h`、`dpudef.h`,具体参见链接:[DNNDK/driver](https://github.com/EmbeddedCamerata/Vitis-AI/tree/v1.3%2B/tools/Vitis-AI-Runtime/DNNDK/driver)。 + +#### DPU驱动头文件 + +上述与DPU驱动相关的源代码所依赖的头文件大多是Linux下的头文件,无法直接使用RT Smart所使用的aarch64-linux-musleabi_for_x86_64-pc-linux-gnu交叉编译工具进行编译。头文件依赖主要在 `dpucore.h` 中。下表总结了大部分依赖的头文件及其功能描述。 + +| 头文件 | 使用举例 | 功能描述 | +| :--------------------------: | :-----------------------------------------------: | :--------------------------: | +| linux/mutex.h | semaphore | 信号相关 | +| asm/\* | atomic_t | 多线程原子锁 | +| linux/mm.h->linux/mm_types.h | vm_area_struct | 虚拟地址空间区域相关 | +| linux/platform_device.h | platform_device, struct platform_driver | 设备资源信息相关 | +| linux/interrupt.h | irqreturn_t, irq_handler_t | 中断相关 | +| linux/of.h | of_find_compatible_node(), of_get_property(), etc | `of_` 开头的函数均在此头文件 | +| linux/list.h | list_head | 链表 | +| linux/dma-mapping.h | dma_addr_t, phys_addr_t | DMA | +| linux/wait.h | wait_queue_head_t, wake_up_interruptible() | 内核等待队列相关 | +| linux/fs.h | inode, file, struct file_operations | 字符驱动设备相关 | +| / | char __user | 特殊宏 | +| asm/io.h | iowrite32(), ioremap() | 内核读写寄存器相关 | +| linux/delay.h | udelay() | 内核级延迟 | +| linux/miscdevice.h | struct miscdevice, misc_register() | 注册字符设备驱动程序 | + +一种可行的解决方案是寻求用musl libc库,或者Posix的头文件进行替代,并尝试编译,但这其中涉及较为底层的实现。 diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/SConscript b/RT-AK/rt_ai_tools/platforms/drivers/dpu/SConscript new file mode 100644 index 00000000..a81e49c2 --- /dev/null +++ b/RT-AK/rt_ai_tools/platforms/drivers/dpu/SConscript @@ -0,0 +1,14 @@ +# RT-Thread building script for component + +from building import * + +cwd = GetCurrentDir() +CPPPATH = [cwd] + +src = 
Split("""dpucore.c""") + +group = DefineGroup('dpu', src, depend = [''], CPPPATH = CPPPATH) + +objs = [group] + +Return('objs') diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpucore.c b/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpucore.c new file mode 100644 index 00000000..db7b2e05 --- /dev/null +++ b/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpucore.c @@ -0,0 +1,954 @@ +/* + * Copyright (C) 2019 Xilinx, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + */ + +#include "dpucore.h" + +#define DEVICE_NAME "dpu" + +typedef struct { + uint8_t size; + uint32_t *address; +}signature_handle_t; + +typedef struct { + uint8_t core_num; // the numbers of DPU IPs + uint16_t regs_size; // register count in bytes for each DPU IP + void *address[MAX_CORE_NUM]; // the base address of earch DPU IP +}ip_dpu_handle_t; + +// Application functions +static unsigned long dpu_mem_alloc(uint32_t memsize); +static int dpu_mem_free(void *paddr); +static void dpu_regs_init(ioc_aol_init_t *data); +static int run_dpu(ioc_aol_run_t *prun); +static int run_softmax(ioc_aol_run_t *prun); +static void sync_mem_to_device(ioc_cache_ctrl_t *pmem); +static void sync_mem_from_device(ioc_cache_ctrl_t *pmem); +static void get_ips_device_handle(ioc_aol_device_handle_t *dev); +static int read_regs_32(void *address, uint32_t byte_len, uint32_t *output); + +// Lowlevel functions +static int mask2id(uint32_t mask); +static uint64_t get_kernel_time(void); +static uint32_t field_mask_value(uint32_t val, uint32_t mask); + +// Driver probe and remove +static int get_signature_base_addr(signature_handle_t *signature_handle); +static int get_dpu_cores_base_addr(uint32_t *signature_address, ip_dpu_handle_t *dpu_handle); +static int init_softmax(void *signature_address, struct device_node *pdpunode); +static int dpu_probe(struct platform_device 
*pdev); +static int dpu_remove(struct platform_device *pdev); +static void softmax_remove(void); + +// Device interrupt +irqreturn_t dpu_isr(int irq, void *data); +irqreturn_t softmax_isr(int irq, void *data); + +// Device operation +static long dpu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +static int dpu_mmap(struct file *file, struct vm_area_struct *vma); +static ssize_t dpu_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos); +static ssize_t dpu_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos); +static int dpu_release(struct inode *inode, struct file *filp); +static int dpu_open(struct inode *inode, struct file *filp); + +// Driver entry +static int __init dpu_init(void); +static void __exit dpu_exit(void); + +static signature_handle_t g_signature_handle; +static ip_dpu_handle_t g_ip_dpu_handle; +static ioc_aol_device_handle_t g_aol_device_handle; + +//DPU signature base address +unsigned long signature_addr = SIG_BASE; + +uint accipmask = 0x0; +softmax_reg_t *gp_smfc_regs; + +#if defined(CACHE_OFF) +int cache = 0; // whether use cache; 0:no, 1:yes +#else +int cache = 1; // whether use cache; 0:no, 1:yes +#endif + +// the following parameters read from device tree +static int DPU_CORE_NUM; + +atomic_t g_ref_count; //< dpu device open count +static struct device *dev_handler; +dpu_intrrupt_data_t *gp_dpu_ip_data[IP_MAX_COUNT]; +dpu_intrrupt_data_t g_dpu_core_data[MAX_CORE_NUM]; +dpu_intrrupt_data_t g_smfc_core_data[1]; + +struct miscdevice g_misc_device_register; + +/*dpu registers*/ +DPUReg *pdpureg; + +struct list_head head_alloc; /*head of alloced memory block*/ + +struct semaphore memblk_lock; + +/** + * alloc a memory block from the available memory list. 
+ * @memsize : size of memory + * + * RETURN: address of alloced memory; NULL returned if no enough space exists + */ +static unsigned long dpu_mem_alloc(uint32_t memsize) +{ + void *virtaddr; + dma_addr_t phy_addr; + struct memblk_node *pnewnode; + + memsize = (memsize + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); //at least one page frame + + virtaddr = dma_alloc_coherent(dev_handler, memsize, &phy_addr, GFP_KERNEL); + if (NULL != virtaddr) { + pnewnode = kmalloc(sizeof(struct memblk_node), GFP_KERNEL); + + if (pnewnode) { + pnewnode->virt_addr = (unsigned long)virtaddr; + pnewnode->size = memsize; + pnewnode->phy_addr = phy_addr; + + down(&memblk_lock); + list_add(&pnewnode->list, &head_alloc); + up(&memblk_lock); + } else { + dma_free_coherent(dev_handler, memsize, virtaddr, phy_addr); + phy_addr = 0; + dprint("kmalloc fail when adding memory node\n"); + } + return phy_addr; + } else { + return 0; + } +} + +/** + * Remove the memory block frome alloc list to the available + * memory list and merge with the neighbor node if necessary + * @paddr : address of memory block to be free + */ +static int dpu_mem_free(void *paddr) +{ + struct list_head *plist; + struct memblk_node *p; + + down(&memblk_lock); + + list_for_each (plist, &head_alloc) { + p = list_entry(plist, struct memblk_node, list); + if (p->phy_addr == (dma_addr_t)paddr) { + dma_free_coherent(dev_handler, p->size, (void *)p->virt_addr, p->phy_addr); + list_del(&p->list); + kfree(p); + up(&memblk_lock); + return 0; + } + } + up(&memblk_lock); + + dprint("free memory failed,address=0x%p\n", paddr); + + return -ENXIO; +} + +/** + * dpu registers initialize + * @channel: the dpu channel [0,DPU_CORE_NUM) need to be initialize, + * set all channel if the para is DPU_CORE_NUM + */ +static void dpu_regs_init(ioc_aol_init_t *data) +{ + int32_t i; + uint32_t index = 0; + uint32_t mask = data->core_mask; + uint8_t *base_addr; + uint8_t *p_regs; + + // IP name + for (i = 0; i < IP_MAX_COUNT; i++) { + if (data->ip_id == i) 
{ + break; + } + index += g_aol_device_handle.core_count[i]; + } + if (i >= IP_MAX_COUNT) { + return; + } + + // core mask + for (i = 0; i < 32; i++) { + if (mask == 0) { // done + break; + } else if (mask & 0x01) { // Write regs + base_addr = (uint8_t *)g_aol_device_handle.core_phy_addr[index]; + p_regs = ioremap((phys_addr_t)base_addr, MAX_REG_SIZE); + for (i = 0; i < data->reg_count; i++) { + iowrite32(data->regs[i].value, p_regs + data->regs[i].offset); + if (data->regs_delay_us[i]) { + udelay(data->regs_delay_us[i]); + } + } + iounmap(p_regs); + } + + mask >>= 1; + index++; + } +} + +/** + * Run dpu function + * @prun : dpu run struct, contains the necessary address info + * + */ +static int run_dpu(ioc_aol_run_t *prun) +{ + int i, ret = 0; + uint8_t *pvalue; + int dpu_core = mask2id(prun->core_mask); + dpu_intrrupt_data_t *p_cur_core = &gp_dpu_ip_data[IP_ID_DPU][dpu_core]; + + if (dpu_core >= DPU_CORE_NUM) { + // should never get here + dprint("ERR_CORE_NUMBER %d!\n", dpu_core); + return -EINTR; + } + + down(&p_cur_core->dpu_lock); + prun->time_start = get_kernel_time(); + + pvalue = ((uint8_t *)pdpureg) + (dpu_core * 0x100); + for (i = 0; i < prun->reg_count; i++) { + iowrite32(prun->regs[i].value, pvalue + prun->regs[i].offset); + } + + iowrite32(0x1, &(pdpureg->ctlreg[dpu_core].start)); + + // wait for the dpu task to be finished + ret = wait_event_interruptible_timeout(p_cur_core->waitqueue, p_cur_core->irq_flag == TRUE, + prun->timeout * HZ); + p_cur_core->irq_flag = FALSE; + + prun->time_end = get_kernel_time(); + up(&p_cur_core->dpu_lock); + + if (ret == 0) { + dprint("[PID %d]Core %d Run timeout,failed to get finish interrupt!\n", current->pid, dpu_core); + } + + return ret > 0 ? 0 : (ret == 0 ? 
-ETIMEDOUT : ret); +} + +/** + * softmax calculation acceleration using softmax IP + * @para : softmax parameter structure + *· + * @return: 0 if successful; otherwise -errno + */ +static int run_softmax(ioc_aol_run_t *prun) +{ + int i; + int ret = 0; + dpu_intrrupt_data_t *p_cur_core = gp_dpu_ip_data[IP_ID_SOFTMAX]; + + down(&p_cur_core->dpu_lock); + + // write softmax parameters + for (i = 0; i < prun->reg_count; i++) { + iowrite32(prun->regs[i].value, ((uint8_t *)gp_smfc_regs) + prun->regs[i].offset); + } + + // start calculation + iowrite32(1, &gp_smfc_regs->start); + iowrite32(0, &gp_smfc_regs->start); + + ret = wait_event_interruptible_timeout(p_cur_core->waitqueue, p_cur_core->irq_flag == TRUE, + prun->timeout * HZ); + p_cur_core->irq_flag = FALSE; + + up(&p_cur_core->dpu_lock); + + if (ret == 0) { + dprint("softmax timeout!\n"); + } + + return ret > 0 ? 0 : (ret == 0 ? -ETIMEDOUT : ret); +} + +/** + * flush memory range to ensure content is flushed to RAM + * @pmem: memory fresh structure contains start virtual address and size + */ +static void sync_mem_to_device(ioc_cache_ctrl_t *pmem) +{ + dma_sync_single_for_device(dev_handler, pmem->addr_phy, pmem->size, DMA_BIDIRECTIONAL); +} + +/** + * invalid memory range to ensure following reading comes from RAM + * @pmem: memory fresh structure contains start virtual address and size + */ +static void sync_mem_from_device(ioc_cache_ctrl_t *pmem) +{ + dma_sync_single_for_cpu(dev_handler, pmem->addr_phy, pmem->size, DMA_BIDIRECTIONAL); +} + +static uint32_t field_mask_value(uint32_t val, uint32_t mask) +{ + int i; + int max_bit = sizeof(uint32_t) * 8; + int lowest_set_bit = max_bit - 1; + + /* Iterate through each bit of mask */ + for (i = 0; i < max_bit; i++) { + /* If current bit is set */ + if ((mask >> i) & 1) { + lowest_set_bit = i; + break; + } + } + + return (val & mask) >> lowest_set_bit; +}; + +static void get_ips_device_handle(ioc_aol_device_handle_t *dev) { + int i; + int counter = 0; + + // Get 
signature base address + dev->core_count[IP_ID_VER_REG] = 1; + dev->core_phy_addr[counter++] = (uint64_t)g_signature_handle.address; + + // Get DPU cores base address + dev->core_count[IP_ID_DPU] = g_ip_dpu_handle.core_num; + for (i = 0; i < dev->core_count[IP_ID_DPU]; i++) { + dev->core_phy_addr[counter++] = (uint64_t)g_ip_dpu_handle.address[i]; + } + + // Get Softmax base address + dev->core_count[IP_ID_SOFTMAX] = 1; + dev->core_phy_addr[counter++] = (uint64_t)DPU_EXT_SOFTMAX_BASE((unsigned long)g_signature_handle.address); +} + +/** + * 0, Success. -1, Invalid DPU core number + */ +static int get_dpu_cores_base_addr(uint32_t *signature_address, ip_dpu_handle_t *dpu_handle) { + int i; + uint32_t signature_field = 0; + + read_regs_32(signature_address + 3, 4, &signature_field); + dpu_handle->core_num = field_mask_value(signature_field, DPU_CORENUM_MASK); + dpu_handle->regs_size = DPU_SIZE; + + if ((dpu_handle->core_num == 0) || (dpu_handle->core_num > MAX_CORE_NUM)) { + dpr_init("Core number %d invalid!\n", DPU_CORE_NUM); + return -1; + } + + // Get each dpu base address + for (i = 0; i < dpu_handle->core_num; i++) { + dpu_handle->address[i] = (void *)DPU_BASE((unsigned long)signature_address); + } + + return 0; +} + +static int read_regs_32(void *address, uint32_t byte_len, uint32_t *output) { + uint32_t i; + uint32_t *p_regs; + + if (byte_len % 4 != 0) { + dpr_init("read_regs_32 only support uint32_t read \n"); + return -1; + } + + p_regs = ioremap((phys_addr_t)address, byte_len); + for (i = 0; i < (byte_len >> 2); i++) { + output[i] = ioread32(p_regs + i); + } + iounmap(p_regs); + + return 0; +} + +static uint64_t get_kernel_time(void) { + uint64_t time; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0) + time = ktime_get(); +#else + time = ktime_get().tv64; +#endif + return time; +} + +static int mask2id(uint32_t mask) { + int i; + uint32_t test = 1; + for (i = 0; i < 32; i++) { + if ((mask & test) != 0) { + break; + } + test <<= 1; + } + + return i; +} + 
+/** + * dpu open function + */ +static int dpu_open(struct inode *inode, struct file *filp) +{ + if (atomic_read(&g_ref_count) == 0) { + } + + atomic_inc(&g_ref_count); + + return 0; +} + +/** + * dpu close function + * */ +static int dpu_release(struct inode *inode, struct file *filp) +{ + struct list_head *plist, *nlist; + struct memblk_node *p; + + if (atomic_dec_and_test(&g_ref_count)) { + down(&memblk_lock); + list_for_each_safe (plist, nlist, &head_alloc) { + p = list_entry(plist, struct memblk_node, list); + dma_free_coherent(dev_handler, p->size, (void *)p->virt_addr, p->phy_addr); + list_del(&p->list); + kfree(p); + } + INIT_LIST_HEAD(&head_alloc); + up(&memblk_lock); + } + + return 0; +} + +static ssize_t dpu_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t dpu_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) +{ + return 0; +} + +/** + * dpu mmap function + */ +static int dpu_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!cache) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, vma->vm_page_prot)) { + return -EAGAIN; + } + return 0; +} + +/** + * dpu ioctl function + */ +static long dpu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case DPU_IOCTL_MEM_ALLOC: { // memory alloc + struct ioc_mem_alloc_t t; + if (copy_from_user(&t, (void *)arg, sizeof(struct ioc_mem_alloc_t))) { + return -EINVAL; + } + if (t.size == 0) + return -EINVAL; + t.addr_phy = dpu_mem_alloc(t.size); + if (t.addr_phy == 0) + return -ENOMEM; + if (copy_to_user((void *)arg, &t, sizeof(struct ioc_mem_alloc_t))) + return -EINVAL; + break; + } + case DPU_IOCTL_MEM_FREE: { // memory free + struct ioc_mem_free_t t; + if (copy_from_user(&t, (void *)arg, sizeof(struct ioc_mem_free_t))) { + return -EINVAL; + } + ret = 
dpu_mem_free((void *)t.addr_phy); + + break; + } + case DPU_IOCTL_RUN: { // run dpu + ioc_aol_run_t t; + if (copy_from_user(&t, (void *)arg, sizeof(ioc_aol_run_t))) { + return -EINVAL; + } + + if (t.ip_id == IP_ID_DPU) { + ret = run_dpu(&t); + } else if (t.ip_id == IP_ID_SOFTMAX) { + ret = run_softmax(&t); + } else { + return -EINVAL; + } + + if (copy_to_user((void *)arg, &t, sizeof(ioc_aol_run_t))) + return -EINVAL; + + break; + } + case DPU_IOCTL_GET_DEV_HANDLE: { + get_ips_device_handle(&g_aol_device_handle); + if (copy_to_user((void *)arg, &g_aol_device_handle, sizeof(ioc_aol_device_handle_t))) { + return -EINVAL; + } + + break; + } + case DPU_IOCTL_INIT: { // reset dpu + ioc_aol_init_t t; + if (copy_from_user(&t, (void *)arg, sizeof(ioc_aol_init_t))) { + return -EINVAL; + } + dpu_regs_init(&t); + break; + } + case DPU_IOCTL_SYNC_TO_DEV: { // flush cache range by physical address + ioc_cache_ctrl_t t; + if (copy_from_user(&t, (void *)arg, sizeof(ioc_cache_ctrl_t))) + return -EINVAL; + sync_mem_to_device(&t); + break; + } + case DPU_IOCTL_SYNC_FROM_DEV: { // invalidate cache range by physical address + ioc_cache_ctrl_t t; + if (copy_from_user(&t, (void *)arg, sizeof(ioc_cache_ctrl_t))) + return -EINVAL; + sync_mem_from_device(&t); + break; + } + case DPU_IOCTL_READ_REGS: { // dpu capabilities + ioc_aol_read_regs_t t; + if (copy_from_user(&t, (void *)arg, 16)) { + return -EINVAL; + } + if ((t.byte_size >> 2) >= READ_REG_DEFAULT_BUF_LEN) { + dpr_init("Read regs size %d is overflow, shoule no more than %d\n", t.byte_size, READ_REG_DEFAULT_BUF_LEN); + return -EINVAL; + } + if (read_regs_32((void *)t.phy_address, t.byte_size, t.out_buffer) != 0) { + return -EINVAL; + } + if (copy_to_user((void *)arg, &t, sizeof(ioc_aol_read_regs_t))) { + return -EINVAL; + } + break; + } + default: { + ret = -EPERM; + break; + } + } + + return ret; +} + +/*dpu file operation define */ +static struct file_operations dev_fops = { + + .owner = THIS_MODULE, + .unlocked_ioctl = dpu_ioctl, 
+ .open = dpu_open, + .release = dpu_release, + .read = dpu_read, + .write = dpu_write, + .mmap = dpu_mmap, +}; + +/** + * dpu interrupt service routine + * when a task finished, dpu will generate a interrupt, + * we can look up the IRQ No. to determine the channel + */ +irqreturn_t dpu_isr(int irq, void *data) +{ + int i = 0; + dpu_intrrupt_data_t *p_cur_core = gp_dpu_ip_data[IP_ID_DPU]; + + // Determine which channel generated the interrupt + for (i = 0; i < DPU_CORE_NUM; i++) { + if (irq == p_cur_core[i].irq_no) { + // clear the interrupt + iowrite32(0, &pdpureg->ctlreg[i].prof_en); + iowrite32(0, &pdpureg->ctlreg[i].start); + iowrite32((1 << i), &pdpureg->intreg.icr); + udelay(1); + iowrite32(0, &pdpureg->intreg.icr); + + // set the finish flag,record the time,and notify the waiting queue + p_cur_core[i].irq_flag = TRUE; + + wake_up_interruptible(&p_cur_core[i].waitqueue); + } + } + + return IRQ_HANDLED; +} + +/** + * dpu extension modules isr + * @irq : interrupt number + * @data : additional data + */ +irqreturn_t softmax_isr(int irq, void *data) +{ + dpu_intrrupt_data_t *p_cur_core = gp_dpu_ip_data[IP_ID_SOFTMAX]; + if (irq == p_cur_core->irq_no) { + if (accipmask & DPU_EXT_SOFTMAX) { + p_cur_core->irq_flag = TRUE; + // clear smfc interrupt + iowrite32(1, &gp_smfc_regs->clr); + iowrite32(0, &gp_smfc_regs->clr); + + wake_up_interruptible(&p_cur_core->waitqueue); + } + } + + return IRQ_HANDLED; +} + +static const char *dts_node_prefix[] = { + "xilinx,", + "xilinx, ", + "Xilinx,", + "Xilinx, ", + "deephi,", + "deephi, ", + "Deephi,", + "Deephi, ", +}; + +struct device_node *dpu_compatible_node(const char *compat) +{ + int idx=0, max=0; + char dst_node[255]; + struct device_node *pdpu_node = NULL; + + if (strlen(compat)>128) { + return NULL; + } + + max = sizeof(dts_node_prefix)/sizeof(char *); + for (idx=0; idxirq_no = irq_of_parse_and_map(node, 0); + if (p_cur_core->irq_no < 0) { + dpr_init("Softmax IRQ res not found!\n"); + return p_cur_core->irq_no; + } + 
ret = request_irq(p_cur_core->irq_no, (irq_handler_t)softmax_isr, 0, "dpu_smfc", NULL); + if (ret != 0) { + dpr_init("Request softmax IRQ %d failed!\n", p_cur_core->irq_no); + return ret; + } else { + dpr_init("Request softmax IRQ %d successful.", p_cur_core->irq_no); + } + + // map smfc register + reg_base = DPU_EXT_SOFTMAX_BASE((unsigned long)signature_address); + reg_size = DPU_EXT_SOFTMAX_SIZE; + gp_smfc_regs = (ioremap(reg_base, reg_size)); + if (!gp_smfc_regs) { + dpr_init("Map softmax registers error!\n"); + return -EINVAL; + } + + // Init Softmax data + init_waitqueue_head(&p_cur_core->waitqueue); + sema_init(&p_cur_core->dpu_lock, 1); + p_cur_core->irq_flag = FALSE; + + accipmask |= (softmax_valid ? DPU_EXT_SOFTMAX : 0); + dpr_init("Init softmax IP done\n"); + } + + return ret; +} + +/** + * 0, Success. -1, Invalid 'signature-addr' value + */ +static int get_signature_base_addr(signature_handle_t *signature_handle) +{ + const void *prop; + struct device_node *pdpu_node; + unsigned long signature_address = SIG_BASE; + unsigned long base_addr_dtsi = 0; + uint32_t signature_field = 0; + + pdpu_node = dpu_compatible_node("dpu"); + if (!pdpu_node) { + dpr_init("Not found DPU device node!\n"); + return -1; + } + + prop = of_get_property(pdpu_node, "base-addr", NULL); + if (prop) { + base_addr_dtsi = of_read_ulong(prop, 1); + } + if (base_addr_dtsi) { + dpr_init("Found DPU signature addr = 0x%lx in device-tree\n", base_addr_dtsi); + signature_address = base_addr_dtsi + 0x00F00000; + } + + if (signature_address == SIG_BASE_NULL) { + dpr_init("Signature address is NULL, please check.\n"); + return -1; + } + + dpr_init("Checking DPU signature at addr = 0x%lx, \n", signature_address); + read_regs_32((void *)signature_address, 4, &signature_field); + if ((signature_field & SIG_MAGIC_MASK) == SIG_MAGIC) { + signature_handle->size = field_mask_value(signature_field, SIG_SIZE_MASK); + signature_handle->address = (void *)signature_address; + } else { + 
signature_handle->size = 0; + signature_handle->address = 0; + dpr_init("Invalid 'signature-addr' value specified in DPU device tree, please check.\n"); + return -1; + } + + return 0; +} + +/** + * Platform probe method for the dpu driver + * @pdev: Pointer to the platform_device structure + * + * This function initializes the driver data structures and the hardware. + * + * @return: 0 on success and error value on failure + */ +static int dpu_probe(struct platform_device *pdev) +{ + int ret, i; + struct device_node *pdpu_node, *dpucore_node; + uint32_t signature_length = 0; + uint32_t signature_field = 0; + uint32_t *signature_va; + dpu_intrrupt_data_t *p_cur_core; + + // init gp_dpu_ip_data + gp_dpu_ip_data[IP_ID_DPU] = g_dpu_core_data; + gp_dpu_ip_data[IP_ID_SOFTMAX] = g_smfc_core_data; + gp_dpu_ip_data[IP_ID_FULLCONNECT] = g_smfc_core_data; + + p_cur_core = gp_dpu_ip_data[IP_ID_DPU]; + dev_handler = &(pdev->dev); + dpucore_node = dpu_compatible_node("dpucore"); + + pdpu_node = dpu_compatible_node("dpu"); + if (!pdpu_node) { + dpr_init("Not found DPU device node!\n"); + return -ENXIO; + } + + ret = get_signature_base_addr(&g_signature_handle); + if (ret != 0) { + return -ENXIO; + } + + ret = get_dpu_cores_base_addr(g_signature_handle.address, &g_ip_dpu_handle); + if (ret != 0) { + return -EINVAL; + } + + signature_length = g_signature_handle.size; + signature_va = ioremap((phys_addr_t)g_signature_handle.address, + signature_length * sizeof(signature_field)); + + // offset 3 + read_regs_32(((uint32_t *)g_signature_handle.address) + 3, 4, &signature_field); + DPU_CORE_NUM = field_mask_value(signature_field, DPU_CORENUM_MASK); + + // map the dpu Register, all DPUs share the same registers area + pdpureg = (DPUReg *)ioremap((phys_addr_t)g_ip_dpu_handle.address[0], g_ip_dpu_handle.regs_size); + if (!pdpureg) { + dpr_init("Map DPU registers error!\n"); + return -ENXIO; + } + + // init DPU data + for (i = 0; i < DPU_CORE_NUM; i++) { + 
init_waitqueue_head(&p_cur_core[i].waitqueue); + sema_init(&p_cur_core[i].dpu_lock, 1); + p_cur_core[i].irq_flag = FALSE; + } + + // memory structure init + sema_init(&memblk_lock, 1); + INIT_LIST_HEAD(&head_alloc); + + // register interrupt service routine for DPU + for (i = 0; i < DPU_CORE_NUM; i++) { + p_cur_core[i].irq_no = dpucore_node? irq_of_parse_and_map(dpucore_node, i): platform_get_irq(pdev, i); + + if (p_cur_core[i].irq_no < 0) { + dprint("IRQ resource not found for DPU core %d\n", i); + return p_cur_core[i].irq_no; + } + + ret = request_irq(p_cur_core[i].irq_no, (irq_handler_t)dpu_isr, 0, "dpu_isr", NULL); + if (ret != 0) { + dpr_init("Request IRQ %d failed!\n", p_cur_core[i].irq_no); + return ret; + } else { + } + } + + // Reset DPU + iowrite32(0, &pdpureg->pmu.reset); + udelay(1); // wait 1us + iowrite32(0xFFFFFFFF, &pdpureg->pmu.reset); + iowrite32(0xFF, &pdpureg->intreg.icr); + udelay(1); // wait 1us + iowrite32(0, &pdpureg->intreg.icr); + + // initialize extent modules + init_softmax(g_signature_handle.address, pdpu_node); + + // Register the dpu device + g_misc_device_register.name = DEVICE_NAME; + g_misc_device_register.minor = MISC_DYNAMIC_MINOR; + g_misc_device_register.fops = &dev_fops; + g_misc_device_register.mode = S_IWUGO | S_IRUGO; + return misc_register(&g_misc_device_register); +} + +static void softmax_remove(void) +{ + dpu_intrrupt_data_t *p_cur_core = gp_dpu_ip_data[IP_ID_SOFTMAX]; + if (accipmask & DPU_EXT_SOFTMAX) { + // clean smfc moudle + iounmap(gp_smfc_regs); + free_irq(p_cur_core->irq_no, NULL); + } +} + +/** + * Platform remove method for the dpu driver + * @pdev: Pointer to the platform_device structure + * + * This function is called if a device is physically removed from the system or + * if the driver module is being unloaded. It frees all resources allocated to + * the device. 
+ * + * @return: 0 on success and error value on failure + */ +static int dpu_remove(struct platform_device *pdev) +{ + int i; + + // remove dpu + dpu_intrrupt_data_t *p_cur_core = gp_dpu_ip_data[IP_ID_DPU]; + misc_deregister(&g_misc_device_register); + + for (i = 0; i < DPU_CORE_NUM; i++) + free_irq(p_cur_core[i].irq_no, NULL); + iounmap(pdpureg); + + // remove extend mdoules + softmax_remove(); + + return 0; +} + +static const struct of_device_id dpu_dt_ids[] = { { .compatible = "deephi, dpu" }, + { .compatible = "deephi,dpu" }, + { .compatible = "xilinx, dpu" }, + { .compatible = "xilinx,dpu" }, + { /* end of table */ } }; + +static struct platform_driver dpu_drv = { + .driver = { + .name = "dpu", + .of_match_table = dpu_dt_ids, + }, + .probe = dpu_probe, + .remove = dpu_remove, +}; + +/** + * dpu initialize function + */ +static int __init dpu_init(void) +{ + return platform_driver_register(&dpu_drv); +} + +/** + * dpu uninstall function + */ +static void __exit dpu_exit(void) +{ + platform_driver_unregister(&dpu_drv); +} + +//---------------------------------------------------------- +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Xilinx"); +module_init(dpu_init); +module_exit(dpu_exit); diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpucore.h b/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpucore.h new file mode 100644 index 00000000..1cd7ce3d --- /dev/null +++ b/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpucore.h @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2019 Xilinx, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + */ + +#ifndef _DPUCORE_H_ +#define _DPUCORE_H_ + +#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include +#include +#include +// #include +// #include +// #include +#include +#include +// #include +#include +// #include +// #include +#include +// #include +// #include +#include +// #include +// #include +// #include +// #include +// #include +// #include +#include + + + +#include "dpudef.h" + +#define DPU_DRIVER_VERSION "4.0.0" + +#define DPU_EXT_HDMI (1 << 1) +#define DPU_EXT_BT1120 (1 << 2) +#define DPU_EXT_FULLCONNECT (1 << 3) +#define DPU_EXT_SOFTMAX (1 << 4) +#define DPU_EXT_RESIZE (1 << 5) + +#define SIG_BASE_NULL 0X00000000 +#ifdef SIG_BASE_ADDR +#define SIG_BASE SIG_BASE_ADDR +#else +#define SIG_BASE SIG_BASE_NULL +#endif + +#define SIG_BASE_MASK 0XFF000000 +#define DPU_BASE(signature) (((signature) & SIG_BASE_MASK) + 0x0000) +#define DPU_SIZE 0X00000700 +#define DPU_EXT_SOFTMAX_BASE(signature) (((signature) & SIG_BASE_MASK) + 0x0700) +#define DPU_EXT_SOFTMAX_SIZE 0X00000041 +#define MAX_REG_SIZE 0X00001000 + +/*dpu signature magic number*/ +#define SIG_MAGIC 0X4450 + +#define SIG_SIZE_MASK 0XFF000000 +#define SIG_VER_MASK 0X00FF0000 +#define SIG_MAGIC_MASK 0X0000FFFF + +#define DPU_CORENUM_MASK 0X0000000F +#define SOFTMAX_VLD_MASK 0X01000000 + +#define FALSE 0 +#define TRUE 1 + +#define dprint(fmt, args...) \ + do { \ + printk(KERN_ERR "[DPU][%d]" fmt, current->pid, \ + ##args); \ + } while (0) + +#define dpr_init(fmt, args...) 
pr_alert("[DPU][%d]" fmt, current->pid, ##args); + +/*dpu registers*/ +#define MAX_CORE_NUM 4 +typedef struct __DPUReg { + /*dpu pmu registers*/ + struct __regs_dpu_pmu { + volatile uint32_t version; + volatile uint32_t reset; + volatile uint32_t _rsv[62]; + } pmu; + + /*dpu rgbout registers*/ + struct __regs_dpu_rgbout { + volatile uint32_t display; + volatile uint32_t _rsv[63]; + } rgbout; + + /*dpu control registers struct*/ + struct __regs_dpu_ctrl { + volatile uint32_t hp_ctrl; + volatile uint32_t addr_io; + volatile uint32_t addr_weight; + volatile uint32_t addr_code; + volatile uint32_t addr_prof; + volatile uint32_t prof_value; + volatile uint32_t prof_num; + volatile uint32_t prof_en; + volatile uint32_t start; + volatile uint32_t com_addr[16]; //< extension for DPUv1.3.0 + volatile uint32_t _rsv[39]; + + } ctlreg[MAX_CORE_NUM]; + + /*dpu interrupt registers struct*/ + struct __regs_dpu_intr { + volatile uint32_t isr; + volatile uint32_t imr; + volatile uint32_t irsr; + volatile uint32_t icr; + volatile uint32_t _rsv[60]; + + } intreg; + +} DPUReg; + +typedef struct { + volatile uint32_t done; //< 0x000 command done reg (1:done,0:not) + volatile uint32_t sm_len_x; //< 0x004 vector length(unit:float) + volatile uint32_t sm_len_y; //< 0x008 vector count + volatile uint32_t src; //< 0x00c source address, require 256 byte alignment + volatile uint32_t dst; //< 0x010 destination address, require 256 byte alignment + volatile uint32_t scale; //< 0x014 fix point + volatile uint32_t sm_offset; //< 0x018 offset + volatile uint32_t clr; //< 0x01c clear interrupt reg (1:clear,0:not) + volatile uint32_t start; //< 0x020 start reg: valid on rising_edge, + volatile uint32_t fc_input_channel; //< 0x024 fc input channel, maxinum 4096B + volatile uint32_t fc_output_channel; //< 0x028 fc output channel,maxinum 4096B + volatile uint32_t fc_batch; //< 0x02c fc batch, + volatile uint32_t fc_weight_start; //< 0x030 fc weight and bias start addr, 256B alignment + volatile 
uint32_t fc_weight_end; //< 0x034 fc weight and bias end addr, 256B alignment + volatile uint32_t calc_mod; //< 0x038 0: softmax; 1: fc + volatile uint32_t dst_addr_sel; //< 0x03c fix to 1: ddr, + volatile uint32_t fc_relu_en; //< 0x040 fc relu, +} softmax_reg_t; + +typedef struct { + wait_queue_head_t waitqueue; + struct semaphore dpu_lock; + int irq_no; + int irq_flag; +}dpu_intrrupt_data_t; + +/*memory block node struct*/ +struct memblk_node { + unsigned long size; + unsigned long virt_addr; + dma_addr_t phy_addr; + struct list_head list; +}; + +#endif /*_DPU_H_*/ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpudef.h b/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpudef.h new file mode 100644 index 00000000..b16bdf25 --- /dev/null +++ b/RT-AK/rt_ai_tools/platforms/drivers/dpu/dpudef.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2019 Xilinx, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. 
+ * + */ + +#ifndef _DPU_DEF_H_ +#define _DPU_DEF_H_ + +#define DPU_IOCTL_MAGIC 'D' + +/* allocate DPU memory */ +#define DPU_IOCTL_MEM_ALLOC _IOWR(DPU_IOCTL_MAGIC, 1, struct ioc_mem_alloc_t *) +/* free DPU memory */ +#define DPU_IOCTL_MEM_FREE _IOWR(DPU_IOCTL_MAGIC, 2, struct ioc_mem_free_t *) +/* run DPU */ +#define DPU_IOCTL_RUN _IOWR(DPU_IOCTL_MAGIC, 3, ioc_aol_run_t *) +/* init dpu registers */ +#define DPU_IOCTL_INIT _IOWR(DPU_IOCTL_MAGIC, 4, ioc_aol_init_t *) +/* Memory accessible from the CPU, synchronized to memory that the device can access */ +#define DPU_IOCTL_SYNC_TO_DEV _IOWR(DPU_IOCTL_MAGIC, 5, ioc_cache_ctrl_t *) +/* Memory accessible from the device, synchronized back to the memory that the CPU can access */ +#define DPU_IOCTL_SYNC_FROM_DEV _IOWR(DPU_IOCTL_MAGIC, 6, ioc_cache_ctrl_t *) +/* Get the cores physical address */ +#define DPU_IOCTL_GET_DEV_HANDLE _IOWR(DPU_IOCTL_MAGIC, 7, ioc_aol_device_handle_t *) +/* read the registers of the IPs */ +#define DPU_IOCTL_READ_REGS _IOWR(DPU_IOCTL_MAGIC, 8, ioc_aol_read_regs_t *) + +#define SUPPORT_IP_MAX_COUNT 16 +#define SUPPORT_CORE_MAX_COUNT 32 + +struct ioc_mem_alloc_t { + unsigned long size; /* size of memory space to be allocated */ + unsigned long addr_phy; /* suv the start pyhsical address of allocated DPU memory (RETURNED) */ +}; + +struct ioc_mem_free_t { + unsigned long addr_phy; /* the start pyhsical address of allocated DPU memory */ +}; + +typedef struct { + unsigned long addr_phy; /* physical address of memory range */ + unsigned long size; /* size of memory range */ +}ioc_cache_ctrl_t; + +#define READ_REG_DEFAULT_BUF_LEN 64 +typedef struct { + uint64_t phy_address; + uint32_t byte_size; + uint32_t out_buffer[READ_REG_DEFAULT_BUF_LEN]; +} ioc_aol_read_regs_t; + +typedef struct { + uint32_t offset; + uint32_t value; +}ioc_aol_reg_t; + +/* + * ID of each IP tha may be included in the system. 
+ */ +typedef enum { + IP_ID_VER_REG = 0, + IP_ID_DPU, + IP_ID_SOFTMAX, + IP_ID_FULLCONNECT, + IP_ID_RESIZE, + IP_ID_SIGMOID, + IP_MAX_COUNT, +}ioc_aol_ip_id_t; + +#define DPU_AOL_REG_NUM 32 +typedef struct { + uint64_t time_start; /*[Output] The start timestamp in nano-second */ + uint64_t time_end; /*[Output] The end timestamp in nano-second */ + uint32_t timeout; /*[Input] The timeout setting for IP computing in second */ + uint32_t core_mask; /*[Input] Specify the core to be scheduled, each bit represents a core */ + uint32_t reg_count; /*[Input] Specify the count of registers to be written. No more than DPU_AOL_REG_NUM. */ + ioc_aol_ip_id_t ip_id; /*[Input] Specify the ip_id to be scheduled */ + ioc_aol_reg_t regs[DPU_AOL_REG_NUM]; /*[Input] The registers data buffer to be written. The actual count is specified by reg_count. */ +} ioc_aol_run_t; + +typedef struct { + uint32_t core_mask; /*[Input] Specify the core to be scheduled, each bit represents a core */ + uint32_t reg_count; /*[Input] Specify the count of registers to be written. No more than DPU_AOL_REG_NUM. */ + ioc_aol_ip_id_t ip_id; /*[Input] Specify the ip_id to be scheduled */ + ioc_aol_reg_t regs[DPU_AOL_REG_NUM]; /*[Input] The registers data buffer to be written. The actual count is specified by reg_count. */ + uint32_t regs_delay_us[DPU_AOL_REG_NUM]; /*[Input] The delay time array in microsecond after writing each register specified by regs. */ +} ioc_aol_init_t; + +typedef struct { + uint32_t aol_version; /*[Output] The version of AOL interface, fixed to 0x0100 */ + uint8_t core_count[SUPPORT_IP_MAX_COUNT]; /*[Output] The core count of each related DPU IP. The order according dpu_aol_ip_id_t. */ + uint64_t core_phy_addr[SUPPORT_CORE_MAX_COUNT]; /*[Output] The physical address of each IP core. 
*/ +} ioc_aol_device_handle_t; + +#endif + diff --git "a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/1-\345\210\233\345\273\272petalinux\345\267\245\347\250\213.png" "b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/1-\345\210\233\345\273\272petalinux\345\267\245\347\250\213.png" new file mode 100644 index 00000000..73f12326 Binary files /dev/null and "b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/1-\345\210\233\345\273\272petalinux\345\267\245\347\250\213.png" differ diff --git "a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/2-xsa\351\205\215\347\275\256\345\267\245\347\250\213.png" "b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/2-xsa\351\205\215\347\275\256\345\267\245\347\250\213.png" new file mode 100644 index 00000000..6c6e7df1 Binary files /dev/null and "b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/2-xsa\351\205\215\347\275\256\345\267\245\347\250\213.png" differ diff --git "a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/3-build\346\210\220\345\212\237.png" "b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/3-build\346\210\220\345\212\237.png" new file mode 100644 index 00000000..67a415aa Binary files /dev/null and "b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/3-build\346\210\220\345\212\237.png" differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230529_165001.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230529_165001.png new file mode 100644 index 00000000..7dff66ea Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230529_165001.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230529_165439.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230529_165439.png new file mode 100644 index 00000000..c636638d Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230529_165439.png differ diff --git 
a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230707_093947.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230707_093947.png new file mode 100644 index 00000000..68943572 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230707_093947.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230719_223735.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230719_223735.png new file mode 100644 index 00000000..317455a6 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230719_223735.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230720_111440.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230720_111440.png new file mode 100644 index 00000000..bcf929de Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230720_111440.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230802_231914.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230802_231914.png new file mode 100644 index 00000000..a816f6cc Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230802_231914.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230807_230007.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230807_230007.png new file mode 100644 index 00000000..b9299a95 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230807_230007.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230807_234940.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230807_234940.png new file mode 100644 index 00000000..613b872c Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230807_234940.png differ diff --git 
a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230810_235539.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230810_235539.png new file mode 100644 index 00000000..6885e5cd Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230810_235539.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230814_001613.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230814_001613.png new file mode 100644 index 00000000..36a3f1e9 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230814_001613.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_001244.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_001244.png new file mode 100644 index 00000000..d12f17ce Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_001244.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_072851.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_072851.png new file mode 100644 index 00000000..9a925fde Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_072851.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_223323.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_223323.png new file mode 100644 index 00000000..e692f902 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_223323.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_231448.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_231448.png new file mode 100644 index 00000000..308b92a6 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230816_231448.png differ diff --git 
a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230830_233303.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230830_233303.png new file mode 100644 index 00000000..882a7f27 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230830_233303.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230831_000452.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230831_000452.png new file mode 100644 index 00000000..4d2572de Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230831_000452.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230831_130644.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230831_130644.png new file mode 100644 index 00000000..5925c457 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230831_130644.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230921_192916.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230921_192916.png new file mode 100644 index 00000000..701b779c Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/Screenshot_20230921_192916.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-1-IPandToolVersionCompatibility.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-1-IPandToolVersionCompatibility.png new file mode 100644 index 00000000..17dc5f99 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-1-IPandToolVersionCompatibility.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-10-TTC.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-10-TTC.png new file mode 100644 index 00000000..f100568f Binary files /dev/null and 
b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-10-TTC.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-11-GEM.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-11-GEM.png new file mode 100644 index 00000000..bc6144ad Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-11-GEM.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-12-USB.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-12-USB.png new file mode 100644 index 00000000..b43d590c Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-12-USB.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-13-PCIe.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-13-PCIe.png new file mode 100644 index 00000000..e0b27242 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-13-PCIe.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-14-DP.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-14-DP.png new file mode 100644 index 00000000..f0ca2677 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-14-DP.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-15-InputClocks.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-15-InputClocks.png new file mode 100644 index 00000000..c7dad461 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-15-InputClocks.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-16-OutputClocks1.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-16-OutputClocks1.png new file mode 100644 index 00000000..c60b78d4 Binary files /dev/null and 
b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-16-OutputClocks1.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-17-OutputClocks2.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-17-OutputClocks2.png new file mode 100644 index 00000000..858a0d40 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-17-OutputClocks2.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-18-DDR.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-18-DDR.png new file mode 100644 index 00000000..de17eb52 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-18-DDR.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-19-AddDPUIP.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-19-AddDPUIP.png new file mode 100644 index 00000000..13085b0b Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-19-AddDPUIP.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-2-NewProject.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-2-NewProject.png new file mode 100644 index 00000000..308b7190 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-2-NewProject.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-20-AddDPUIP2.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-20-AddDPUIP2.png new file mode 100644 index 00000000..3df6e691 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-20-AddDPUIP2.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-21-DPUConfig1.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-21-DPUConfig1.png new file mode 100644 index 00000000..e1b59782 Binary files /dev/null and 
b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-21-DPUConfig1.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-22-DPUConfig2.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-22-DPUConfig2.png new file mode 100644 index 00000000..9435d4ca Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-22-DPUConfig2.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-23-DPUConfig3.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-23-DPUConfig3.png new file mode 100644 index 00000000..350dc3be Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-23-DPUConfig3.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-24-ZynqIRQ0.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-24-ZynqIRQ0.png new file mode 100644 index 00000000..9b359936 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-24-ZynqIRQ0.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-25-ZynqAXI.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-25-ZynqAXI.png new file mode 100644 index 00000000..8844fa73 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-25-ZynqAXI.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-26-ZynqIPAdded.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-26-ZynqIPAdded.png new file mode 100644 index 00000000..b4e257f9 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-26-ZynqIPAdded.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-27-ClockWizard1.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-27-ClockWizard1.png new file mode 100644 index 00000000..3f090288 Binary files /dev/null and 
b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-27-ClockWizard1.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-28-ClockWizard2.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-28-ClockWizard2.png new file mode 100644 index 00000000..b5df0103 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-28-ClockWizard2.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-29-ClockWizard3.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-29-ClockWizard3.png new file mode 100644 index 00000000..f12f18d8 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-29-ClockWizard3.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-3-DefaultType.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-3-DefaultType.png new file mode 100644 index 00000000..e87655ef Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-3-DefaultType.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-30-ClockWizard4.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-30-ClockWizard4.png new file mode 100644 index 00000000..bc6a87e5 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-30-ClockWizard4.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-31-AddResets.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-31-AddResets.png new file mode 100644 index 00000000..a23ad265 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-31-AddResets.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-32-BlocksConnection.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-32-BlocksConnection.png new file mode 100644 index 
00000000..bc0f9bed Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-32-BlocksConnection.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-33-AddressAssign.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-33-AddressAssign.png new file mode 100644 index 00000000..74da28a3 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-33-AddressAssign.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-34-AddressAssign2.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-34-AddressAssign2.png new file mode 100644 index 00000000..c4bf0315 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-34-AddressAssign2.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-35-AddressAssign3.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-35-AddressAssign3.png new file mode 100644 index 00000000..fb95c119 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-35-AddressAssign3.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-36-ValidateDesign.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-36-ValidateDesign.png new file mode 100644 index 00000000..f48b8e1d Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-36-ValidateDesign.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-37-GenOutputProduct.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-37-GenOutputProduct.png new file mode 100644 index 00000000..ff1062da Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-37-GenOutputProduct.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-38-CreateHDLWrapper.png 
b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-38-CreateHDLWrapper.png new file mode 100644 index 00000000..899be3ee Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-38-CreateHDLWrapper.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-39-GenBitstream.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-39-GenBitstream.png new file mode 100644 index 00000000..3642924c Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-39-GenBitstream.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-4-CreateBlockDesign.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-4-CreateBlockDesign.png new file mode 100644 index 00000000..1bd44424 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-4-CreateBlockDesign.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-40-ExportXSA.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-40-ExportXSA.png new file mode 100644 index 00000000..52962799 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-40-ExportXSA.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-5-AddZynqIP.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-5-AddZynqIP.png new file mode 100644 index 00000000..3edc2a3d Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-5-AddZynqIP.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-6-BANKandQSPI.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-6-BANKandQSPI.png new file mode 100644 index 00000000..55bf2aa0 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-6-BANKandQSPI.png differ diff --git 
a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-7-SD0.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-7-SD0.png new file mode 100644 index 00000000..814ff224 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-7-SD0.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-8-SD1.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-8-SD1.png new file mode 100644 index 00000000..c1b3e7c4 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-8-SD1.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-9-I2CandUART.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-9-I2CandUART.png new file mode 100644 index 00000000..218be7dc Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/ZynqMP_DPU-IP-9-I2CandUART.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/compiler_workflow.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/compiler_workflow.png new file mode 100644 index 00000000..a14509fd Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/compiler_workflow.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/deployment_workflow.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/deployment_workflow.png new file mode 100644 index 00000000..5b05c351 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/deployment_workflow.png differ diff --git a/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/f9f3f78ebafbdba4b76d0e6f490976e6.png b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/f9f3f78ebafbdba4b76d0e6f490976e6.png new file mode 100644 index 00000000..b67b6e15 Binary files /dev/null and b/RT-AK/rt_ai_tools/platforms/drivers/dpu/images/f9f3f78ebafbdba4b76d0e6f490976e6.png differ