一、TI TDA4 平台概述
1.1 TDA4 系列芯片
| 型号 | CPU | DSP | NPU | 应用场景 |
| --- | --- | --- | --- | --- |
| TDA4VM | 2×A72 | 1×C7x | 8 TOPS | L2 辅助驾驶 |
| TDA4VL | 2×A72 | 1×C7x | 4 TOPS | DMS/OMS |
| TDA4VE | 2×A53 | 1×C7x | 2 TOPS | 入门级 ADAS |
1.2 TDA4 架构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| ┌─────────────────────────────────────────────────────────────────────────┐ │ TDA4 (Jacinto 7) 架构 │ ├─────────────────────────────────────────────────────────────────────────┤ │ │ │ 主处理器 │ │ ┌─────────────────────────────────────────────────────────┐ │ │ │ Dual-core Cortex-A72 @ 1.8GHz │ │ │ │ ├── 运行 Linux / QNX │ │ │ │ ├── 应用逻辑、模型后处理 │ │ │ │ └── MediaPipe 图执行 │ │ │ └─────────────────────────────────────────────────────────┘ │ │ │ │ 加速器 │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ │ C7x DSP │ │ DLA (NPU) │ │ VPAC │ │ │ │ 1.0 GHz │ │ 8 TOPS │ │ 图像处理 │ │ │ │ 深度学习 │ │ 深度学习 │ │ ISP/LDC │ │ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │ │ │ 内存 │ │ ┌─────────────────────────────────────────────────────────┐ │ │ │ DDR4: 4-8 GB │ │ │ │ L3 Cache: 1 MB │ │ │ │ MSMC: 512 KB (共享内存) │ │ │ └─────────────────────────────────────────────────────────┘ │ │ │ │ 外设 │ │ ┌─────────────────────────────────────────────────────────┐ │ │ │ MIPI CSI-4 (摄像头) | Ethernet | CAN-FD │ │ │ │ PCIe 3.0 | USB 3.0 | eMMC/SD │ │ │ └─────────────────────────────────────────────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────┘
|
二、开发环境搭建
2.1 SDK 安装
1 2 3 4 5 6 7 8 9 10
|
chmod +x ti-processor-sdk-linux-adas-k3-tda4vm-evm-09_02_00_05-Linux-x86-Install.bin ./ti-processor-sdk-linux-adas-k3-tda4vm-evm-09_02_00_05-Linux-x86-Install.bin
export TISDK_ROOT=/opt/ti-processor-sdk-linux-adas-k3-tda4vm-evm-09_02_00_05 export PATH=$TISDK_ROOT/linux-devkit/sysroots/x86_64-arago-linux/usr/bin:$PATH
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| git clone https://github.com/google/mediapipe.git cd mediapipe
new_local_repository( name = "linux_aarch64_sysroot", path = "/opt/ti-processor-sdk-linux-adas-k3-tda4vm-evm-09_02_00_05/linux-devkit/sysroots/aarch64-linux", )
build:tda4 --crosstool_top=@linux_aarch64_sysroot//usr/share/clang build:tda4 --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:tda4 --cpu=aarch64 build:tda4 --compiler=clang
bazel build -c opt --config=tda4 mediapipe/examples/desktop/face_detection:face_detection
|
三、模型转换与优化
3.1 TFLite → TIDL 模型转换
1 2 3 4 5 6 7 8 9
| export TIDL_TOOLS_PATH=$TISDK_ROOT/tidl-tools
python3 $TIDL_TOOLS_PATH/tidl_model_import/tflite_import.py \ --model_path blazeface.tflite \ --output_path blazeface_tidl \ --target_device TDA4VM \ --num_bits 8
|
3.2 量化校准
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
| import tidl_model_import as ti
model = ti.load_model('blazeface.tflite')
calibration_images = load_calibration_images(num_images=100)
for image in calibration_images: preprocessed = preprocess(image) model.run_calibration(preprocessed)
model.export('blazeface_quant.tflite', quantized=True)
|
四、MediaPipe Calculator 集成
4.1 Calculator 实现
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
|
#ifndef MEDIAPIPE_CALCULATORS_TI_TIDL_INFERENCE_CALCULATOR_H_ #define MEDIAPIPE_CALCULATORS_TI_TIDL_INFERENCE_CALCULATOR_H_
#include "mediapipe/framework/calculator_framework.h" #include "mediapipe/framework/formats/tensor.h" #include "tidl_infer_engine.h"
namespace mediapipe {
class TidlInferenceCalculator : public CalculatorBase { public: static absl::Status GetContract(CalculatorContract* cc); absl::Status Open(CalculatorContext* cc) override; absl::Status Process(CalculatorContext* cc) override; absl::Status Close(CalculatorContext* cc) override;
private: absl::Status InitializeTidlEngine(const std::string& model_path); void PreprocessInput(const Tensor& input, float* tidl_input); void PostprocessOutput(const float* tidl_output, Tensor* output); std::unique_ptr<TIDLInferEngine> tidl_engine_; std::string accelerator_ = "C7X"; int num_bits_ = 8; };
}
#endif
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
|
#include "mediapipe/calculators/ti/tidl_inference_calculator.h"
namespace mediapipe {
absl::Status TidlInferenceCalculator::GetContract(CalculatorContract* cc) { cc->Inputs().Tag("TENSORS").Set<std::vector<Tensor>>(); cc->Outputs().Tag("TENSORS").Set<std::vector<Tensor>>(); cc->Options<TidlInferenceOptions>(); return absl::OkStatus(); }
absl::Status TidlInferenceCalculator::Open(CalculatorContext* cc) { const auto& options = cc->Options<TidlInferenceOptions>(); std::string model_path = options.model_path(); accelerator_ = options.accelerator(); num_bits_ = options.num_bits(); MP_RETURN_IF_ERROR(InitializeTidlEngine(model_path)); LOG(INFO) << "TidlInferenceCalculator initialized: " << "accelerator=" << accelerator_ << ", num_bits=" << num_bits_; return absl::OkStatus(); }
absl::Status TidlInferenceCalculator::InitializeTidlEngine( const std::string& model_path) { TIDLInferConfig config; config.model_path = model_path; config.accelerator = accelerator_; config.num_bits = num_bits_; tidl_engine_ = std::make_unique<TIDLInferEngine>(); int ret = tidl_engine_->Initialize(config); if (ret != 0) { return absl::InternalError("Failed to initialize TIDL engine"); } return absl::OkStatus(); }
absl::Status TidlInferenceCalculator::Process(CalculatorContext* cc) { if (cc->Inputs().Tag("TENSORS").IsEmpty()) { return absl::OkStatus(); } const auto& input_tensors = cc->Inputs().Tag("TENSORS").Get<std::vector<Tensor>>(); if (input_tensors.empty()) { return absl::OkStatus(); } const Tensor& input_tensor = input_tensors[0]; float* tidl_input = tidl_engine_->GetInputBuffer(); PreprocessInput(input_tensor, tidl_input); int ret = tidl_engine_->RunInference(); if (ret != 0) { return absl::InternalError("TIDL inference failed"); } const float* tidl_output = tidl_engine_->GetOutputBuffer(); auto output_tensors = absl::make_unique<std::vector<Tensor>>(); Tensor& output_tensor = output_tensors->emplace_back( Tensor::ElementType::kFloat32, tidl_engine_->GetOutputShape()); PostprocessOutput(tidl_output, &output_tensor); cc->Outputs().Tag("TENSORS").Add(output_tensors.release(), cc->InputTimestamp()); return absl::OkStatus(); }
void TidlInferenceCalculator::PreprocessInput( const Tensor& input, float* tidl_input) { const float* src_data = input.GetCpuReadView().buffer<float>(); size_t input_size = input.shape().num_elements(); std::memcpy(tidl_input, src_data, input_size * sizeof(float)); }
void TidlInferenceCalculator::PostprocessOutput( const float* tidl_output, Tensor* output) { float* dst_data = output->GetCpuWriteView().buffer<float>(); size_t output_size = output->shape().num_elements(); std::memcpy(dst_data, tidl_output, output_size * sizeof(float)); }
absl::Status TidlInferenceCalculator::Close(CalculatorContext* cc) { if (tidl_engine_) { tidl_engine_->Deinitialize(); tidl_engine_.reset(); } return absl::OkStatus(); }
REGISTER_CALCULATOR(TidlInferenceCalculator);
}
|
五、Graph 配置
5.1 TDA4 Face Detection Graph
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
| # mediapipe/graphs/ims/face_detection_tda4.pbtxt
input_stream: "IMAGE:image" output_stream: "DETECTIONS:detections"
# 1. 图像预处理 node { calculator: "ImageToTensorCalculator" input_stream: "IMAGE:image" output_stream: "TENSORS:input_tensors" options { [mediapipe.ImageToTensorCalculatorOptions.ext] { tensor_width: 128 tensor_height: 128 tensor_channels: 3 tensor_float_range { min: -1.0 max: 1.0 } } } }
# 2. TIDL 推理 (C7x DSP) node { calculator: "TidlInferenceCalculator" input_stream: "TENSORS:input_tensors" output_stream: "TENSORS:output_tensors" options { [mediapipe.TidlInferenceOptions.ext] { model_path: "/opt/models/blazeface_tidl.bin" accelerator: "C7X" num_bits: 8 } } }
# 3. 后处理 node { calculator: "BlazeFacePostprocessorCalculator" input_stream: "TENSORS:output_tensors" output_stream: "DETECTIONS:detections" }
|
六、性能优化
6.1 加速器选择
| 加速器 | 延迟 (BlazeFace) | 功耗 | 适用场景 |
| --- | --- | --- | --- |
| A72 CPU | ~50ms | 2W | 开发调试 |
| C7x DSP | ~8ms | 1W | 深度学习推理 |
| DLA (NPU) | ~5ms | 2W | 高吞吐量场景 |
6.2 内存优化
1 2 3 4 5 6 7
|
void* msmc_buffer = msmc_alloc(buffer_size);
tidl_engine_->SetInputBuffer(msmc_buffer, zero_copy=true);
|
6.3 流水线并行
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| # 使用 Flow Control 实现流水线
# 图像采集 node { calculator: "ImageCaptureCalculator" output_stream: "IMAGE:image" output_stream: "FLOW_CONTROL:flow_control" }
# 预处理 (CPU) node { calculator: "ImageToTensorCalculator" input_stream: "IMAGE:image" input_stream: "FLOW_CONTROL:flow_control" output_stream: "TENSORS:tensors" }
# 推理 (DSP) node { calculator: "TidlInferenceCalculator" input_stream: "TENSORS:tensors" output_stream: "TENSORS:output" }
# 后处理 (CPU) node { calculator: "PostprocessorCalculator" input_stream: "TENSORS:output" output_stream: "RESULT:result" }
|
七、调试技巧
7.1 性能分析
1 2 3 4 5 6 7 8 9 10
| ti-perf-analyzer \ --graph face_detection.pbtxt \ --model blazeface_tidl.bin \ --num_iterations 100
|
7.2 常见问题
| 问题 | 可能原因 | 解决方案 |
| --- | --- | --- |
| 初始化失败 | 模型格式不对 | 重新转换模型 |
| 精度下降 | 量化精度损失 | 增加校准数据 |
| 内存不足 | 模型过大 | 模型剪枝 |
| DSP 不工作 | 驱动未加载 | 检查 remoteproc |
小结
| 要点 | 说明 |
| --- | --- |
| C7x DSP | 主要推理引擎,功耗低 |
| TIDL | TI 深度学习框架 |
| 量化 | 8-bit 量化提升性能 |
| 内存 | 使用 MSMC 共享内存 |
八、部署脚本
8.1 完整部署脚本
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
| #!/bin/bash
set -e
TISDK_ROOT="/opt/ti-processor-sdk-linux-adas-k3-tda4vm-evm-09_02_00_05" TARGET_IP="192.168.1.100"
MODEL_DIR="models" OUTPUT_DIR="deploy"
source $TISDK_ROOT/linux-devkit/environment-setup
echo "=== TI SDK 环境设置完成 ==="
convert_model() { local model_name=$1 local input_shape=$2 echo "转换模型: $model_name" $TISDK_ROOT/tidl-tools/model_import/tflite_import \ --model $MODEL_DIR/${model_name}.tflite \ --output $OUTPUT_DIR/${model_name}_tidl \ --input_shape $input_shape \ --quantization_mode post-training echo "模型转换完成: $model_name" }
convert_model "blazeface" "1,128,128,3" convert_model "face_landmark" "1,192,192,3"
echo "编译 MediaPipe..." bazel build -c opt --config=tda4 \ //mediapipe/graphs/ims:dms_fatigue_graph
echo "部署到设备..." ssh root@$TARGET_IP "mkdir -p /opt/ims" scp $OUTPUT_DIR/* root@$TARGET_IP:/opt/ims/ scp bazel-bin/mediapipe/graphs/ims/dms_fatigue_graph root@$TARGET_IP:/opt/ims/
echo "=== 部署完成 ==="
|
8.2 性能测试脚本
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| #!/bin/bash
MODEL="blazeface_tidl" ITERATIONS=100
ssh root@$TARGET_IP "/opt/ims/benchmark \ --model /opt/ims/${MODEL} \ --iterations $ITERATIONS \ --output /opt/ims/benchmark_result.txt"
scp root@$TARGET_IP:/opt/ims/benchmark_result.txt .
echo "性能测试完成" cat benchmark_result.txt
|
九、常见问题与解决
9.1 TIDL 模型转换失败
问题: 不支持的算子
1
| Error: Unsupported layer type: CUSTOM
|
解决:
1 2 3 4 5 6
| $TISDK_ROOT/tidl-tools/model_import/check_supported_ops \ --model $MODEL_DIR/model.tflite
|
9.2 DSP 初始化失败
问题: C7x DSP 无法加载
1
| Error: Failed to initialize C7x DSP
|
解决:
1 2 3 4 5 6 7 8
| ls /lib/firmware/c7x*
modprobe c7x_pci
systemctl restart tidl-dsp
|
9.3 内存不足
问题: 推理时内存溢出
1
| Error: MSMC memory allocation failed
|
解决:
1 2 3 4 5 6 7
| tidl_rt_config_t config; config.msmc_size = 0x200000; config.ddr_size = 0x10000000;
|
十、性能优化
10.1 内存优化
1 2 3 4 5
| void* msmc_buffer = msmc_alloc(buffer_size);
|
10.2 并行优化
1 2 3 4 5 6 7 8 9 10
| #pragma omp parallel for for (int i = 0; i < num_frames; i++) { process_frame(frames[i]); }
tidl_invoke_async(dsp_handle, input, output);
tidl_wait(dsp_handle);
|
十一、总结
| 要点 | 说明 |
| --- | --- |
| TDA4 架构 | A72 + C7x DSP + DLA NPU |
| 模型转换 | TFLite → TIDL |
| Calculator | TIDL Inference Calculator |
| 性能优化 | MSMC 共享内存、并行执行 |
参考资料
- Texas Instruments. TDA4 Datasheet
- Texas Instruments. TIDL Documentation
系列进度: 54/55
更新时间: 2026-03-12