Thank you! Removing nn.Embedding and feeding the model one-hot vectors instead of token indices indeed solves the problem. Here is the final code, which works with Hailo Dataflow Compiler v3.28.0 (a short note on why the one-hot replacement is equivalent follows the code):
import torch
import torch.nn as nn
import hailo_sdk_client
from hailo_sdk_client import ClientRunner
print(f'Hailo Dataflow Compiler v{hailo_sdk_client.__version__}')
batch_size = 1
input_len = 15
vocab_len = 256  # byte values (UTF-8 code units)
embedding_len = 256
torch.manual_seed(0)
model = nn.Sequential(
    nn.Linear(vocab_len, embedding_len),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(input_len * embedding_len, 256, bias=False),
    nn.ReLU(),
    nn.Linear(256, vocab_len, bias=False),
)
# Create one-hot input instead of embedding indices
input_data = torch.zeros(batch_size, input_len, vocab_len)
dummy_input = torch.randint(vocab_len, (batch_size, input_len))
for i in range(batch_size):
    for j in range(input_len):
        input_data[i, j, dummy_input[i, j]] = 1  # One-hot encoding
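# The loop above is equivalent to a single vectorized call; this check is
# purely illustrative and can be dropped:
assert torch.equal(
    input_data, torch.nn.functional.one_hot(dummy_input, num_classes=vocab_len).float()
)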
output = model(input_data)
print(f"{output.mean()=}, {output.std(unbiased=False)=}, {output.shape=}")
with torch.no_grad():
    # torch.onnx.export(model, input_data, "model.onnx", verbose=True, input_names=["input"], output_names=["output"])
    torch.onnx.export(model, input_data, "model.onnx", verbose=True)
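# Optional sanity check of the exported graph (needs the `onnx` package installed):
# import onnx
# onnx.checker.check_model(onnx.load("model.onnx"))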
# chosen_hw_arch = "hailo8"
# chosen_hw_arch = "hailo15h" # For Hailo-15 devices
chosen_hw_arch = "hailo8r" # For Mini PCIe modules or Hailo-8R devices
runner = ClientRunner(hw_arch=chosen_hw_arch)
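# The start/end node names below come from the exported ONNX graph; if the
# export changes, they can be re-inspected with a viewer such as Netron.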
hn, npz = runner.translate_onnx_model(
    "model.onnx",
    "network",
    start_node_names=["/0/MatMul"],
    end_node_names=["/5/MatMul"],
    net_input_shapes={"/0/MatMul": [batch_size, input_len, vocab_len]},
)
runner.save_har("model.har")
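# Quantize/optimize the translated model (no calibration dataset is supplied in this minimal example)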
runner.optimize(None)
hef = runner.compile()
file_name = "model.hef"
with open(file_name, "wb") as f:
    f.write(hef)
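For anyone wondering why the one-hot trick is equivalent: a bias-free nn.Linear applied to a one-hot vector performs exactly the table lookup that nn.Embedding does, with the table stored transposed in the Linear weight. A minimal self-contained check (the layer names here are illustrative, not part of the pipeline above):

import torch
import torch.nn as nn

torch.manual_seed(0)
vocab_len, embedding_len = 256, 256

emb = nn.Embedding(vocab_len, embedding_len)
lin = nn.Linear(vocab_len, embedding_len, bias=False)

# nn.Linear stores its weight as (out_features, in_features) and computes
# x @ weight.T, so the embedding table goes in transposed.
with torch.no_grad():
    lin.weight.copy_(emb.weight.T)

idx = torch.randint(vocab_len, (4,))
one_hot = torch.nn.functional.one_hot(idx, num_classes=vocab_len).float()
assert torch.allclose(emb(idx), lin(one_hot))
print("one-hot Linear matches nn.Embedding lookup")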