Andrew Simpson
Andrew Simpson

Reputation: 7334

Problem parsing inference using DnnInvoke.ReadNetFromONNX with Yolo9

I have been using YOLOv3 and am now looking to move to YOLOv9.
I am using the EmguCV framework to do this. I wish to continue to use the EmguCV framework to do this.

I initialise my net object:

private const string ModelPath = "yolov9-c.onnx";

/// <summary>
/// Stores the detection thresholds, loads the ONNX model from <c>ModelPath</c>
/// and configures the network to run on the OpenCV backend with the CPU target.
/// </summary>
/// <param name="conf">Value assigned to <c>ConfidenceThreshold</c>.</param>
/// <param name="nmsThreshold">Value assigned to <c>_nmsThreshold</c>.</param>
private void Init(float conf=0.4f, float nmsThreshold=0.4f)
{
    ConfidenceThreshold = conf;
    _nmsThreshold = nmsThreshold;

    _network = DnnInvoke.ReadNetFromONNX(ModelPath);

    // Use the enum members directly instead of round-tripping a name through
    // Enum.TryParse: the previous code ignored the TryParse result, and parsing
    // the string "OpenCV" into Target can never succeed (Target has no such
    // member), so the target silently fell back to default(Target), i.e. Cpu.
    _network.SetPreferableBackend(Emgu.CV.Dnn.Backend.OpenCV);
    _network.SetPreferableTarget(Target.Cpu);
}

I then attempt my prediction:

 /// <summary>
 /// Runs the loaded network on <paramref name="img"/> and collects candidate
 /// boxes, confidences and class indices from the raw output tensors.
 /// Only the part of the method up to the end of the parsing loop is shown here;
 /// the rest (filtering, catch/finally, return) is not visible.
 /// </summary>
 /// <param name="img">Input BGR image; it is resized to 640x640 before inference.</param>
 public List<ObjectDetectedModel> PredictEmguCv(Image<Bgr, byte> img)
  {
  // Not populated anywhere in the visible part of the method.
  var filteredBoxes = new List<ObjectDetectedModel>();
  try
  {

      // The parameter is replaced by a 640x640 copy, so w and h below are the
      // resized dimensions, not the caller's original image size.
      img= img.Resize(640, 640, Inter.LinearExact); 
      // NOTE(review): 't' is not used in the visible code.
      using var t = new Mat();
      var w = img.Width;
      var h = img.Height;
      using var layerOutputs = new VectorOfMat();
      var outNames = _network.UnconnectedOutLayersNames; 
      // Blob: pixel values scaled by 1/255, 640x640, R and B channels swapped, no crop.
      using var blob = DnnInvoke.BlobFromImage(img, 1 / 255f, new Size(640, 640), swapRB: true, crop: false);
      _network.SetInput(blob);
      _network.Forward(layerOutputs, outNames);

      // Parallel lists: entry n of each list describes the same candidate detection.
      var boxes = new List<Rectangle>();
      var confidences = new List<float>();
      var classIDs = new List<int>();
      for (var k = 0; k < layerOutputs.Size; k++)
      {
          // The output is read as a 3-D float array and indexed below as lo[0, i, j],
          // i.e. axis 0 is treated as a batch of one, axis 1 as the candidate row and
          // axis 2 as the attribute column.
          var lo = (float[,,])layerOutputs[k].GetData(); 
          // NOTE(review): this is the length of axis 0, but 'i' is used as the index on
          // axis 1. If axis 0 is a batch dimension of size 1, only row 0 is ever
          // examined. Confirm the intended axis.
          var len = lo.GetLength(0);
          for (var i = 0; i < len; i++)
          {
              // Column 4 is used as a score gate (YOLOv3-style objectness slot).
              // NOTE(review): presumably a YOLOv9 ONNX export has a different layout
              // (no objectness column, attributes/candidates possibly transposed) -
              // verify against the model's actual output shape.
              if (lo[0,i, 4] < 0.2f)
                  continue;
              float max = 0;
              var idx = 0;

              // NOTE(review): this is the length of axis 1, but 'j' indexes axis 2.
              // If axis 1 is longer than axis 2 this can read out of range; if the
              // tensor is laid out differently the "class scores" scanned here may be
              // unrelated values, which would explain scores above 1.0. Confirm.
              var len2 = lo.GetLength(1);
              // Arg-max over columns 5.. ; the class index is the column offset from 5.
              for (var j = 5; j < len2; j++)
                  if (lo[0, i, j] > max)
                  {
                      max = lo[0, i, j]; 
                      idx = j - 5;
                  }

              if (!(max > 0.2f)) continue;
              // Scales the first four columns by the image size in place.
              // NOTE(review): assumes the model emits coordinates normalised to 0..1;
              // if they are already in pixels this over-scales them. TODO confirm.
              lo[0, i, 0] *= w;
              lo[0, i, 1] *= h;
              lo[0, i, 2] *= w;
              lo[0, i, 3] *= h;

              // Columns 0/1 are treated as the box centre and 2/3 as width/height,
              // so subtracting half the size gives the top-left corner.
              var x = (int)(lo[0, i, 0] - lo[0, i, 2] / 2);
              var y = (int)(lo[0, i, 1] - lo[0, i, 3] / 2);

              var rect = new Rectangle(x, y, (int)lo[0, i, 2], (int)lo[0, i, 3]);

              // Clamp the rectangle so that it stays inside the w x h image.
              rect.X = rect.X < 0 ? 0 : rect.X;
              rect.X = rect.X > w ? w - 1 : rect.X;
              rect.Y = rect.Y < 0 ? 0 : rect.Y;
              rect.Y = rect.Y > h ? h - 1 : rect.Y;
              rect.Width = rect.X + rect.Width > w ? w - rect.X - 1 : rect.Width;
              rect.Height = rect.Y + rect.Height > h ? h - rect.Y - 1 : rect.Height;

              boxes.Add(rect);
              confidences.Add(max);
              classIDs.Add(idx);
          }
      }

Apart from every label coming back as 'clock', which I know is wrong, the confidence continues to climb beyond 100%.

So, I am obviously parsing this wrong. The trouble is that after days of googling I cannot find any solution. Any pointers?

Upvotes: 0

Views: 49

Answers (0)

Related Questions