Capturing video with monochrome camera in aruco opencv

I'm currently trying to use a monochrome camera with the aruco and OpenCV libraries in order to speed up the computation and get better marker capture. The problem I am having is that the monochrome feed is tripled on screen when running the aruco_test program, so the resolution is reduced by two thirds and each marker is detected three times instead of once.

I have seen threads that discuss similar problems with monochrome cameras in OpenCV. Some answers suggested cropping the image (which fixes the tripling but not the reduced resolution), but it all seems to be caused by a conversion, either BGR2GRAY or GRAY2BGR. Any help on what exactly is causing the image to be tripled, and how to bypass that step in either the aruco or the OpenCV source code, would be appreciated.


Driver Info (not using libv4l2):

Driver name   : uvcvideo
Card type     : oCam-1MGN-U
Bus info      : usb-0000:00:1d.0-1.5
Driver version: 3.13.11
Capabilities  : 0x84000001
    Video Capture
    Device Capabilities
Device Caps   : 0x04000001
    Video Capture
Priority: 2
Video input : 0 (Camera 1: ok)
Format Video Capture:
    Width/Height  : 1280/960
    Pixel Format  : 'GREY'
    Field         : None
    Bytes per Line: 1280
    Size Image    : 1228800
    Colorspace    : Unknown (00000000)
Crop Capability Video Capture:
    Bounds      : Left 0, Top 0, Width 1280, Height 960
    Default     : Left 0, Top 0, Width 1280, Height 960
    Pixel Aspect: 1/1
Streaming Parameters Video Capture:
    Capabilities     : timeperframe
    Frames per second: 30.000 (30/1)
    Read buffers     : 0
    brightness (int)    : min=0 max=127 step=1 default=64 value=64
    exposure_absolute (int)    : min=1 max=625 step=1 default=39 value=39

Using Aruco 2.0.19 and OpenCV 3.2

Since the pixel format is not YUYV, I cannot simply take the Y channel from the camera feed.

code executed :

#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include "aruco.h" 
#include "cvdrawingutils.h"
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
using namespace cv;
using namespace aruco;

// Program-wide state shared between main() and the trackbar callback.

// Detector used for every detect() call and for the thresholded preview image.
MarkerDetector MDetector;
// Video source; main() reads the first frame from it and grabs one frame per loop iteration.
VideoCapture TheVideoCapturer;
// Markers found by the most recent detection.
vector< Marker > TheMarkers;
// TheInputImage is the frame handed to the detector; TheInputImageCopy is the image
// that markers are drawn on and that is shown in the "in" window.
Mat TheInputImage, TheInputImageCopy;
// Camera calibration, loaded from the file given with -c when that flag is present.
CameraParameters TheCameraParameters;
// Trackbar callback registered in main() for "ThresParam1" and "ThresParam2".
void cvTackBarEvents(int pos, void *);

// first accumulates detection time in seconds; second is the divisor used when the mean is printed
pair< double, double > AvrgTime(0, 0); // determines the average time required for detection
// Threshold parameters bound to the two trackbars and passed to setThresholdParams().
int iThresParam1, iThresParam2;
// Delay passed to cv::waitKey(); set to 10 for live input and toggled between 0 and 10 with the 's' key.
int waitTime = 0;
/**
 * Minimal command-line helper that looks up flags in argv by exact string match.
 *
 * The parser keeps the argc/argv it was given and does not copy the strings,
 * so argv must stay valid for as long as the parser is used.
 */
class CmdLineParser {
    int argc;
    char **argv;

    // Index of the first argv entry equal to `param`, or -1 when there is none.
    // The scan starts at argv[0], exactly as the original lookups did.
    int indexOf(const std::string &param) const {
        for (int i = 0; i < argc; i++)
            if (param == argv[i]) return i;
        return -1;
    }

public:
    CmdLineParser(int _argc, char **_argv) : argc(_argc), argv(_argv) {}

    /// True when `param` appears anywhere in argv.
    bool operator[](const std::string &param) const { return indexOf(param) != -1; }

    /**
     * Returns the argument that follows `param`.
     *
     * `defvalue` is returned when `param` is absent, and also when `param` is the
     * last argument: in that case there is no value to read, and argv[argc] is a
     * null pointer that must not be turned into a std::string.
     */
    std::string operator()(const std::string &param, const std::string &defvalue = "-1") const {
        const int idx = indexOf(param);
        if (idx == -1 || idx + 1 >= argc) return defvalue;
        return argv[idx + 1];
    }
};

/**
 * Limits an image to at most `width` pixels across for display.
 *
 * @param in     image to show
 * @param width  maximum width in pixels
 * @return `in` itself when it is already `width` pixels wide or narrower;
 *         otherwise a copy scaled down to exactly `width` pixels wide with the
 *         height scaled by the same factor.
 */
cv::Mat resize(const cv::Mat &in,int width){
    if (in.size().width<=width) return in;
    // One factor for both axes keeps the aspect ratio.
    const float yf=float(width)/float(in.size().width);
    cv::Mat im2;
    cv::resize(in,im2,cv::Size(width,static_cast<int>(float(in.size().height)*yf)));
    return im2;
}


/**
 * Detects markers in a video file or a live camera and shows the result.
 *
 * Usage: (in.avi|live[:idx_cam=0]) [-c camera_params.yml] [-s marker_size_in_meters]
 *        [-d dictionary] [-h]
 *
 * Every frame is passed to MDetector; the detected markers (and, when both a
 * calibration and a marker size are available, a 3D cube and axis) are drawn on a
 * copy of the frame shown in the "in" window, and the thresholded image is shown in
 * "thres". ESC or the end of the video stops the loop; 's' toggles between stepping
 * on each key press and running freely.
 *
 * Any std::exception raised on the way is reported on stdout.
 */
int main(int argc, char **argv) {
    try {
        CmdLineParser cml(argc,argv);
        if (argc < 2  || cml["-h"]) {
            cerr << "Invalid number of arguments" << endl;
            cerr << "Usage: (in.avi|live[:idx_cam=0]) [-c camera_params.yml] [-s  marker_size_in_meters] [-d dictionary:ARUCO by default] [-h]" << endl;
            cerr<<"\tDictionaries: "; for(auto dict:aruco::Dictionary::getDicTypes())    cerr<<dict<<" ";cerr<<endl;
            cerr<<"\t Instead of these, you can directly indicate the path to a file with your own generated dictionary"<<endl;
            return false;
        }

        ///////////  PARSE ARGUMENTS
        string TheInputVideo = argv[1];
        // read camera parameters if passed
        if (cml["-c"] )  TheCameraParameters.readFromXMLFile(cml("-c"));
        float TheMarkerSize = std::stof(cml("-s","-1"));

        ///////////  OPEN VIDEO
        // read from camera or from  file
        if (TheInputVideo.find("live") != string::npos) {
            int vIdx = 0;
            // check if the :idx is here
            char cad[100];
            if (TheInputVideo.find(":") != string::npos) {
                std::replace(TheInputVideo.begin(), TheInputVideo.end(), ':', ' ');
                // %99s keeps an over-long first token from overflowing cad
                sscanf(TheInputVideo.c_str(), "%99s %d", cad, &vIdx);
            }
            cout << "Opening camera index " << vIdx << endl;
            TheVideoCapturer.open(vIdx);
            waitTime = 10;
        } else {
            TheVideoCapturer.open(TheInputVideo);
        }
        // check video is open
        if (!TheVideoCapturer.isOpened())  throw std::runtime_error("Could not open video");

        // read first image to get the dimensions
        TheVideoCapturer >> TheInputImage;
        // adapt the calibration to the actual frame size
        if (TheCameraParameters.isValid())
            TheCameraParameters.resize(TheInputImage.size());

        MDetector.setDictionary(cml("-d","ARUCO"));//sets the dictionary to be employed (ARUCO,APRILTAGS,ARTOOLKIT,etc)
        MDetector.setThresholdParams(7, 7);
        MDetector.setThresholdParamRange(2, 0);
        //  MDetector.setCornerRefinementMethod(aruco::MarkerDetector::SUBPIX);

        //gui requirements : the trackbars to change this parameters
        iThresParam1 = MDetector.getParams()._thresParam1;
        iThresParam2 = MDetector.getParams()._thresParam2;
        // the window has to exist before trackbars can be attached to it
        cv::namedWindow("in");
        cv::createTrackbar("ThresParam1", "in", &iThresParam1, 25, cvTackBarEvents);
        cv::createTrackbar("ThresParam2", "in", &iThresParam2, 13, cvTackBarEvents);

        char key = 0;
        int index = 0;
        // capture until press ESC or until the end of the video
        do {
            // decode the frame grabbed by the previous iteration (or by the first read above)
            TheVideoCapturer.retrieve(TheInputImage);

            double tick = (double)getTickCount(); // for checking the speed
            // Detection of markers in the image passed
            TheMarkers= MDetector.detect(TheInputImage, TheCameraParameters, TheMarkerSize);
            // check the speed by calculating the mean speed of all iterations
            AvrgTime.first += ((double)getTickCount() - tick) / getTickFrequency();
            AvrgTime.second++;
            cout << "\rTime detection=" << 1000 * AvrgTime.first / AvrgTime.second << " milliseconds nmarkers=" << TheMarkers.size() << std::endl;

            // draw on a copy so the detector input stays untouched
            TheInputImage.copyTo(TheInputImageCopy);

            // print marker info and draw the markers in image
            for (unsigned int i = 0; i < TheMarkers.size(); i++) {
                cout << TheMarkers[i]<<endl;
                TheMarkers[i].draw(TheInputImageCopy, Scalar(0, 0, 255));
            }

            // draw a 3d cube in each marker if there is 3d info
            if (TheCameraParameters.isValid() && TheMarkerSize>0)
                for (unsigned int i = 0; i < TheMarkers.size(); i++) {
                    CvDrawingUtils::draw3dCube(TheInputImageCopy, TheMarkers[i], TheCameraParameters);
                    CvDrawingUtils::draw3dAxis(TheInputImageCopy, TheMarkers[i], TheCameraParameters);
                }

            // show input with augmented information and  the thresholded image
            cv::imshow("in", resize(TheInputImageCopy,1280));
            cv::imshow("thres", resize(MDetector.getThresholdedImage(),1280));

            key = cv::waitKey(waitTime); // wait for key to be pressed
            if(key=='s')  waitTime= waitTime==0?10:0;
            index++; // number of images captured
        } while (key != 27 && (TheVideoCapturer.grab() ));

    } catch (const std::exception &ex) {
        cout << "Exception :" << ex.what() << endl;
    }
}

/**
 * Trackbar callback: applies the current slider values to the detector, runs the
 * detection again on the current frame and refreshes both windows.
 *
 * @param pos  slider position reported by the GUI; unused, because the sliders
 *             write directly into iThresParam1 / iThresParam2.
 */
void cvTackBarEvents(int pos, void *) {
    (void)pos;
    // the first parameter is forced to be odd and at least 3
    if (iThresParam1 < 3)  iThresParam1 = 3;
    if (iThresParam1 % 2 != 1)  iThresParam1++;
    // The original repeated the check on iThresParam1 here, which can never fire
    // after the clamp above; the lower bound is applied to the second parameter instead.
    if (iThresParam2 < 1)  iThresParam2 = 1;
    MDetector.setThresholdParams(iThresParam1, iThresParam2);
    // recompute
    MDetector.detect(TheInputImage, TheMarkers, TheCameraParameters);
    // start from a clean frame so overlays from the previous detection do not pile up
    TheInputImage.copyTo(TheInputImageCopy);
    for (unsigned int i = 0; i < TheMarkers.size(); i++)
        TheMarkers[i].draw(TheInputImageCopy, Scalar(0, 0, 255));

    // draw a 3d cube in each marker if there is 3d info
    if (TheCameraParameters.isValid())
        for (unsigned int i = 0; i < TheMarkers.size(); i++)
            CvDrawingUtils::draw3dCube(TheInputImageCopy, TheMarkers[i], TheCameraParameters);

    cv::imshow("in", resize(TheInputImageCopy,1280));
    cv::imshow("thres", resize(MDetector.getThresholdedImage(),1280));
}

