Mr Bell
Mr Bell

Reputation: 9366

How to get width and height of directshow webcam video stream

I found a bit of code that gets me access to the raw pixel data from my webcam. However I need to know the image width, height, pixel format and preferably the data stride(pitch, memory padding or whatever you want to call it) if its ever gonna be something other than the width * bytes per pixel

#include <windows.h>
#include <dshow.h>

#pragma comment(lib,"Strmiids.lib")

#define DsHook(a,b,c) if (!c##_) { INT_PTR* p=b+*(INT_PTR**)a;   VirtualProtect(&c##_,4,PAGE_EXECUTE_READWRITE,&no);\
                                          *(INT_PTR*)&c##_=*p;   VirtualProtect(p,    4,PAGE_EXECUTE_READWRITE,&no);   *p=(INT_PTR)c; }


// Here you get image video data in buf / len. Process it before calling Receive_ because renderer dealocates it.
HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ; 
HRESULT   __stdcall   Receive    ( void* inst, IMediaSample *smp ) {     
    BYTE*     buf;    smp->GetPointer(&buf); DWORD len = smp->GetActualDataLength();
    //AM_MEDIA_TYPE* info;
    //smp->GetMediaType(&info);
    HRESULT   ret  =  Receive_   ( inst, smp );   
    return    ret; 
}

int WINAPI WinMain(HINSTANCE inst,HINSTANCE prev,LPSTR cmd,int show){
    HRESULT hr = CoInitialize(0); MSG msg={0}; DWORD no;

    IGraphBuilder*  graph= 0;  hr = CoCreateInstance( CLSID_FilterGraph, 0, CLSCTX_INPROC,IID_IGraphBuilder, (void **)&graph );
    IMediaControl*  ctrl = 0;  hr = graph->QueryInterface( IID_IMediaControl, (void **)&ctrl );

    ICreateDevEnum* devs = 0;  hr = CoCreateInstance (CLSID_SystemDeviceEnum, 0, CLSCTX_INPROC, IID_ICreateDevEnum, (void **) &devs);
    IEnumMoniker*   cams = 0;  hr = devs?devs->CreateClassEnumerator (CLSID_VideoInputDeviceCategory, &cams, 0):0;  
    IMoniker*       mon  = 0;  hr = cams->Next (1,&mon,0);  // get first found capture device (webcam?)    
    IBaseFilter*    cam  = 0;  hr = mon->BindToObject(0,0,IID_IBaseFilter, (void**)&cam);
                               hr = graph->AddFilter(cam, L"Capture Source"); // add web cam to graph as source
    IEnumPins*      pins = 0;  hr = cam?cam->EnumPins(&pins):0;   // we need output pin to autogenerate rest of the graph
    IPin*           pin  = 0;  hr = pins?pins->Next(1,&pin, 0):0; // via graph->Render
                               hr = graph->Render(pin); // graph builder now builds whole filter chain including MJPG decompression on some webcams
    IEnumFilters*   fil  = 0;  hr = graph->EnumFilters(&fil); // from all newly added filters
    IBaseFilter*    rnd  = 0;  hr = fil->Next(1,&rnd,0); // we find last one (renderer)
                               hr = rnd->EnumPins(&pins);  // because data we are intersted in are pumped to renderers input pin 
                               hr = pins->Next(1,&pin, 0); // via Receive member of IMemInputPin interface
    IMemInputPin*   mem  = 0;  hr = pin->QueryInterface(IID_IMemInputPin,(void**)&mem);

    DsHook(mem,6,Receive); // so we redirect it to our own proc to grab image data

    hr = ctrl->Run();   

    while ( GetMessage(   &msg, 0, 0, 0 ) ) {  
        TranslateMessage( &msg );   
        DispatchMessage(  &msg ); 
    }
};

Bonus points if you can tell me how get this thing not to render a window but still get me access to the image data.

Upvotes: 0

Views: 4908

Answers (3)

tetsuoii
tetsuoii

Reputation: 56

Here's how to add the Null Renderer that suppresses the rendering window. Add directly after creating the IGraphBuilder*

//create null renderer and add null renderer to graph
IBaseFilter *m_pNULLRenderer;  hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER, IID_IBaseFilter, (void **)&m_pNULLRenderer);
                               hr = graph->AddFilter(m_pNULLRenderer, L"Null Renderer");

That dshook hack is the only elegant directshow code of which I am aware.

In my experience, the DirectShow API is a convoluted nightmare, requiring hundreds of lines of code to do even the simplest operation, and adapting a whole programming paradigm in order to access your web camera. So if this code does the job for you, as it did for me, use it and enjoy fewer lines of code to maintain.

Upvotes: 0

Alexander Drichel
Alexander Drichel

Reputation: 515

I am sorry for you. When the interface was created there were probably not the best programmer to it.

// Here you get image video data in buf / len. Process it before calling Receive_ because renderer dealocates it.

BITMAPINFOHEADER bmpInfo; // current bitmap header info
int stride;

HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ; 
HRESULT   __stdcall   Receive    ( void* inst, IMediaSample *smp )
{     
    BYTE*     buf;    smp->GetPointer(&buf); DWORD len = smp->GetActualDataLength();
    HRESULT   ret  =  Receive_   ( inst, smp );   

    AM_MEDIA_TYPE* info;
    HRESULT hr = smp->GetMediaType(&info);
    if ( hr != S_OK )
    { //TODO: error } 
    else
    {
        if ( type->formattype == FORMAT_VideoInfo )
        {
            const VIDEOINFOHEADER * vi = reinterpret_cast<VIDEOINFOHEADER*>( type->pbFormat );
            const BITMAPINFOHEADER & bmiHeader = vi->bmiHeader;
            //! now the bmiHeader.biWidth contains the data stride
            stride = bmiHeader.biWidth;

            bmpInfo = bmiHeader;
            int width = ( vi->rcTarget.right - vi->rcTarget.left );
            //! replace the data stride be the actual width
            if ( width != 0 )
                bmpInfo.biWidth = width;

        }
        else
        { // unsupported format }
    }
    DeleteMediaType( info );

    return    ret; 
}

Upvotes: 0

Geraint Davies
Geraint Davies

Reputation: 2847

That's really ugly. Please don't do that. Insert a pass-through filter like the sample grabber instead (as I replied to your other post on the same topic). Connecting the sample grabber to the null renderer gets you the bits in a clean, safe way without rendering the image.

To get the stride, you need to get the media type, either through ISampleGrabber or IPin::ConnectionMediaType. The format block will be either a VIDEOINFOHEADER or a VIDEOINFOHEADER2 (check the format GUID). The bitmapinfo header biWidth and biHeight defines the bitmap dimensions (and hence stride). If the RECT is not empty, then that defines the relevant image area within the bitmap.

I'm going to have to wash my hands now after touching this post.

Upvotes: 5

Related Questions