Reputation: 9366
I found a bit of code that gets me access to the raw pixel data from my webcam. However I need to know the image width, height, pixel format and preferably the data stride(pitch, memory padding or whatever you want to call it) if its ever gonna be something other than the width * bytes per pixel
#include <windows.h>
#include <dshow.h>
#pragma comment(lib,"Strmiids.lib")
#define DsHook(a,b,c) if (!c##_) { INT_PTR* p=b+*(INT_PTR**)a; VirtualProtect(&c##_,4,PAGE_EXECUTE_READWRITE,&no);\
*(INT_PTR*)&c##_=*p; VirtualProtect(p, 4,PAGE_EXECUTE_READWRITE,&no); *p=(INT_PTR)c; }
// Here you get image video data in buf / len. Process it before calling Receive_ because renderer dealocates it.
HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ;
HRESULT __stdcall Receive ( void* inst, IMediaSample *smp ) {
BYTE* buf; smp->GetPointer(&buf); DWORD len = smp->GetActualDataLength();
//AM_MEDIA_TYPE* info;
//smp->GetMediaType(&info);
HRESULT ret = Receive_ ( inst, smp );
return ret;
}
int WINAPI WinMain(HINSTANCE inst,HINSTANCE prev,LPSTR cmd,int show){
HRESULT hr = CoInitialize(0); MSG msg={0}; DWORD no;
IGraphBuilder* graph= 0; hr = CoCreateInstance( CLSID_FilterGraph, 0, CLSCTX_INPROC,IID_IGraphBuilder, (void **)&graph );
IMediaControl* ctrl = 0; hr = graph->QueryInterface( IID_IMediaControl, (void **)&ctrl );
ICreateDevEnum* devs = 0; hr = CoCreateInstance (CLSID_SystemDeviceEnum, 0, CLSCTX_INPROC, IID_ICreateDevEnum, (void **) &devs);
IEnumMoniker* cams = 0; hr = devs?devs->CreateClassEnumerator (CLSID_VideoInputDeviceCategory, &cams, 0):0;
IMoniker* mon = 0; hr = cams->Next (1,&mon,0); // get first found capture device (webcam?)
IBaseFilter* cam = 0; hr = mon->BindToObject(0,0,IID_IBaseFilter, (void**)&cam);
hr = graph->AddFilter(cam, L"Capture Source"); // add web cam to graph as source
IEnumPins* pins = 0; hr = cam?cam->EnumPins(&pins):0; // we need output pin to autogenerate rest of the graph
IPin* pin = 0; hr = pins?pins->Next(1,&pin, 0):0; // via graph->Render
hr = graph->Render(pin); // graph builder now builds whole filter chain including MJPG decompression on some webcams
IEnumFilters* fil = 0; hr = graph->EnumFilters(&fil); // from all newly added filters
IBaseFilter* rnd = 0; hr = fil->Next(1,&rnd,0); // we find last one (renderer)
hr = rnd->EnumPins(&pins); // because data we are intersted in are pumped to renderers input pin
hr = pins->Next(1,&pin, 0); // via Receive member of IMemInputPin interface
IMemInputPin* mem = 0; hr = pin->QueryInterface(IID_IMemInputPin,(void**)&mem);
DsHook(mem,6,Receive); // so we redirect it to our own proc to grab image data
hr = ctrl->Run();
while ( GetMessage( &msg, 0, 0, 0 ) ) {
TranslateMessage( &msg );
DispatchMessage( &msg );
}
};
Bonus points if you can tell me how get this thing not to render a window but still get me access to the image data.
Upvotes: 0
Views: 4908
Reputation: 56
Here's how to add the Null Renderer that suppresses the rendering window. Add directly after creating the IGraphBuilder*
//create null renderer and add null renderer to graph
IBaseFilter *m_pNULLRenderer; hr = CoCreateInstance(CLSID_NullRenderer, NULL, CLSCTX_INPROC_SERVER, IID_IBaseFilter, (void **)&m_pNULLRenderer);
hr = graph->AddFilter(m_pNULLRenderer, L"Null Renderer");
That dshook hack is the only elegant directshow code of which I am aware.
In my experience, the DirectShow API is a convoluted nightmare, requiring hundreds of lines of code to do even the simplest operation, and adapting a whole programming paradigm in order to access your web camera. So if this code does the job for you, as it did for me, use it and enjoy fewer lines of code to maintain.
Upvotes: 0
Reputation: 515
I am sorry for you. When the interface was created there were probably not the best programmer to it.
// Here you get image video data in buf / len. Process it before calling Receive_ because renderer dealocates it.
BITMAPINFOHEADER bmpInfo; // current bitmap header info
int stride;
HRESULT ( __stdcall * Receive_ ) ( void* inst, IMediaSample *smp ) ;
HRESULT __stdcall Receive ( void* inst, IMediaSample *smp )
{
BYTE* buf; smp->GetPointer(&buf); DWORD len = smp->GetActualDataLength();
HRESULT ret = Receive_ ( inst, smp );
AM_MEDIA_TYPE* info;
HRESULT hr = smp->GetMediaType(&info);
if ( hr != S_OK )
{ //TODO: error }
else
{
if ( type->formattype == FORMAT_VideoInfo )
{
const VIDEOINFOHEADER * vi = reinterpret_cast<VIDEOINFOHEADER*>( type->pbFormat );
const BITMAPINFOHEADER & bmiHeader = vi->bmiHeader;
//! now the bmiHeader.biWidth contains the data stride
stride = bmiHeader.biWidth;
bmpInfo = bmiHeader;
int width = ( vi->rcTarget.right - vi->rcTarget.left );
//! replace the data stride be the actual width
if ( width != 0 )
bmpInfo.biWidth = width;
}
else
{ // unsupported format }
}
DeleteMediaType( info );
return ret;
}
Upvotes: 0
Reputation: 2847
That's really ugly. Please don't do that. Insert a pass-through filter like the sample grabber instead (as I replied to your other post on the same topic). Connecting the sample grabber to the null renderer gets you the bits in a clean, safe way without rendering the image.
To get the stride, you need to get the media type, either through ISampleGrabber or IPin::ConnectionMediaType. The format block will be either a VIDEOINFOHEADER or a VIDEOINFOHEADER2 (check the format GUID). The bitmapinfo header biWidth and biHeight defines the bitmap dimensions (and hence stride). If the RECT is not empty, then that defines the relevant image area within the bitmap.
I'm going to have to wash my hands now after touching this post.
Upvotes: 5