tfzxyinhao
tfzxyinhao

Reputation: 354

how to extract image from pdf using mupdf?

I want to extract image from pdf and save the image handle to std::vector,some time the background is incorrect,my code is follow.

    BOOL CTextEditorDoc::loadImage()
{
    if(m_strPDFPath.IsEmpty())
        return FALSE;

    CString strFile;
    fz_context *ctx;
    fz_document* doc;

    fz_device *dev;

    fz_irect bbox;
    fz_rect bounds;
    fz_matrix ctm;
    fz_pixmap *image;
    fz_colorspace *colorspace;

    int i,j,rotation = 0;
    int pagecount = 0;
    fz_page *page;

    BITMAPINFO bmi;
    HBITMAP hBitmap;
    LPBYTE pDest,pImage;

    if(!gb2312toutf8(m_strPDFPath,strFile))
        return FALSE;

    ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
    fz_try(ctx){
        doc = fz_open_document(ctx,strFile.GetBuffer(0));
    }fz_catch(ctx){
        fz_free_context(ctx);
        return FALSE;
    }

    fz_rotate(&ctm, rotation);
    colorspace = fz_device_rgb(ctx);
    pagecount = fz_count_pages(doc);

    pDest = NULL;
    ::ZeroMemory(&bmi, sizeof(BITMAPINFO));
    bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bmi.bmiHeader.biCompression = BI_RGB;
    bmi.bmiHeader.biPlanes = 1;
    bmi.bmiHeader.biBitCount = 32;
    bmi.bmiHeader.biWidth = 180;
    bmi.bmiHeader.biHeight = -180;
    bmi.bmiHeader.biSizeImage = 180*180*4;

    for(i=0;i<pagecount;i++){
        page = fz_load_page(doc,i);

        if(i == 0){
            fz_bound_page(doc,page,&bounds);
            fz_pre_scale(&ctm,180/(bounds.x1 - bounds.x0),180/(bounds.y1 - bounds.y0));
            fz_transform_rect(&bounds, &ctm);
            fz_round_rect(&bbox, &bounds);
        }

        image = fz_new_pixmap_with_bbox(ctx,colorspace,&bbox);
        dev = fz_new_draw_device(ctx,image);

        fz_try(ctx){
            fz_run_page(doc,page,dev,&ctm,NULL);
        }fz_catch(ctx){
            fz_drop_pixmap(ctx,image);
            fz_free_device(dev);
            fz_free_page(doc, page);
            continue;
        }

        pImage = image->samples;
        if(pImage){
            pDest = NULL;
            hBitmap = ::CreateDIBSection(NULL,&bmi,DIB_RGB_COLORS,(void**)&pDest,NULL,0);
            ASSERT(hBitmap);

            if(image->n == 2){ //not pallet
                for (j = 180* 180; j > 0 ; j--){
                    pDest[0] = pDest[1] = pDest[2] = *pImage++;
                    pDest[3] = *pImage++;
                    pDest += 4;
                }
            }else if(image->n == 4){
                //memcpy(pDest,pImage,m_thumbWidth * m_thumbHeight*4);
                for (j = 180* 180; j > 0 ; j--){
                    pDest[0] = *pImage++;
                    pDest[1] = *pImage++;
                    pDest[2] = *pImage++;
                    pDest[3] = *pImage++;
                    pDest += 4;
                }
            }else ASSERT(FALSE);

            m_imageVector.push_back(hBitmap);// save it to std::vector
        }

        fz_drop_pixmap(ctx,image);
        fz_free_device(dev);
        fz_free_page(doc, page);
    }
    
    fz_close_document(doc);
    fz_free_context(ctx);
    return TRUE;
}

this code can extract all the image of pdf but it maybe too slow ,how to improve it ? some time the image's background is incorrect?

left of the follow picture is incorrect,right of the follow picture is real.

screentshot

Upvotes: 1

Views: 3181

Answers (1)

Bernhard
Bernhard

Reputation: 2809

Like in the example of http://mupdf.com/docs/example.c

you forgot

fz_clear_pixmap_with_value(ctx, pix, 0xff);

to white the pixmap

Upvotes: 2

Related Questions