OCR Images Using Microsoft Office 2003 SDK

Universal Document Converter is virtual printer software that saves any document you print as a raster PDF or an image file. You can use a post-print feature of Universal Document Converter to apply additional processing to every output file. The example below is just one of many post-print processing solutions.

// The Microsoft Office Document Imaging Library (MODI), which is
// supplied with the Office 2003 package, allows you to easily integrate
// OCR functionality into your own applications. For example, you can
// use the recognized text for indexing documents in your database.
// Important notice: MS Office 2000 and XP do not include MODI!
//
// 1) Open your project in Microsoft Visual C++ 6.0
//
// 2) Press "Ctrl+W" in Visual C++ to open ClassWizard
//
// 3) In ClassWizard window press "Add Class->From a type library"
//    button and select "MDIVWCTL.DLL" file. By default this file
//    should be in folder:
//    "C:\Program Files\Common Files\Microsoft Shared\MODI\11.0"
//
// 4) Save the generated files "mdivwctl.h" and "mdivwctl.cpp", then add
//    this include to your "stdafx.h" file: #include "mdivwctl.h"
//
// 5) You must initialize COM before you call any COM method.
//    Call "::CoInitialize(0);" before using COM and
//    "::CoUninitialize();" when you have finished using it.

#include "mdivwctl.h"

enum MiLANGUAGES 
{ 
  miLANG_CHINESE_SIMPLIFIED = 2052,
  miLANG_CHINESE_TRADITIONAL = 1028,
  miLANG_CZECH = 5,
  miLANG_DANISH = 6, 
  miLANG_DUTCH = 19,
  miLANG_ENGLISH = 9,
  miLANG_FINNISH = 11,
  miLANG_FRENCH = 12,
  miLANG_GERMAN = 7,
  miLANG_GREEK = 8, 
  miLANG_HUNGARIAN = 14,
  miLANG_ITALIAN = 16,
  miLANG_JAPANESE = 17,
  miLANG_KOREAN = 18,
  miLANG_NORWEGIAN = 20,
  miLANG_POLISH = 21,
  miLANG_PORTUGUESE = 22,
  miLANG_RUSSIAN = 25,
  miLANG_SPANISH = 10,
  miLANG_SWEDISH = 29,
  miLANG_SYSDEFAULT = 2048,
  miLANG_TURKISH = 31
};

enum MiFILE_FORMAT
{
  miFILE_FORMAT_DEFAULTVALUE = -1,
  miFILE_FORMAT_TIFF = 1,
  miFILE_FORMAT_TIFF_LOSSLESS = 2, 
  miFILE_FORMAT_MDI = 4
};

enum MiCOMP_LEVEL 
{
  miCOMP_LEVEL_LOW = 0,
  miCOMP_LEVEL_MEDIUM = 1, 
  miCOMP_LEVEL_HIGH = 2
};

BOOL OCRImageFile( CString sImgFilePath, CString sOutFilePath )
{
  IDocument *pDoc = new IDocument;
  pDoc->CreateDispatch( "MODI.Document" );

  pDoc->Create( sImgFilePath );
  pDoc->OCR( miLANG_ENGLISH, 0, 0 );
////
  FILE    *fpOut = fopen( sOutFilePath, "wt" );
  IImages images = pDoc->GetImages();
  long	  num = images.GetCount();

  for( int i = 0; i < num; i++ )
  {
    IImage  image = images.GetItem(i);
    ILayout layout = image.GetLayout();

    fprintf( fpOut, " --< page %d of %d begin >--\n", i + 1, num );
    fprintf( fpOut, layout.GetText() );
    fprintf( fpOut, " --< page %d of %d end >--\n", i + 1, num );
  }
////
  pDoc->Close(0);

  pDoc->ReleaseDispatch();
  delete pDoc;

  fclose( fpOut );
////
  return (num > 0) ? TRUE : FALSE;
}


  • Thomas Vass

    Owner of The Private Capital Market Crowd Funding Website

    «I have used Universal Document Converter since the very first versions as my sole tool for making document conversions. It works flawlessly and is very user-friendly. I recommend the product for all business applications.»