/*
 *  PROGRAM NAME:
 *  -------------
 *  READOCR1.CPL
 *  Concordance(tm) Information Retrieval System, Professional Edition
 *
 *  COPYRIGHT:
 *  ----------
 *  Copyright (c) 1996, 1997 Dataflight Software.
 *  ALL RIGHTS RESERVED.
 *  2337 Roscomare Road, Suite 11
 *  Los Angeles, CA 90077
 *
 *  Unauthorized distribution, adaptation or use may be
 *  subject to civil and criminal penalties.
 *
 *  SYNOPSIS:
 *  ---------
 *  This program cycles through the current query and locates
 *  the image field. It then translates the image into a filename
 *  for the corresponding OCR text. It reads in the file and
 *  places the content into the specified paragraph field.
 *
 *  This revision is for databases that contain the FULL path, file name,
 *  and extension for the OCR.TXT files.
 *
 *  THIS IS THE CLIENT MODIFIED VERSION OF READOCR1.CPL, IT IS
 *  NOT OFFICIALLY SUPPORTED BY DATAFLIGHT.
 */

/****************************************************************
*  Global Variable Declarations and Initialization              *
****************************************************************/

/* Boolean values, key codes and the end-of-file / failure marker. */
int TRUE  = 1,
    FALSE = 0,
    CR    = 13,
    EOF   = -1,
    ESC   = 27,
    LF    = 10;

/* Run-time state shared between functions:                      */
/*   TOTALCHAR - running character total written to the log by   */
/*               LoadtoFieldFast()                               */
/*   COUNTFILE - file counter written to the log by LoadFiles()  */
/*   CLEARTEXT - when 1, LoadFiles() empties the OCR field of    */
/*               each record before loading into it              */
int TOTALCHAR = 0,
    COUNTFILE = 1,
    CLEARTEXT = 0;

/* Size of the read buffer declared in LoadtoFieldFast(). */
int MAXBUFFERSIZE = 2048;   /* Must be way less than 60000 */

/****************************************************************
*  Name:     RGB                                                *
*  Synopsis: Helper routine for Windows color creation.         *
*            Packs three 0-255 components into one integer:     *
*            red in the low byte, green in the second byte and  *
*            blue in the third byte.  Only the low 8 bits of    *
*            each argument are used.                            *
****************************************************************/
RGB(int red, grn, blu)
{
    int lowByte, midByte, highByte;

    lowByte  = red & 255;
    midByte  = (grn & 255) * 256;
    highByte = (blu & 255) * 65536;

    return(highByte | midByte | lowByte);
}

/****************************************************************
* Name: Message                                                 *
* Synopsis: Displays error message and waits for key.
* ****************************************************************/
/* Message: draws a three-row box at rows 7-9, columns 13-69, and   */
/* centers the message text on row 8.                               */
/*   message - text to show                                         */
/*   wait    - when non-zero, block for a key press and then put    */
/*             back the screen area that was saved before drawing   */
/* Returns the key that was pressed, with lower-case letters folded */
/* to upper case.  When wait is zero no key is read and the box is  */
/* left on screen; the return value is then whatever the unassigned */
/* local holds (NOTE(review): presumably 0 - confirm).              */
Message( text message; int wait )
{
    text saved;
    int pressed, white, blue;

    white = RGB(255,255,255);
    blue  = RGB(0,0,255);

    cursoroff();
    saved = save( 5, 13, 10, 69 );
    box( 7, 13, 9, 69, "3U", RGB(0,0,150), blue );
    puts( 8, 14, pad( message, 'C', 55 ), white, blue );

    if( wait ) {
        pressed = getkey();
        restore( 5, 13, saved );
    }

    /* Fold lower case to upper case so callers can compare with 'Y' etc. */
    if( islower( pressed ))
        pressed = pressed - ( 'a' - 'A' );

    return( pressed );
} /* Message() */

/****************************************************************
*  Name:     FileName                                           *
*  Synopsis: Trims the path from the file name.                 *
*            Repeatedly drops everything up to and including    *
*            the first backslash until none is left.            *
****************************************************************/
FileName(text name)
{
    int slash;

    slash = match(name,"\",1);
    while (slash) {
        name  = substr(name,slash+1);
        slash = match(name,"\",1);
    }
    return(name);
} /* FileName() */

/****************************************************************
*  Name:     Path                                               *
*  Synopsis: Returns the path up to the last \ and without      *
*            the file's name.                                   *
*            The scan starts at the first colon, or at the      *
*            first backslash when there is no colon, and then   *
*            advances to the last backslash after that point.   *
****************************************************************/
Path(text dosPath)
{
    int last, found;

    last = match(dosPath,":",1);
    if (last == 0)
        last = match(dosPath,"\",1);

    found = match(dosPath,"\",last+1);
    while (found) {
        last  = found;
        found = match(dosPath,"\",last+1);
    }

    return(substr(dosPath,1,last));
} /* Path() */

/****************************************************************
* Name: GetField                                                *
* Synopsis: Prompt user for field name.
* ****************************************************************/
/* GetField: shows a menu with one line per database field (name    */
/* plus type) and lets the user pick one.                           */
/*   db   - database handle                                         */
/*   next - menu item highlighted initially; also the value         */
/*          returned when nothing is chosen                         */
/* Returns the chosen field number.  Returns next unchanged when    */
/* db.documents is negative or when menu() returns 0.               */
GetField(int db, next)
{
    int i;
    text field[101];
    text screen;

    if (db.documents >= 0) {
        field[0] = "Field Type ";

        /* Build one menu line per field.  Each case ends in break so */
        /* a label can never be overwritten by the cases below it.    */
        for(i = 1; i <= db.fields; i = i + 1)
            switch(db.type[i]) {
                case 'T' :
                    field[i] = pad(db.name[i],'L',13)+ "Text ";
                    break;
                case 'P' :
                    field[i] = pad(db.name[i],'L',13)+ "Paragraph";
                    break;
                case 'N' :
                    field[i] = pad(db.name[i],'L',13)+ "Numeric ";
                    break;
                case 'D' :
                    field[i] = pad(db.name[i],'L',13)+ "Date ";
                    break;
            }

        /* Start out of range so the menu is shown at least once, and */
        /* show it again for as long as it returns an item number     */
        /* beyond the last field.                                     */
        i = db.fields + 1;
        screen = save(11,30,21,57);
        while(i > db.fields)
            i = menu(11, 30, 21, 57, field, next,"");
        restore(11,30,screen);

        /* A zero result leaves next as it was. */
        if (i)
            next = i;
    }
    return(next);
} /* GetField() */

/****************************************************************
*  Name:     initScreen                                         *
*  Synopsis: Clears the screen and puts copyright.              *
*            The copyright line is centered in 80 columns on    *
*            row MaxRow_.                                       *
****************************************************************/
initScreen()
{
    cls(RGB(0,0,170));
    puts(MaxRow_,0,pad("(C) Copyright Dataflight Software, Inc. 1996. All Rights Reserved.",'C',80));
}

/****************************************************************
* Name: Greeting                                                *
* Synopsis: Says hello.
* ****************************************************************/
/* Greeting: shows the title box and waits for one key.             */
/*   isWindows - selects the box style and colors                   */
/* Returns FALSE when the key is Esc, TRUE for any other key.  The  */
/* screen area under the box is put back before returning.          */
/* NOTE(review): there is no newline() between the "query and..."   */
/* and "filename and..." pieces, unlike the other lines - confirm   */
/* this is intended.                                                */
Greeting(int isWindows)
{
    text greeting, screen;
    int key;

    greeting = newline() +
               pad(" OCR Text Import Program v1.0",'C',60) + newline() +
               pad(" -----------------------------",'C',60) + newline() + newline() +
               pad(" PURPOSE: It is designed to cycle through a database", 'L', 60) + newline() +
               pad(" query and translates the image key into a", 'L', 62) +
               pad(" filename and imports the appropriate OCR text", 'L', 62)+ newline() + newline() +
               pad(" NOTE: Originally designed for Merrill Corporation",'L',60)+ newline() + newline() + newline() +
               pad("Press [Enter] to continue or [Esc] to quit", 'C', 65);

    screen = save (5,8,19,72);
    if (isWindows)
        box(5, 8, 19, 72,"3U", RGB(190,190,190), RGB(128,128,128));
    else
        box(5, 8, 19, 72,"S",MenuColor_,MenuColor_);
    edit(greeting ,6, 9, 18, 71,"@" ,1,"" ,0,0,MenuColor_);

    key = getkey();
    restore(5,8,screen);

    if (key == ESC)
        return(FALSE);
    return(TRUE);
}

/****************************************************************
*  Name:     nextField                                          *
*  Synopsis: Finds next available paragraph field               *
*            Scans forward from field + 1.  Returns the number  *
*            of the first paragraph ('P') field found, or FALSE *
*            when there is none.                                *
****************************************************************/
nextField(int db, field)
{
    int nextfield;

    nextfield = field + 1;
    while ((isfield(db, db.name[nextfield])) and
           (db.type[nextfield] <> 'P') and
           (nextfield < db.fields))
        nextfield = nextfield + 1;

    /* The scan also stops on the last field whatever its type, so the */
    /* type is checked again here.  Without that check a non-paragraph */
    /* last field would be handed back as an overflow target.          */
    if ((nextfield <= db.fields) and
        (isfield(db, db.name[nextfield])) and
        (db.type[nextfield] == 'P'))
        return(nextfield);

    return(FALSE);
}

/****************************************************************
*  Name:     LoadtoField                                        *
*  Synopsis: Reads in a document into the current record        *
*            one character at a time.  After 8000000 characters *
*            loading continues in the next paragraph field; if  *
*            none is left that is logged and loading stops.     *
*            Not called from anywhere else in this file;        *
*            LoadFiles() uses LoadtoFieldFast().                *
*    db        - database handle                                *
*    imageFh   - handle of the already opened text file         *
*    logFh     - handle of the log file                         *
*    textField - number of the field that receives the text     *
****************************************************************/
LoadtoField(int db, imageFh, logFh, textField)
{
    int charcount, done, readchar;
    char currentchar[2];

    done = FALSE;
    charcount = 0;
    currentchar[1] = 0;     /* terminator so currentchar can be appended as a string */

    while (done == FALSE) {
        /* We assume that the file is already open */
        /* Loop until EOF or until the per-field limit is passed */
        while (((readchar = readc(imageFh)) <> EOF) and
               ((charcount = charcount + 1) <= 8000000)) {
            /* Progress display: characters read so far, in thousands */
            puts (0,0,str((charcount/1000.0),10,3)+"k");

            /* Construct a one-character string and append it */
            currentchar[0] = readchar;
            db->textField = db->textField + currentchar;
        }

        /* If we read in to the end of file we are done */
        if (readchar == EOF)
            return;

        /* The field limit was reached: move to the next paragraph field */
        if ((textField = nextField(db, textField)) == FALSE) {
            logError(logFh, "Out of paragraph fields on record "+str(recno(db)));
            done = TRUE;
        }
        else {
            /* Don't forget the last character we read in */
            currentchar[0] = readchar;
            db->textField = db->textField + currentchar;

            /* Reset the count to reflect the new field and the first character */
            charcount = 1;
        }
    }
}

/****************************************************************
*  Name:     LoadtoFieldFast                                    *
*  Synopsis: Reads in a document into the current record        *
*            in chunks of MAXBUFFERSIZE.  Text is appended to   *
*            whatever the field already holds.  Once more than  *
*            8000000 characters of this file have been read,    *
*            loading continues in the next paragraph field.     *
*            Adds the characters read to the global TOTALCHAR   *
*            and writes a progress line to the log per chunk.   *
*    db        - database handle                                *
*    imageFh   - handle of the already opened text file         *
*    logFh     - handle of the log file                         *
*    textField - number of the field that receives the text     *
****************************************************************/
LoadtoFieldFast(int db, imageFh, logFh, textField)
{
    int charcount, done, bytesRead, leftover;
    char buffer[MAXBUFFERSIZE];

    done = FALSE;
    charcount = 0;

    /* The field is deliberately NOT cleared here, so that successive */
    /* files loaded by LoadFiles() accumulate in the same field.      */

    while (done == FALSE) {
        /* We assume that the file is already open */
        memset(buffer, 0, MAXBUFFERSIZE);
        bytesRead = read(imageFh, buffer, MAXBUFFERSIZE);
        if (bytesRead > 0)
            charcount = charcount + bytesRead;

        if ((charcount > 8000000) and (len(buffer) > 0)) {
            /* Fill the current field up to the limit.  The field may   */
            /* already hold text from earlier files, so the room left   */
            /* can work out negative; treat that as no room at all.     */
            leftover = 8000000 - len(db->textField);
            if (leftover < 0)
                leftover = 0;
            db->textField = db->textField + substr(buffer, 1, leftover);

            /* Move on to the next paragraph field */
            if ((textField = nextField(db, textField)) == FALSE) {
                logError(logFh, "Out of paragraph fields on record "+str(recno(db)));
                done = TRUE;
            }

            /* Next field gets the left over info */
            if (done == FALSE) {
                db->textField = substr(buffer, leftover + 1);
                charcount = len(db->textField);
            }
        }
        else {
            db->textField = db->textField + buffer;
        }

        /* A buffer that is not full means the end of the file was reached. */
        /* NOTE(review): this assumes len(buffer) equals MAXBUFFERSIZE      */
        /* after a full read - confirm.                                     */
        if (len(buffer) < MAXBUFFERSIZE)
            done = TRUE;

        /* Add only this chunk to the grand total.  charcount is a     */
        /* running total for the file and adding it would count every  */
        /* earlier chunk again.                                        */
        if (bytesRead > 0)
            TOTALCHAR = TOTALCHAR + bytesRead;
        logError(logFh, "**** TOTALCHAR = " + str(TOTALCHAR) + " CHARCOUNT = " + str(charcount) + " ****");
    }
}

/****************************************************************
*  Name:     OcrTextName                                        *
*  Synopsis: Helper for LoadFiles.  Returns entry with its file *
*            extension replaced by .TXT, or with .TXT appended  *
*            when the file name part has no extension.  Only a  *
*            period after the last backslash counts, so periods *
*            in directory names are left alone.                 *
****************************************************************/
OcrTextName(text entry)
{
    int i, slash, dot;

    /* Position of the last backslash, 0 if there is none */
    slash = 0;
    while (i = match(entry, "\", slash + 1))
        slash = i;

    /* Position of the last period, 0 if there is none */
    dot = 0;
    while (i = match(entry, ".", dot + 1))
        dot = i;

    if (dot > slash)
        return(substr(entry, 1, dot - 1) + ".TXT");
    return(entry + ".TXT");
}

/****************************************************************
*  Name:     LoadFiles                                          *
*  Synopsis: Cycles through, loads OCR text                     *
*            For every record in the current query the image    *
*            field is split on ";" and each entry is turned     *
*            into a .TXT file name that is loaded into the OCR  *
*            field.  An END OF PAGE marker is appended after    *
*            each loaded file.  Progress, failures and timing   *
*            are written to a log file chosen by the user.      *
*    db         - database handle                               *
*    imageField - number of the field holding the file paths    *
*    textField  - number of the paragraph field to load into    *
*    path       - not used by this function                     *
*            Returns the result of Message() when a             *
*            precondition fails; otherwise returns no value.    *
****************************************************************/
LoadFiles(int db, imageField, textField; text path)
{
    int logFh, imageFh, checkFiles, trackBreak, nextBreak, start,
        starthour, startminute, startsecond,
        endhour, endminute, endsecond;
    text szLogFile, imageFile, DBImageFieldCopy, fieldValue, rest;

    time(starthour,startminute,startsecond);

    /* Check to ensure fields were selected and database is open */
    if (db.documents <= 0)
        return(Message("Please open a database. ",TRUE));
    if (imageField <= 0)
        return(Message("Please enter OCR Paths field. ",TRUE));
    if (textField <= 0)
        return(Message("Please enter OCR field. ",TRUE));
    if (db.type[textField] <> 'P')
        return(Message("OCR field must be type PARAGRAPH. ",TRUE));

    /* Keep asking until a log file has been chosen and opened */
    while (len(szLogFile) <= 0) {
        if (getfile("Create log file","*.LOG",szLogFile) == CR) {
            if ((logFh = open(szLogFile, "w+")) == EOF) {
                szLogFile = "";
            }
        }
        else
            szLogFile = "";
    }

    /* Set up a message box; it stays on screen as the progress box */
    Message("",FALSE);

    /* Let's cycle through the database */
    cycle(db) {
        /* Processing message */
        puts( 8, 14, pad("Processing... Record "+str(docno(db)), 'C', 55 ), RGB(255,255,255), RGB(0,0,255));

        /* trackBreak is the position of the ";" that ended the previous */
        /* entry, 0 while the first entry is being handled               */
        trackBreak = 0;
        checkFiles = 0;
        fieldValue = trim(db->imageField);

        if (CLEARTEXT == 1) {
            db->textField = "";
        }

        while (checkFiles == 0) {
            nextBreak = match(fieldValue, ";", trackBreak + 1);

            /* The first entry starts at 1.  Later entries start two   */
            /* places after the previous ";", i.e. the ";" and the one */
            /* character following it are both skipped.                */
            if (trackBreak <> 0) {
                start = trackBreak + 2;
                rest = substr(fieldValue, start);
            }
            else {
                start = 1;
                rest = fieldValue;
            }

            if (nextBreak <> 0) {
                /* Pull out contents of text string up to ; character.  */
                /* The entry is cut from the front of rest, so only the */
                /* characters before the next ";" are taken.            */
                logError(logFh,"trackbreak before: " + str(trackBreak) + newline());
                if (nextBreak > start)
                    DBImageFieldCopy = trim(substr(rest, 1, nextBreak - start));
                else
                    DBImageFieldCopy = "";
                logError(logFh,"After: " + DBImageFieldCopy + newline());
            }
            else {
                DBImageFieldCopy = trim(rest);
                /* Found last TXT file */
                checkFiles = 1;
            }
            logError(logFh, "********DBImageFieldCopy = " + DBImageFieldCopy);

            /* Translate the entry into the name of its OCR text file */
            imageFile = OcrTextName(DBImageFieldCopy);
            logError(logFh,"imageFile : " + imageFile + newline());

            /* Open the OCR text file */
            if ((imageFh = open(imageFile, "r")) == EOF) {
                logError(logFh, "Could not open "+imageFile);
            }
            else {
                LoadtoFieldFast(db, imageFh, logFh, textField);
                /* Close only a handle that was actually opened */
                close(imageFh);
                db->textField = db->textField + newline() + "******************** END OF PAGE ********************" + newline();
                logError(logFh, "Loaded "+imageFile+" successfully.");
            }
            logError(logFh, "checkFiles = " + str(checkFiles));

            /* Move trackBreak to position of most recent ";" */
            trackBreak = nextBreak;
            logError(logFh, "TRACKBREAK = " + str(trackBreak) + newline());
            logError(logFh, str(COUNTFILE) + " FILES COMPLETED. " + newline());
            COUNTFILE = COUNTFILE + 1;
        }
    }

    time(endhour,endminute,endsecond);
    logError(logFh, "BEGAN PROCESS @ : " + str(starthour,2,0,'Z')+":"+str(startminute,2,0,'Z')+":"+str(startsecond,2,0,'Z'));
    logError(logFh, "FINISHED PROCESS @ : " + str(endhour,2,0,'Z')+":"+str(endminute,2,0,'Z')+":"+str(endsecond,2,0,'Z'));
    close(logFh);
}

/****************************************************************
*  Name:     logError                                           *
*  Synopsis: Writes error info to log file                      *
*            Writes buffer as one line to the file logFh.       *
****************************************************************/
logError(int logFh; text buffer)
{
    writeln(logFh, buffer, len(buffer));
}

/****************************************************************
*  Name:     Status                                             *
*  Synopsis: Displays data base and program name.               *
*            Draws a panel starting at row 17, column 21 with   *
*            the database name, active query, documents in the  *
*            query, the two selected field names and the path.  *
*            When db.documents is negative the values are shown *
*            as "none" / "n/a".                                 *
****************************************************************/
Status(int db, imagefield, textfield; text path)
{
    int bg, fg, row, column;

    cursoroff();
    cursor(0,0);

    bg = RGB(128,128,128);
    fg = RGB(255,255,255);
    row = 17;
    column = 21;

    /* Frame, title and captions */
    box(row,column,row+15,column+33,"3U", fg, bg);
    puts(row+1,column+2,pad("Status",'C',30),fg,bg);
    puts(row+3,column+2,"Database:",fg,bg);
    puts(row+5,column+2,"Current Active Query:",fg,bg);
    puts(row+7,column+2,"Docs in Query:",fg,bg);
    puts(row+9,column+2,"Image field:",fg,bg);
    puts(row+11,column+2,"First OCR field:",fg,bg);
    puts(row+13,column+2,"Path:",fg,bg);

    /* Values, each on the row below its caption */
    if (db.documents >= 0) {
        puts(row+4,column+3,FileName(db.database),fg,bg);
        puts(row+6,column+3,str(db.activequery),fg,bg);
        puts(row+8,column+3,str(count(db)),fg,bg);
        puts(row+10,column+3,db.name[imagefield],fg,bg);
        puts(row+12,column+3,db.name[textfield],fg,bg);
        puts(row+14,column+3,path,fg,bg);
    }
    else {
        puts(row+4,column+3,"none",fg,bg);
        puts(row+6,column+3,"n/a",fg,bg);
        puts(row+8,column+3,"n/a",fg,bg);
        puts(row+10,column+3,"n/a",fg,bg);
        puts(row+12,column+3,"n/a",fg,bg);
        puts(row+14,column+3,"n/a",fg,bg);
    }
}

/****************************************************************
* Name: Main                                                    *
* Synopsis: Entry
point for all Concordance programs *
****************************************************************/
/* main: shows the title screen, then runs the OCR import menu      */
/* until the user quits.  The menu lets the user open and search a  */
/* database, pick the image-path field and the OCR paragraph field, */
/* and start the import with LoadFiles().                           */
main()
{
    char string[80];
    char szDb[80];          /* Filename of database */
    int isWindows;
    int finished, next;
    int db;                 /* Database handle */
    int imagefield, textfield;
    text MenuItems[8], ImagePath;

    /* Clear the screen with a cool color. */
    cursoroff();
    ver(string);
    isWindows = TRUE;
    if (isWindows)
        initScreen();
    else {
        Message("This CPL works for Windows. ",TRUE);
        return;
    }

    /* Popup the title screen */
    if (Greeting(isWindows) == FALSE)
        return;

    /* Initialize the Menu */
    MenuItems[0] = "OCR IMPORT MENU";
    MenuItems[1] = "[O]pen a database";
    MenuItems[2] = "[S]earch a database";
    MenuItems[3] = "[I]mage field select";
    MenuItems[4] = "O[C]R field select";
    MenuItems[5] = "------------------";
    MenuItems[6] = "[G]o!";
    MenuItems[7] = "[Q]UIT";

    /* Start the menu loop */
    finished = FALSE;
    while(finished == FALSE) {
        /* Keep the status of this program */
        Status(db, imagefield, textfield, ImagePath);

        /* The hot-key string has one letter per menu item; the "D" */
        /* lines up with the separator item 5, which has no case.   */
        switch(next = menu(6, 25, 16, 51, MenuItems, next,"OSICDGQ")) {
            case 0:     /* Escape menu option */
                if (Message("Really quit (Y/N)?", TRUE) == 'Y')
                    return;
                break;

            case 1:     /* Open a database. */
                if (getfile('Database',"*.DCB",szDb) == CR) {
                    closedb(db);
                    if ((db = opendb(szDb)) < 0)
                        Message("Cannot open database. ",TRUE);
                }
                next = 1;
                break;

            case 2:     /* Search the database */
                /* Same "is a database open" test that Status() and */
                /* GetField() use.                                  */
                if (db.documents >= 0) {
                    searchfs(db,"");
                    initScreen();
                }
                else
                    Message("Open a database first. ",TRUE);
                next = 2;
                break;

            case 3:     /* Select the image field */
                imagefield = GetField(db, 1);
                next = 3;
                break;

            case 4:     /* OCR field select */
                textfield = GetField(db, 1);
                next = 4;
                break;

            case 6:     /* Start importing */
                /* Set the flag from the answer on every run, so a   */
                /* "Yes" from an earlier run cannot carry over to a  */
                /* run where the user answers "No".                  */
                if (messageBox("Do you wish to clear the text fields of existing text before processing?","Clear Text",MB_YESNO) == IDYES) {
                    CLEARTEXT = 1;
                }
                else {
                    CLEARTEXT = 0;
                }
                LoadFiles(db, imagefield, textfield, ImagePath);
                initScreen();
                break;

            case 7:     /* Return to Concordance */
                finished = TRUE;
                break;
        }
    }
}