/*
 *  PROGRAM NAME:
 *  -------------
 *  READOCR.CPL
 *  Concordance(tm) Information Retrieval System, Professional Edition
 *
 *  COPYRIGHT:
 *  ----------
 *  Copyright (c) 1996 Dataflight Software.
 *  All Rights Reserved.
 *  2337 Roscomare Road, Suite 11
 *  Los Angeles, CA 90077
 *
 *  ALL RIGHTS RESERVED.
 *
 *  Unauthorized distribution, adaptation or use may be
 *  subject to civil and criminal penalties.
 *
 *  SYNOPSIS:
 *  ---------
 *  This program cycles through the current query and locates
 *  the image field. It then translates the image into a filename
 *  for the corresponding OCR text. It reads in the file and
 *  places the content into the specified paragraph field.
 */

/****************************************************************
 *  Global Variable Declarations and Initialization
 ****************************************************************/

int TRUE          = 1,
    FALSE         = 0,
    CR            = 13,
    EOF           = -1,
    ESC           = 27,
    LF            = 10,
    MAXBUFFERSIZE = 2048,     /* Must be way less than 65000 */
    MAXPARAGRAPH  = 8000000;  /* main() overwrites this based on ver() */

/****************************************************************
 *  Name:     RGB
 *  Synopsis: Helper routine for Windows color creation.
 *            Packs the low 8 bits of each component into one
 *            integer: blue * 65536 | green * 256 | red.
 ****************************************************************/
RGB(int red, grn, blu)
{
    return(((blu & 255) * 65536) | ((grn & 255) * 256) | (red & 255));
}

/****************************************************************
 *  Name:     Message
 *  Synopsis: Displays a centered message in a box.
 *            When wait is non-zero it waits for a key, restores
 *            the saved screen area and returns the key (lower
 *            case letters are converted to upper case).
 *            When wait is zero the box is left on screen, the
 *            saved area is not restored and key is returned
 *            without ever being assigned (presumably 0 -- this
 *            relies on how CPL initializes locals; TODO confirm).
 ****************************************************************/
Message( text message; int wait )
{
    text screen;
    int  key;

    cursoroff();
    screen = save( 5, 13, 10, 69 );
    box( 7, 13, 9, 69, "3U", RGB(0,0,150), RGB(0,0,255) );
    puts( 8, 14, pad( message, 'C', 55 ), RGB(255,255,255), RGB(0,0,255));

    if( wait )
    {
        key = getkey();
        restore( 5, 13, screen );
    }

    /* Fold lower case to upper case so callers can compare with 'Y'. */
    if( islower( key ))
        key = key - ( 'a' - 'A' );

    return( key );
} /* Message() */

/****************************************************************
 *  Name:     FileName
 *  Synopsis: Trims the path from the file name by repeatedly
 *            dropping everything up to and including the first
 *            backslash until none is left.
 ****************************************************************/
FileName(text name)
{
    int i;

    while(i = match(name,"\",1))
        name = substr(name,i+1);

    return(name);
} /* FileName() */

/****************************************************************
 *  Name:     Path
 *  Synopsis: Returns the path up to the last \ and without
 *            the file's name.
 ****************************************************************/
Path(text dosPath)
{
    int i, j;

    /* Start at the drive colon if there is one, else at the first \ */
    if ((i = match(dosPath,":",1)) == 0)
        i = match(dosPath,"\",1);

    /* Advance i to the last backslash in the string. */
    while(j = match(dosPath,"\",i+1))
        i = j;

    return(substr(dosPath,1,i));
} /* Path() */

/****************************************************************
 *  Name:     GetField
 *  Synopsis: Prompt user for field name.
 *            Builds a menu with one entry per database field
 *            (name plus a type label) and returns the index the
 *            user picked. If menu() returns 0 the incoming value
 *            of next is returned unchanged. Nothing is shown when
 *            db.documents is negative.
 ****************************************************************/
GetField(int db, next)
{
    int  i;
    text field[255];
    text screen;

    if (db.documents >= 0)
    {
        field[0] = "Field Type ";

        for(i = 1; i <= db.fields; i = i + 1)
        {
            /* Each case must end in break: without it every case  */
            /* falls through and all fields end up labelled Date.  */
            switch(db.type[i])
            {
                case 'T' :
                    field[i] = pad(db.name[i],'L',13)+ "Text ";
                    break;
                case 'P' :
                    field[i] = pad(db.name[i],'L',13)+ "Paragraph";
                    break;
                case 'N' :
                    field[i] = pad(db.name[i],'L',13)+ "Numeric ";
                    break;
                case 'D' :
                    field[i] = pad(db.name[i],'L',13)+ "Date ";
                    break;
            }
        }

        /* Keep asking while the menu reports an index past the last field. */
        i = db.fields + 1;
        screen = save(11,30,21,57);
        while(i > db.fields)
            i = menu(11, 30, 21, 57, field, next,"");
        restore(11,30,screen);

        if (i)
            next = i;
    }

    return(next);
} /* GetField() */

/****************************************************************
 *  Name:     initScreen
 *  Synopsis: Clears the screen and puts copyright.
 ****************************************************************/
initScreen()
{
    cls(RGB(0,0,170));
    puts(MaxRow_,0,pad("(C) Copyright Dataflight Software, Inc. 1996-1999. All Rights Reserved.",'C',80));
}

/****************************************************************
 *  Name:     Greeting
 *  Synopsis: Says hello.
* ****************************************************************/
Greeting(int isWindows)
{
    text greeting;

    /* isWindows is accepted but not consulted here. */
    greeting = "This program cycles through a database " + newline() +
               "query, translates the image key into a " + newline() +
               "file name, and imports the matching OCR " + newline() +
               "text file into the record.";

    messageBox(greeting, "Read OCR Utility 2.0", 0);
    return(TRUE);
}

/****************************************************************
 *  Name:     nextField
 *  Synopsis: Finds next available paragraph field.
 *            Returns the index of the first field after 'field'
 *            whose type is 'P', or FALSE when there is none.
 *            The search never indexes past db.fields and never
 *            returns a field that is not a paragraph field.
 ****************************************************************/
nextField(int db, field)
{
    int candidate;

    for (candidate = field + 1; candidate <= db.fields; candidate = candidate + 1)
    {
        if (db.type[candidate] == 'P')
        {
            if (isfield(db, db.name[candidate]))
                return(candidate);
        }
    }

    return(FALSE);
}

/****************************************************************
 *  Name:     LoadtoFieldFast
 *  Synopsis: Reads in a document into the current record.
 *            The size of the already opened file imageFh is
 *            taken with lseek(), the field textField is sized
 *            to match, and the whole file is read into it with
 *            a single read(). The complete file goes into
 *            textField; no spill-over into later paragraph
 *            fields is performed. logFh is accepted for
 *            interface compatibility and is not written to.
 ****************************************************************/
LoadtoFieldFast(int db, imageFh, logFh, textField)
{
    int charcount;

    /* Get a count of the characters in the file. */
    charcount = lseek(imageFh, 0, 'E');
    lseek(imageFh, 0, 'B');

    /* Allocate that space in the field. */
    db->textField = rep(' ', charcount);

    /* Read the entire file into the field. */
    read(imageFh, db->textField, charcount);

    /* Check for that nasty DOS end of file character, CTRL-Z. */
    if (len(db->textField) > 0)
    {
        /* Is the last character a CTRL-Z?                          */
        /* NOTE(review): the offset charcount + 1 is kept exactly   */
        /* as it was; confirm it addresses the last character read. */
        if (asc(addr(db->textField, charcount + 1)) == 26)
        {
            /* Delete the end of file character, we don't want it. */
            deleteText(db->textField, charcount + 1, 1);
        }
    }

    /* We're done. Return. The file has been consumed in one read, */
    /* so no further buffered pass over it is needed.              */
    return;
}

/****************************************************************
 *  Name:     LoadFiles
 *  Synopsis: Cycles through, loads OCR text.
 *            For every record visited by cycle(db) the value of
 *            imageField is turned into a file name (extension
 *            replaced by .TXT), the file is opened from 'path'
 *            and loaded into textField. If the .TXT file cannot
 *            be opened, the same base name with .rtf is tried in
 *            the same directory. Each success or failure is
 *            written to a log file chosen by the user.
 *            Returns the key from Message() when a precondition
 *            fails or the log file prompt is cancelled.
 ****************************************************************/
LoadFiles(int db, imageField, textField; text path)
{
    int  logFh, imageFh, i, j;
    text szLogFile, imageFile, szImageFile, key;

    /* Check to ensure fields were selected and database is open */
    if (db.documents <= 0)
        return(Message("Please open a database. ",TRUE));
    if (imageField <= 0)
        return(Message("Please enter key field. ",TRUE));
    if (textField <= 0)
        return(Message("Please enter OCR field. ",TRUE));
    if (db.type[textField] <> 'P')
        return(Message("OCR field must be type PARAGRAPH. ",TRUE));
    if (len(path) <= 0)
        return(Message("Please enter path. ",TRUE));

    /* Ask user to open a log file. A file that cannot be created  */
    /* prompts again; any answer other than CR abandons the import */
    /* so the user is never trapped in the prompt.                 */
    while (len(szLogFile) <= 0)
    {
        if (getfile("Create log file","*.LOG",szLogFile) <> CR)
            return(Message("Import cancelled: no log file chosen. ",TRUE));

        if ((logFh = open(szLogFile, "w+")) == EOF)
            szLogFile = "";
    }

    /* Set up a message box (left on screen because wait is FALSE) */
    Message("",FALSE);

    /* Let's cycle through the database */
    cycle(db)
    {
        /* Processing message */
        puts( 8, 14, pad("Processing... Record "+str(docno(db)), 'C', 55 ), RGB(255,255,255), RGB(0,0,255));

        /* Grab the image field */
        if ((db.type[imageField] == 'P') or (db.type[imageField] == 'T'))
        {
            /* Trim first so that the offsets found by match() refer */
            /* to the same string that substr() is applied to.       */
            key = trim(db->imageField);

            if (i = match(key, ".", 1))
            {
                /* Make sure this is the last period(.) */
                while ((j = match(key, ".", i + 1)) <> 0)
                    i = j;

                szImageFile = substr(key, 1, i - 1);
            }
            else
                szImageFile = key;

            imageFile = szImageFile + ".TXT";
        }
        else
        {
            /* Non-text key: format the value with its declared width */
            /* and decimal places. When the field has decimal places  */
            /* no .TXT extension is appended.                         */
            key = str(db->imageField, db.length[imageField], db.places[imageField], 'Z');
            szImageFile = key;
            imageFile   = key;
            if (db.places[imageField] == 0)
                imageFile = key + ".TXT";
        }

        imageFile = path + imageFile;

        /* Open the OCR text file */
        if ((imageFh = open(imageFile, "r")) == EOF)
        {
            /* The .txt file is not present. Try to open it as an */
            /* .rtf file in the same OCR directory.               */
            imageFile = path + szImageFile + ".rtf";
            if ((imageFh = open(imageFile, "r")) == EOF)
            {
                logError(logFh, "Could not open "+imageFile + " as .txt or .rtf");
            }
        }

        if (imageFh <> EOF)
        {
            LoadtoFieldFast(db, imageFh, logFh, textField);
            logError(logFh, "Loaded "+imageFile+" successfully.");
            close(imageFh);
        }
    }

    /* Close the log file */
    close(logFh);
}

/****************************************************************
 *  Name:     logError
 *  Synopsis: Writes error info to log file (one line per call).
 ****************************************************************/
logError(int logFh; text buffer)
{
    writeln(logFh, buffer, len(buffer));
}

/****************************************************************
 *  Name:     Status
 *  Synopsis: Displays data base and program name.
 *            Draws a status box with the database name, active
 *            query, document count, selected fields and path.
 *            When db.documents is negative, "none"/"n/a" is
 *            shown instead of values.
 ****************************************************************/
Status(int db, imagefield, textfield; text path)
{
    int bg, fg, row, column;

    cursoroff();
    cursor(0,0);

    bg     = RGB(128,128,128);
    fg     = RGB(255,255,255);
    row    = 17;
    column = 21;

    /* Frame and captions */
    box(row,column,row+15,column+33,"3U", fg, bg);
    puts(row+1,column+2,pad("Status",'C',30),fg,bg);
    puts(row+3,column+2,"Database:",fg,bg);
    puts(row+5,column+2,"Current Active Query:",fg,bg);
    puts(row+7,column+2,"Docs in Query:",fg,bg);
    puts(row+9,column+2,"Image field:",fg,bg);
    puts(row+11,column+2,"First OCR field:",fg,bg);
    puts(row+13,column+2,"Path:",fg,bg);

    /* Values, one row below each caption */
    if (db.documents >= 0)
    {
        puts(row+4,column+3,FileName(db.database),fg,bg);
        puts(row+6,column+3,str(db.activequery),fg,bg);
        puts(row+8,column+3,str(count(db)),fg,bg);
        puts(row+10,column+3,db.name[imagefield],fg,bg);
        puts(row+12,column+3,db.name[textfield],fg,bg);
        puts(row+14,column+3,path,fg,bg);
    }
    else
    {
        puts(row+4,column+3,"none",fg,bg);
        puts(row+6,column+3,"n/a",fg,bg);
        puts(row+8,column+3,"n/a",fg,bg);
        puts(row+10,column+3,"n/a",fg,bg);
        puts(row+12,column+3,"n/a",fg,bg);
        puts(row+14,column+3,"n/a",fg,bg);
    }
}

/****************************************************************
 *  Name:     Main
 *  Synopsis: Entry point for all Concordance programs *
****************************************************************/
/*
 *  main() sets MAXPARAGRAPH from ver(), shows the greeting and then
 *  runs the menu loop until the user picks QUIT or confirms the
 *  Escape prompt. The locals finished, next, db, imagefield and
 *  textfield are used before any assignment; this presumably relies
 *  on CPL giving locals an initial value of zero -- TODO confirm.
 */
main()
{
    char string[80];
    char szDb[80];                  /* Filename of database */
    int  isWindows;
    int  finished, next;
    int  db;                        /* Database handle */
    int  imagefield, textfield;
    text MenuItems[8], ImagePath;

    /* Paragraph capacity depends on the Concordance version. */
    if (ver() >= 7.0)
        MAXPARAGRAPH = 8000000;
    else
        MAXPARAGRAPH = 60000;

    /* Clear the screen with a cool color. */
    cursoroff();
    ver(string);
    isWindows = 1;
    initScreen();

    /* Popup the title screen */
    if (Greeting(isWindows) == FALSE)
        return;

    /* Initialize the Menu */
    MenuItems[0] = "OCR IMPORT MENU";
    MenuItems[1] = "[O]pen a database";
    MenuItems[2] = "[S]earch a database";
    MenuItems[3] = "[I]mage field select";
    MenuItems[4] = "O[C]R field select";
    MenuItems[5] = "[D]irectory of OCR text";
    MenuItems[6] = "[G]o!";
    MenuItems[7] = "[Q]UIT";

    /* Start the menu loop */
    while(finished == FALSE)
    {
        /* Keep the status of this program */
        Status(db, imagefield, textfield, ImagePath);

        switch(next = menu(6, 25, 16, 51, MenuItems, next,"OSICDGQ"))
        {
            case 0: /* Escape menu option */
                if (Message("Really quit (Y/N)?", TRUE) == 'Y')
                    return;
                break;

            case 1: /* Open a database. */
                /* The dialog title is a string, so it is double  */
                /* quoted like every other getfile() call here.   */
                if (getfile("Database","*.DCB",szDb) == CR)
                {
                    closedb(db);
                    if ((db = opendb(szDb)) < 0)
                        Message("Cannot open database. ",TRUE);
                }
                next = 1;
                break;

            case 2: /* Search the database */
                if (db<>EOF)
                {
                    searchfs(db,"");
                    initScreen();
                }
                else
                    Message("Open a database first. ",TRUE);
                next = 2;
                break;

            case 3: /* Select the image field */
                imagefield = GetField(db, 1);
                next = 3;
                break;

            case 4: /* OCR field select */
                textfield = GetField(db, 1);
                next = 4;
                break;

            case 5: /* OCR Directory */
                /* Only the directory part of the chosen file is kept. */
                if (getfile("OCR Text Directory","*.*",ImagePath, OFN_NOVALIDATE | OFN_PATHMUSTEXIST) == CR)
                    ImagePath = Path(ImagePath);
                break;

            case 6: /* Start importing */
                LoadFiles(db, imagefield, textfield, ImagePath);
                initScreen();
                break;

            case 7: /* Return to Concordance */
                finished = TRUE;
                break;
        }
    }
}