/************************************************************
  FILE: findkeys.cc
  K.Becker
  November 10 1999

  Program to show how to search hash tables created using 
  various collision resolution techniques.

  WARNING: input error checking is minimal

  Expected arguments:
  1: table size: -t <n>

  2: collision resolution technique (c.r.t.) to use
        -lp  = linear probing (default)
        -dh1 = double hashing (type 1: hash twice)
        -dh2 = double hashing (type 2: vary increment)
        -co  = chained overflow (2-pass)
	-sc  = simple chaining (probes next, step=1)
        -ch  = coalesced hashing (probes bottom-up)
        -dc  = direct chaining (dynamic)
        -cc  = computed chaining (dynamic)
        -bm  = brent's method (dynamic)
        -bti = binary tree insertion (dynamic)

  3: (optional) debug flag: -d 1 | 2 | 3
        [0] no trace (default)
	[1] trace probe chains > 1
	[2] trace all keys
	[3] trace all keys AND print table at start

  4: file containing loaded hash table

  5: file containing keys to be searched for

-----------------------------------------------------------
      TABLE OF CONTENTS..

******* Admin Stuff ****************************
void help( )                 : displays simple man page
int pl ( int ln )            : convert link value to print 
                                 (so null link looks reasonable)
void displaytable( int Table[] )
                             :  Draw the Hash Table
void displaytablelinks( int Table[], int Links[] )
                             : Draw the Hash Table (with associated links)
int crtcode( char* crttype ) : convert (text) code for crt to number 
                                  [see constants for mapping]
void getargs (int argc, char* argv[], 
	      char*& fname1, char*& fname2,
	      int& size, int& debug, int& crt )
                             : Get the Command Line Arguments
                             : should be findkeys -t <n> -crt -d <n> f1 f2

******* Hashing Stuff ****************************
int hash( int key )          : the first hash function
int hash2( int key )         : the secondary hash function
int step( int key )          : calculates the step value 
                                     (incrementing function)
int next ( int loc, int stepsize ) 
                             : calculate the next location in the table
                                      with wrap-around
int prev ( int loc, int stepsize )
                             : calculate the previous location in the table
                                     (stepping backwards, with wrap-around)

******* Searching Routines ****************************

int find_LP ( int key, int loc, int stepsize )
  // Open Addressing with a fixed step of 1
  //      Linear Probing

int find_DH1 ( int key, int loc, int stepsize )
  // Double Hashing Type 1

int find_open ( int key, int loc, int stepsize )
  // Open Addressing Schemes that vary step size:
  //      Double Hashing Type 2
  //      Brent Method
  //      Binary Tree Insertion

int find_chains ( int key, int loc, int stepsize )
  // Chained Addressing Schemes
  //      Chained Overflow
  //      Simple Chaining
  //      Coalesced Hashing
  //      Direct Chaining

int find_pseudolinks ( int key, int loc, int stepsize )
  // Chaining using Pseudolinks
  //      Computed Chaining

**********************************************************/

#include <stdio.h>
#include <iostream.h>
#include <iomanip.h>
#include <stdlib.h>
#include <math.h>
#include <limits.h>
#include <fstream.h>
#include <string.h>

//--- GENERAL CONSTANTS-----------------------------------
// EMPTY is the sentinel stored in Table for an unused slot and in Links
// for a null link; negative values read from the hash table file are
// converted to it when the table is loaded.
// YES / NO are the values taken by the haslinks flag.
const int EMPTY     = INT_MAX;
const int YES       = 1;
const int NO        = 0;

//--- COLLISION RESOLUTION TECHNIQUES---------------------
// Numeric codes returned by crtcode() and switched on in main() to pick
// the probe function.  The values are unique but not in listing order
// (SC is 9).
const int LP = 0;            // Linear Probing
const int DH1 = 1;           // Double Hashing Type 1
const int DH2 = 2;           // Double Hashing Type 2
const int CO = 3;            // Chained Overflow
const int SC = 9;            // Simple Chaining
const int CH = 4;            // Coalesced Hashing
const int DC = 5;            // Direct Chaining
const int CC = 6;            // Computed Chaining
const int BM = 7;            // Brent's Method
const int BTI = 8;           // Binary Tree Insertion

//--- GLOBALLY USED VARIABLES----------------------------

// 'generic' search function: main() points this at the find_* routine
// matching the chosen collision resolution technique; it returns the
// next table location to probe.
int (*nextlocation) (int key, int location, int stepsize);


int AddrSpace = 29;          // table size; default, overwritten by getargs()

int* Table;                  // the hash table itself (AddrSpace slots)
int* Links;                  // link (or pseudolink) per slot, if required

int Nkeys = 0;               // count of keys actually searched for
int probes = 0;              // probe count for one key (read by find_DH1)

int nkeys_found = 0;         // count of keys found in table
int nkeys_not_found = 0;     // count of keys NOT found
int tprobes_found = 0;       // sum of all probes for keys found
int tprobes_not_found = 0;   // sum of probes for keys NOT found

float apl_found = 0.0;       // average probe length, keys found
float apl_not_found = 0.0;   // average probe length, keys NOT found
float apl_overall = 0.0;     // average probe length over all keys

int key;                     // key to be found (incoming key)
int sval;                    // step value for incoming key

int debug = 0;               // debug flag (command line)
int crt   = LP;              // collision resolution technique that was used

int haslinks = NO;           // set by crtcode() if c.r.t. uses links

char* hashname;              // name of file containing hash table
fstream hashfile;            // stream the loaded hash table is read from

char* keyname;              // name of file containing keys to find
fstream keyfile;            // file with keys to look for


/*********************************************************/
/***** admin stuff  **************************************/
/*********************************************************/

void help( )
{
  // Print the built-in manual page on standard output: what the program
  // does, the usage line, and one section per expected argument.

  // -- purpose and usage line
  cout << "Program to show how to search hash tables created using " << endl
       << "  various collision resolution techniques." << endl << endl
       << "Usage: findkeys [-t <n>] [-<crt>] [-d <n>] f1 f2" << endl
       << "       WARNING: input error checking is minimal" << endl
       << endl;

  // -- argument 1: table size
  cout << "  Expected arguments:" << endl
       << "  1: table size: " << endl
       << "     -t <n>" << endl
       << endl;

  // -- argument 2: collision resolution technique
  cout << "  2: collision resolution technique (c.r.t.) to use" << endl
       << "        -lp  = linear probing (default)" << endl
       << "	-dh1 = double hashing - type 1: hash twice" << endl
       << "	-dh2 = double hashing - type 2: vary increment" << endl
       << "	-co  = chained overflow (2-pass)" << endl
       << "        -sc  = simple chaining (probes next, step=1)" << endl
       << "	-ch  = coalesced hashing (probes bottom-up)" << endl
       << "	-dc  = direct chaining (dynamic)" << endl
       << "	-cc  = computed chaining (dynamic)" << endl
       << "	-bm  = brent's method" << endl
       << "	-bti = binary tree insertion" << endl
       << endl;

  // -- argument 3: debug level
  cout << "  3: (optional) debug flag: -d 1 | 2 | 3" << endl
       << "        [0] no trace (default)" << endl
       << "	[1] trace all probes > 1" << endl
       << "	[2] trace all keys" << endl
       << "	[3] trace all keys and print table at start" << endl
       << endl;

  // -- arguments 4 and 5: the two input files
  cout << "  4: file containing loaded hash table" << endl
       << endl
       << "  5: file containing keys to look for" << endl
       << endl;

} // help

//-------------------------------------------------------//
int pl ( int ln )
{
  // Printable form of a link value: the EMPTY sentinel (a null link)
  // is shown as -1, any other value is passed through untouched.
  return (ln == EMPTY) ? -1 : ln;

} // pl

//-------------------------------------------------------//
void displaytable( int Table[] )
  //
  // Print the hash table, one "index: key" line per slot for all
  // AddrSpace slots; unused slots are shown as "---".  A blank line
  // follows the listing.
{
  int slot = 0;

  while (slot < AddrSpace)
    {
      const int vacant = (Table[slot] == EMPTY);

      cout << setw(3) << slot;
      if (vacant)
        cout << ": ---";
      else
        cout << ": " << setw(3) << Table[slot];
      cout << endl;

      ++slot;
    }

  cout << endl;
} // displaytable

//-------------------------------------------------------//
void displaytablelinks( int Table[], int Links[] )
  //
  // Print the hash table together with its links: one
  // "index: key  link" line per slot for all AddrSpace slots.  Unused
  // slots are shown as "---" and null links as -1 (see pl()).  A blank
  // line follows the listing.
{
  int slot = 0;

  while (slot < AddrSpace)
    {
      const int vacant = (Table[slot] == EMPTY);

      cout << setw(3) << slot;
      if (vacant)
        cout << ": ---";
      else
        cout << ": " << setw(3) << Table[slot];
      cout << "  " << setw(3) << pl( Links[slot] ) << endl;

      ++slot;
    }

  cout << endl;
} // displaytablelinks

//-------------------------------------------------------//
int crtcode( char* crttype )
{
  // convert (text) code for collision resolution technique
  // to number [see constants for mapping]

  if ( strcmp( crttype, "-lp" ) == 0 )
    {
      cout << "Using Linear Probing..." << endl;
      return LP;
    }
  else if ( strcmp( crttype, "-dh1" ) == 0 )
    {
      cout << "Using Double Hashing Type 1 (hash twice)..." << endl;
      return DH1;
    }
  else if ( strcmp( crttype, "-dh2" ) == 0 )
    {
      cout << "Using Double Hashing Type 2 (vary increment)..." << endl;
      return DH2;
    }
  else if ( strcmp( crttype, "-co" ) == 0 )
    {
      cout << "Using Chained Overflow..." << endl;
      haslinks = YES;
      return CO;
    }
  else if ( strcmp( crttype, "-sc" ) == 0 )
    {
      cout << "Using Simple Chaining..." << endl;
      haslinks = YES;
      return SC;
    }
  else if ( strcmp( crttype, "-ch" ) == 0 )
    {
      cout << "Using Coalesced Hashing (probes bottom up).." << endl;
      haslinks = YES;
      return CH;
    }
  else if ( strcmp( crttype, "-dc" ) == 0 )
    {
      cout << "Using Direct Chaining" << endl;
      haslinks = YES;
      return DC;
    }

  else if ( strcmp( crttype, "-cc" ) == 0 )
    {
      cout << "Using Computed Chaining..." << endl;
      haslinks = YES;
      return CC;
    }
  else if ( strcmp( crttype, "-bm" ) == 0 )
    {
      cout << "Using Brent's Method..." << endl;
      return BM;
    }
  else if ( strcmp( crttype, "-bti" ) == 0 )
    {
      cout << "Using Binary Tree Insertion..." << endl;
      return BTI;
    }
  else
    {
      cout << "Using Default..." << endl;
      return LP;
    }

} // crtcode

//-------------------------------------------------------//
void getargs (int argc, char* argv[], 
	      char*& fname1,
	      char*& fname2,
	      int& size, int& debug, int& crt )
  //
  // Get the Command Line Arguments
  //
  // usage: findkeys [-t <n>] [-<crt>] [-d <n>] f1 f2
  //
  // The last two words are always the file names (hash table, then keys
  // to find).  Everything in front of them is an option:
  //   -t <n>   table size        -> size  (only accepted when n > 0)
  //   -d <n>   debug level       -> debug
  //   -<crt>   any other -word   -> crt, via crtcode()
  // Options that are absent leave the caller's value untouched, so the
  // caller's defaults apply.  The historical fixed order
  // (-t <n> -crt -d <n> f1 f2) is still accepted and gives the same result.
  //
  // Exits with status 1 when fewer than two file names are supplied.
{
  if (argc < 3)
    {
      cout << "ERROR: need a hash table file and a key file."
           << " Bailing out." << endl;
      exit(1);
    }

  fname1 = argv[argc-2]; // second last is hash table
  fname2 = argv[argc-1]; // last is keys to find

  const int lastopt = argc - 3;  // index of the last word that can be an option
  int a = 1;

  while (a <= lastopt)
    {
      char* word = argv[a];
      const int isT = (strcmp( word, "-t" ) == 0);
      const int isD = (strcmp( word, "-d" ) == 0);

      if (isT || isD)
        {
          // both take a numeric value, which must also lie before the
          // two file names
          if (a == lastopt)
            {
              cout << "WARNING: " << word << " needs a value; ignored."
                   << endl;
              a++;
              continue;
            }

          const int n = atoi( argv[a+1] );
          if (isD)
            debug = n;
          else if (n > 0)
            size = n;
          else // a table of 0 or fewer slots cannot be hashed into
            cout << "WARNING: table size must be positive; keeping "
                 << size << endl;
          a += 2;
        }
      else if (word[0] == '-')
        {
          crt = crtcode( word ); // also sets haslinks where needed
          a++;
        }
      else
        {
          cout << "WARNING: ignoring unexpected argument " << word << endl;
          a++;
        }
    }

  return;
} // getargs

/*********************************************************/
/***** Hashing Routines **********************************/
/*********************************************************/

// IMPORTANT: these must be the exact same hashing routines 
//            as used to place the keys

int hash( int key )
  //
  // Primary hash function: the key's home address is its remainder
  // modulo the table size.  Announces the result when debug > 1.
{ 
  const int home = key % AddrSpace; // simple algorithm

  /*****/
  if (debug > 1)
    cout << "Key " << key << " hashes to " << home << endl;
  /*****/

  return home;
} // hash

//-------------------------------------------------------//

int hash2( int key )
  //
  // Secondary hash function: the square of the key modulo the table
  // size.  Announces the result when debug > 1.
  //
  // NOTE(review): key*key overflows a 32-bit int once |key| exceeds
  // 46340.  Left as is because this routine has to match the one used
  // to place the keys - confirm against the placing program before
  // changing it.
{ 
  const int rehashed = (key * key) % AddrSpace; // simple algorithm

  /*****/
  if (debug > 1)
    cout << "Key " << key << " now hashes to " << rehashed << endl;
  /*****/

  return rehashed;
} // hash2

//-------------------------------------------------------//

int step( int key )
  //
  // Incrementing function: the step size belonging to a key is the
  // quotient key/AddrSpace reduced modulo AddrSpace, with 0 promoted
  // to 1.  The EMPTY sentinel yields a step of 0 and is never traced.
  // Announces the result when debug > 1.
{ 
  if (key == EMPTY)
    return 0;

  int stride = (key / AddrSpace) % AddrSpace; // simple algorithm
  if (stride == 0) // can't allow a step size of 0
    stride = 1;

  /*****/
  if (debug > 1)
    cout << "     Key " << key << " step size is " << stride << endl;
  /*****/

  return stride;
} // step

//-------------------------------------------------------//
int next ( int loc, int stepsize )
{
  // Calculate the location stepsize slots after loc, with wrap-around.
  //
  // The result is always in 0 .. AddrSpace-1.  The wrap is done with the
  // remainder operator, so it costs the same however large the step is
  // (Computed Chaining can produce reeely big steps).  Reducing both
  // operands before adding keeps the sum from overflowing, and a
  // negative sum is folded back into range as well.
  //
  // Requires AddrSpace > 0.
  int v;

  v = (loc % AddrSpace) + (stepsize % AddrSpace);
  v %= AddrSpace;
  if (v < 0) // % keeps the sign of the dividend
    v += AddrSpace;
  return v;

} // next

//-------------------------------------------------------//
int prev ( int loc, int stepsize )
{
  // Calculate the location stepsize slots before loc, i.e. stepping
  // backwards through the table with wrap-around.
  //
  // The result is always in 0 .. AddrSpace-1.  The wrap is done with the
  // remainder operator, so it costs the same however large the step is,
  // and a difference at or beyond the end of the table (negative
  // stepsize) is folded back into range too.
  //
  // Requires AddrSpace > 0.
  int v;

  v = (loc % AddrSpace) - (stepsize % AddrSpace);
  v %= AddrSpace;
  if (v < 0) // % keeps the sign of the dividend
    v += AddrSpace;
  return v;

} // prev

/*********************************************************/
/******* Search Algorithms ****************************/
/*********************************************************/
int find_LP ( int key, int loc, int stepsize )
{
  // Linear Probing
  //
  // The next probe is always the adjacent slot (wrapping at the end of
  // the table); key and stepsize are not consulted.
  const int neighbour = next( loc, 1 );

  return neighbour;

} // find_LP

//-------------------------------------------------------//
int find_DH1 ( int key, int loc, int stepsize )
{
  // Double Hashing Type 1
  //
  // The second probe for a key goes to its secondary hash address;
  // every other probe moves on to the adjacent slot.  Which probe this
  // is comes from the global probes counter, which the search loop
  // increments before calling here.
  const int second_probe = (probes == 2);

  return second_probe ? hash2( key ) : next( loc, 1 );

} // find_DH1

//-------------------------------------------------------//
int find_open ( int key, int loc, int stepsize )
{
  // Open Addressing Schemes that vary step size:
  //      Double Hashing Type 2
  //      Brent Method
  //      Binary Tree Insertion
  //
  // The next probe lies stepsize slots further on (with wrap-around);
  // key is not consulted.
  const int target = next( loc, stepsize );

  return target;

} // find_open

//-------------------------------------------------------//
int find_chains ( int key, int loc, int stepsize )
{
  // Chained Addressing Schemes
  //      Chained Overflow
  //      Simple Chaining
  //      Coalesced Hashing
  //      Direct Chaining
  //
  // The next probe is whatever the link stored for the current slot
  // says, which is EMPTY when the slot has no link.  key and stepsize
  // are not consulted.
  const int successor = Links[ loc ];

  return successor;

} // find_chains

//-------------------------------------------------------//
int find_pseudolinks ( int key, int loc, int stepsize )
{
  // Chaining using Pseudolinks
  //      Computed Chaining
  //
  // The link stored with a slot is a multiplier, not an address: the
  // next probe lies (link * step of the key stored in this slot) slots
  // further on, with wrap-around.  A slot without a link ends the chain
  // and EMPTY is returned.  The incoming key and stepsize are not
  // consulted.
  const int multiplier = Links[loc];

  if (multiplier == EMPTY)
    return EMPTY;

  return next( loc, (multiplier * step( Table[loc] )) );

} // find_pseudolinks

//-------------------------------------------------------//


/*********************************************************/
/***** MAIN **********************************************/
/*********************************************************/
static int in_table( int loc )
{
  // true when loc can be used as an index into Table / Links
  return (loc >= 0) && (loc < AddrSpace);
} // in_table

//-------------------------------------------------------//
int main( int argc, char* argv[] )
{
  // Program entry point:
  //   1. read the command line (table size, c.r.t., debug level, files)
  //   2. load the hash table (and links, if the c.r.t. uses them)
  //   3. look up every key in the key file, counting probes
  //   4. print found / not-found counts and average probe lengths
  // Returns 0 on success; exits with 1 when the table size is unusable
  // or a file cannot be opened.

  int location;
  int stepsize;
  int i; // for indexing

  // get args - set flags
  // both file names are mandatory, so anything shorter gets the man page
  if (argc < 3)
    {
      help();
      exit( (argc <= 1) ? 0 : 1 );
    }

  getargs ( argc, argv, hashname, keyname, AddrSpace, debug, crt );
  cout << "Hashtable Filename is: " << hashname << endl;
  cout << "Keys Filename is: " << keyname << endl;
  cout << "Table Size is: " << AddrSpace << endl;
  cout << "Debug Switch = " << debug << endl << endl;

  // the hash functions divide by the table size and the arrays are
  // sized by it, so it has to be positive
  if (AddrSpace <= 0)
    {
      cout << "Invalid table size. Bailing out." << endl;
      exit(1);
    }

  // initialize table:
  Table    = new int[AddrSpace];
  Links    = new int[AddrSpace];

  for (i = 0; i < AddrSpace; i++)
    {
      Table[i]    = EMPTY;
      Links[i]    = EMPTY;
    }

  // open file containing hash table
  hashfile.open( hashname, ios::in );
  if (hashfile.fail())
    {
      cout << "Unable to open Hash file. Code is: " << hashfile.rdstate()
           << " Bailing out." << endl;
      exit(1);
    }


  // LOAD HASH TABLE
  // A slot is only counted when its key was actually read, so trailing
  // white space or a bad token cannot produce a phantom entry.
  i = 0;
  while (i < AddrSpace)
    {
      int value;

      if (!(hashfile >> value))
        break; // end of file, or something that is not a number

      Table[i] = (value < 0) ? EMPTY : value; // negative means unused

      if (haslinks)
        {
          // a missing or negative link leaves the slot's link EMPTY
          if ((hashfile >> value) && (value >= 0))
            Links[i] = value;
        }
      i++;
    } // while slots to load

  // check what was read against AddrSpace (should be the same)
  if (i < AddrSpace)
    {
      cout << "WARNING: All keys read before Table filled." << endl;
    }
  else
    {
      // the table is full; anything still readable is surplus
      int surplus;
      if (hashfile >> surplus)
        cout << "WARNING: Table filled before all keys read." << endl;
    }

  /*****/
  if (debug > 2) // asked to show table
    {
      if (haslinks)
        displaytablelinks( Table, Links );
      else
        displaytable( Table );
    }
  /*****/

  // open file of keys to find
  keyfile.open( keyname, ios::in );
  if (keyfile.fail())
    {
      cout << "Unable to open file of search keys. Code is: " 
           << keyfile.rdstate()
           << " Bailing out." << endl;
      exit(1);
    }
  
  // find out which algorithm to use to calculate next probe location..
  switch ( crt )
    {
    case DH1: nextlocation = find_DH1;
      break;
    case DH2:
    case BM:
    case BTI: nextlocation = find_open;
      break;
    case CO:
    case SC:
    case CH:
    case DC: nextlocation = find_chains;
      break;
    case CC: nextlocation = find_pseudolinks;
      break;
    case LP:
    default: nextlocation = find_LP; // never leave the pointer unset
      break;
    }


  // SEARCH FOR KEYS:
  // The read itself drives the loop: the last key is processed even
  // without a trailing newline, and a non-numeric token ends the loop
  // instead of spinning on it.
  while (keyfile >> key)
    { 
      // Negative keys and the EMPTY sentinel can never be stored (the
      // loader turns negatives into EMPTY), and a negative key would
      // hash to an address in front of the table.
      if ((key < 0) || (key == EMPTY))
        {
          cout << "WARNING: ignoring invalid key " << key << endl;
          continue;
        }

      Nkeys++; // count it
      location = hash( key ); // home address
      stepsize = step( key ); // in case we need it
      probes = 1;             // the home address is the first probe

      // start looking
      if (Table[location] == key) // found right away;
        {
          nkeys_found++;
          tprobes_found++;

          /*****/
          if (debug > 1)
            { cout << "Key " << key << " found at home(" << location 
                   << ") " << endl;
            }
          /*****/
        }
      else // need to search for it
        {
          /*****/
          if (debug > 2)
            { cout << "Key " << key 
                   << " not found, commencing search from.. "
                   << location 
                   << endl;
            }
          /*****/
          
          // Keep probing until the chain ends (or leaves the table),
          // every slot has been probed, the key turns up, or an unused
          // slot is reached.
          while (in_table( location ) && (probes < AddrSpace) &&
                 (Table[location] != key) && (Table[location] != EMPTY))
            {
              probes++;
              location = nextlocation( key, location, stepsize);
              /*****/
              if (debug > 2)
                { cout << " .. " << location;
                if ((probes%12) == 0) cout << endl;
                }
              /*****/
            } // while not found

          /*****/
          if (debug > 2) // finish the trace line; silent otherwise
            cout << endl;
          /*****/
          
          // check result
          // The slot reached is examined first, so a key sitting at the
          // very last permitted probe still counts as found.
          if (in_table( location ) && (Table[location] == key))
            { // FOUND IT!!
              nkeys_found++;
              tprobes_found += probes;
              /*****/
              if (debug > 1)
                cout << " Key " << key << " found at " << location 
                     << endl;
              /*****/
            }
          else
            { // chain ended, table exhausted, or unused slot: not found
              nkeys_not_found++;
              tprobes_not_found += probes;
              /*****/
              if (debug > 1)
                cout << "  Key " << key << " NOT found." << endl;
              /*****/
            }
        } // else search for it
        
    } // while keys to look for
  
  // print stats...
  cout << "------------------------------------------" << endl;
  cout << endl;
  cout << "Total Keys Searched for: " << Nkeys << endl;
  cout << "Total keys found: " << nkeys_found << endl;
  cout << "Total keys NOT found " << nkeys_not_found << endl;
  cout << endl;

  cout << setprecision(2) << setiosflags( ios::fixed | ios::showpoint);

  // each average is left at 0.0 when there is nothing to average
  if (nkeys_found > 0)
    apl_found = float(tprobes_found)/float(nkeys_found);

  if (nkeys_not_found > 0)
    apl_not_found = float(tprobes_not_found)/float(nkeys_not_found);

  if(nkeys_found+nkeys_not_found > 0)
    apl_overall = float(tprobes_found+tprobes_not_found) / 
                    float(nkeys_found+nkeys_not_found);

  cout << "Average probe length:" << endl;
  cout << "     keys found: " << apl_found << endl;
  cout << "     keys NOT found: " << apl_not_found << endl;
  cout << endl;

  cout << "Probe length overall:";
  cout << apl_overall << endl;

  // CLEANUP..
  keyfile.close();
  hashfile.close();

  delete [] Table;
  delete [] Links;
  Table = 0;
  Links = 0;

  cout << "Done." << endl;

  return 0;

} // END PROGRAM ---------------------------------------------
