CPSC 461: Copyright (C) 2003 Katrin Becker Last Modified June 18, 2003 09:35 PM

Sample Code (Pascal Pseudo-Code)

Two_way Sort-Merge Algorithm
{
  Driver for the simple two-way sort-merge.  Each pass merges the runs
  on files 1 and 2 pairwise onto file 3; while more than one run
  remains, Distribute copies the runs on file 3 back to files 1 and 2
  and another pass is made.
}
{ sort phase }
initial distribution
{ Merge Phase }
Repeat
  runs := 0
  Repeat
    { merge next runs from 2 input files }
    input first record from file 1
    input first record from file 2
    Repeat
      { output record with smaller key to file 3 }
      file_k := Smallest_Key
      output record from file_k to file 3
      input next record from file_k
    Until End_Of_Run_On_Both_Files
    { one more merged run now sits on file 3 }
    increment runs by 1
  Until End_Of_Both_Files
  If runs > 1
    Distribute
Until runs = 1

Algorithm Smallest_Key

{
  Returns the input file (file 1 or file 2) whose current record
  must be written next.  A file that has finished its current run,
  or is at end of file, is skipped; otherwise the keys of the two
  current records are compared.  On equal keys file 2 is chosen.
}
If End_Of_Run_On_File (file 1)
   Or Eof (file 1)
  Smallest_Key := file 2
Else
  If End_Of_Run_On_File (file 2)
     Or Eof (file 2)
    Smallest_Key := file 1
  Else
    { both files still have a record in the current run: compare keys }
    If key (file 1) < key (file 2)
      Smallest_Key := file 1
    Else
      Smallest_Key := file 2

Algorithm End_Of_Run_On_Both_Files

{ TRUE only when the current run is finished on file 1 and on file 2 }
run_done_1 := End_Of_Run_On_File (file 1)
run_done_2 := End_Of_Run_On_File (file 2)
End_Of_Run_On_Both_Files := run_done_1 And run_done_2

Algorithm End_Of_Both_Files

{ TRUE only when file 1 and file 2 are both at end of file }
at_end_1 := Eof (file 1)
at_end_2 := Eof (file 2)
End_Of_Both_Files := at_end_1 And at_end_2

Algorithm Distribute

{
  Copies the runs held on file 3 back to the input files, one whole
  run at a time, alternating between file 1 and file 2 (first run to
  file 1).
}
target := 1
For I := 1 To runs
  Repeat
    input record from run I of file 3
    output record to file (target)
  Until End_Of_Run_On_File (file 3)
  { switch 1 -> 2 -> 1 ... for the next run }
  target := 3 - target


Balanced Two-Way Sort-Merge
{
  Driver for the balanced two-way sort-merge.  Runs are merged from
  the two files named in in_file onto the two files named in
  out_file; after each pass the roles of the file pairs are swapped
  until a single run remains.
}
{ Sort Phase }
initial distribution
{ Merge Phase }
number_of_output_files := 2
in_file [1] := 1
in_file [2] := 2
out_file [1] := 3
out_file [2] := 4
Repeat
  runs := 0
  Repeat
    {
      perform 2-way merge on files (in_file [1])
      and (in_file [2]) evenly distributing output
      to files (out_file [1]) and (out_file [2])
    }
    input first record from each input file
    runs := runs + 1
    {
      The remainder of runs / number_of_output_files will
      be 0 or 1 for 2 output files. So the remainder
      + 1 will select output file 1 or 2 alternately.
    }
    outfile := (runs Mod number_of_output_files) + 1
    Repeat
      { output record with smaller key }
      file_k := Smallest_Key
      output record from file_k to out_file [outfile]
      { file_k is already a file number, so it is not indexed through in_file again }
      input next record from file_k
    Until End_Of_Run_On_Both_Files
  Until End_Of_Both_Files
  {
    Count the runs on the output files once, before the swap below
    changes which files out_file refers to.
  }
  runs_left := Total_Runs
  If runs_left > 1 Then
    For I := 1 To 2
      hold := in_file [I]
      in_file [I] := out_file [I]
      out_file [I] := hold
Until runs_left = 1

Algorithm Total_Runs

{ Sums runs (file j) over every file number from out_file [1] to out_file [2] }
sum := 0
j := out_file [1]
While j <= out_file [2]
  sum := sum + runs (file j)
  j := j + 1
Total_Runs := sum

Algorithm Smallest_Key

{
  Picks the input file to take the next record from.  A file whose
  run is finished (or that is at end of file) gives way to the other
  one; otherwise the file with the strictly smaller key wins and
  in_file [2] is taken on equal keys.
}
If End_Of_Run_On_File (in_file [1]) Or Eof (in_file [1])
  Smallest_Key := in_file [2]
Else If End_Of_Run_On_File (in_file [2]) Or Eof (in_file [2])
  Smallest_Key := in_file [1]
Else If key (in_file [2]) <= key (in_file [1])
  Smallest_Key := in_file [2]
Else
  Smallest_Key := in_file [1]
 

Algorithm End_Of_Run_On_Both_Files

{
  TRUE only when End_Of_Run_On_File holds for every file number
  from in_file [1] to in_file [2].
}
all_done := TRUE
I := in_file [1]
While I <= in_file [2]
  If Not End_Of_Run_On_File (I)
    all_done := FALSE
  I := I + 1
End_Of_Run_On_Both_Files := all_done

Algorithm End_Of_Both_Files

{
  TRUE only when Eof holds for every file number from in_file [1]
  to in_file [2].
}
all_done := TRUE
I := in_file [1]
While I <= in_file [2]
  If Not Eof (file I)
    all_done := FALSE
  I := I + 1
End_Of_Both_Files := all_done


Balanced K-Way Sort-Merge

{
  Driver for the balanced k-way sort-merge.  The 2k files form two
  sets: set i (files i * k + 1 thru i * k + k) is read and set j
  (files j * k + 1 thru j * k + k) is written, with i and j taking
  the values 0 and 1.
}
{ Sort Phase }
initial distribution

{ Merge Phase }
number_of_output_files := k
i := 0
Repeat
  runs := 0
  { j is the set not selected by i; it cannot change during a pass }
  j := 1 - i
  Repeat
    {
      perform k-way merge on file (i * k + 1)
      thru (i * k + k) evenly distributing output
      to file (j * k + 1) thru (j * k + k)
    }
    input first record from each input file
    runs := runs + 1
    { remainder is 0 .. k - 1, so outfile cycles through 1 .. k }
    outfile := (runs Mod number_of_output_files) + 1
    Repeat
      { output record with smaller key }
      file_k := Smallest_Key
      output record from file_k to file (j * k + outfile)
      input next record from file_k
    Until End_Of_Run_On_All_Files
  Until End_Of_All_Files
  If Total_Runs > 1
    i := 1 - i
Until Total_Runs = 1
 
Algorithm Total_Runs
{
  Returns the number of runs on the k files of set j, the set the
  merge pass writes to (file (j * k + 1) thru file (j * k + k)).
  Set i is the one the pass has just read, so counting it would not
  measure the result of the pass.  j keeps its value until the next
  pass assigns it, so the count is the same before and after the
  driver flips i.
}
total := 0
For lcv := 1 To k
  total := total + runs (file (j * k + lcv) )
Total_Runs := total
 
Algorithm Smallest_Key
{
  Returns the input file whose current record has the smallest key
  among the files that still have a record in the current run, or 0
  when no such file exists.  lcv is the scan position shared with
  Find_Nonempty_File, which advances it on every call.
}
lcv := 1
first_file := Find_Nonempty_File
small := first_file
While first_file <> 0
  second_file := Find_Nonempty_File
  If second_file <> 0
    { compare against the smallest key seen so far, not the previous file }
    If key (small) > key (second_file)
      small := second_file
  { a result of 0 from Find_Nonempty_File ends the scan }
  first_file := second_file
Smallest_Key := small
 
 
Algorithm Find_Nonempty_File
{
  Starting at the current value of lcv, returns the number of the
  first file in set i that is neither at the end of its run nor at
  end of file, or 0 if none is found up to position k.  lcv is left
  just past the last position examined, so the next call resumes
  from there.
}
nonempty := 0
While nonempty = 0
      And lcv <= k
  If Not End_Of_Run_On_File (i * k + lcv)
     And Not Eof (file (i * k + lcv))
    nonempty := i * k + lcv
  lcv := lcv + 1
Find_Nonempty_File := nonempty
 
Algorithm End_Of_Run_On_All_Files
{
  TRUE only when End_Of_Run_On_File holds for each of the k files
  I * k + 1 thru I * k + k.
}
all_done := TRUE
lcv := 1
While lcv <= k
  If Not End_Of_Run_On_File (I * k + lcv)
    all_done := FALSE
  lcv := lcv + 1
End_Of_Run_On_All_Files := all_done
 
Algorithm End_Of_All_Files
{
  TRUE only when Eof holds for each of the k files
  I * k + 1 thru I * k + k.
}
all_done := TRUE
lcv := 1
While lcv <= k
  If Not Eof (file (I * k + lcv))
    all_done := FALSE
  lcv := lcv + 1
End_Of_All_Files := all_done

Polyphase Sort-Merge

{
  Driver for the polyphase sort-merge.  One file, outfile, receives
  merged runs while the other files are read; the outer loop repeats
  until Total_Runs reports a single run.
}
{ Sort Phase }
initial distribution
 
{ Merge Phase }
{ file k + 1 is the first output file }
outfile := k + 1
Repeat
runs := 0
Repeat
{
perform k-way merge on k input files
with output to outfile
}
input first record from each input file
runs := runs + 1
Repeat
{
output record with smaller key
}
file_k := Smallest_Key
output record from file_k to outfile
input next record from file_k
Until End_Of_Run_On_All_Files
{ the phase ends when End_Of_A_File is TRUE; that algorithm also assigns empty_file }
Until End_Of_A_File (empty_file)
{ the file just written is rewound, and empty_file takes over as the output file }
rewind outfile
outfile := empty_file
Until Total_Runs = 1
 
Algorithm Total_Runs
{ Sums runs (file (n)) over all k + 1 files, n = 1 thru k + 1 }
sum := 0
lcv := 1
While lcv <= k + 1
  sum := sum + runs (file (lcv) )
  lcv := lcv + 1
Total_Runs := sum
 
Algorithm Smallest_Key
{
  Returns the input file whose current record has the smallest key
  among the files that still have a record in the current run, or 0
  when no such file exists.  lcv is the scan position shared with
  Find_Nonempty_File, which advances it on every call.
}
lcv := 1
first_file := Find_Nonempty_File
small := first_file
While first_file <> 0
  second_file := Find_Nonempty_File
  If second_file <> 0
    { compare against the smallest key seen so far, not the previous file }
    If key (small) > key (second_file)
      small := second_file
  { a result of 0 from Find_Nonempty_File ends the scan }
  first_file := second_file
Smallest_Key := small
 
Algorithm Find_Nonempty_File
{
  Starting at the current value of lcv, returns the number of the
  first file other than outfile that is neither at the end of its
  run nor at end of file, or 0 if none is found up to file k + 1.
  lcv is left just past the last file examined, so the next call
  resumes from there.
}
nonempty := 0
While nonempty = 0
      And lcv <= k + 1
  { the current output file is never a merge input }
  If lcv <> outfile
    If Not End_Of_Run_On_File (lcv)
       And Not Eof (file (lcv))
      nonempty := lcv
  lcv := lcv + 1
Find_Nonempty_File := nonempty
 
Algorithm End_Of_Run_On_All_Files
{
  TRUE only when End_Of_Run_On_File holds for every file 1 thru
  k + 1 other than outfile.
}
all_done := TRUE
lcv := 1
While lcv <= k + 1
  If lcv <> outfile
    If Not End_Of_Run_On_File (lcv)
      all_done := FALSE
  lcv := lcv + 1
End_Of_Run_On_All_Files := all_done
 
Algorithm End_Of_A_File
{
  Returns TRUE when some file other than outfile is at end of file,
  scanning files 1 thru k + 1 in order and stopping at the first
  hit.  On a hit empty_file is set to the number of that file; when
  no file is at end of file empty_file is left unchanged.
}
lcv := 1
test_end := FALSE
While lcv <= k + 1
      And Not test_end
  If lcv <> outfile
    If Eof (file (lcv))
      test_end := TRUE
      { record the file number before lcv moves past it }
      empty_file := lcv
  lcv := lcv + 1
End_Of_A_File := test_end
 

Back to Top
CPSC 461: Copyright (C) 2003 Katrin Becker Last Modified June 18, 2003 09:35 PM