- CPSC 461: Copyright (C) 2003 Katrin Becker Last Modified June 18, 2003 09:35 PM
Sample Code (Pascal Pseudo-Code)
Two-Way Sort-Merge Algorithm
- { sort phase }
- initial distribution
- { Merge Phase }
- Repeat
- runs := 0
- Repeat {
- merge next runs from 2 input files
- input first record from file 1
- input first record from file 2
- Repeat {
- output record with smaller key to file 3
- file_k := Smallest_Key
- output record from file_k to file 3
- input next record from file_k
- } Until End_Of_Run_On_Both_Files
- increment runs by 1
- }Until End_Of_Both_Files
-
- If runs > 1
- Distribute
- Until runs = 1
Algorithm Smallest_Key
- If End_Of_Run_On_File (file 1)
- Or Eof (file 1)
- Smallest_Key := file 2
- Else
- If End_Of_Run_On_File (file 2)
- Or Eof (file 2)
- Smallest_Key := file 1
- Else
- If key (file 1) < key (file 2)
- Smallest_Key := file 1
- Else
- Smallest_Key := file 2
Algorithm End_Of_Run_On_Both_Files
- test_end := TRUE
- For I := 1 To 2
- test_end := test_end And End_Of_Run_On_File (file I)
- End_Of_Run_On_Both_Files := test_end
Algorithm End_Of_Both_Files
- test_end := TRUE
- For I := 1 To 2
- test_end := test_end And Eof (file I)
- End_Of_Both_Files := test_end
Algorithm Distribute
- j := 0
- For I := 1 To runs
- Repeat
- input record from run I of file 3
- output record to file (1 + j)
- Until End_Of_Run_On_File (file 3)
- j := 1 - j
Balanced Two-Way Sort-Merge
- { Sort Phase }
- initial distribution
- { Merge Phase }
- number_of_output_files := 2
- in_file [1] := 1
- in_file [2] := 2
- out_file [1] := 3
- out_file [2] := 4
- Repeat
- runs := 0
- Repeat {
- perform 2-way merge on files (in_file [1])
- and (in_file [2]) evenly distributing output
- to files (out_file [1]) and (out_file [2])
- }
- input first record from each input file
- runs := runs + 1
- {
- The remainder of runs / number_of_output_files will
- be 0 or 1 for 2 output files. So the remainder
- + 1 will select output file 1 or 2 alternately.
- }
- outfile := (runs Mod number_of_output_files) + 1
- Repeat
- {
- output record with smaller key
- }
- file_k := Smallest_Key
- output record from file_k to out_file [outfile]
- input next record from file_k
- Until End_Of_Run_On_Both_Files
- Until End_Of_Both_Files
- If Total_Runs > 1 Then
- For I := 1 To 2
- hold := in_file [I]
- in_file [I] := out_file [I]
- out_file [I] := hold
- Until Total_Runs = 1
Algorithm Total_Runs
- total := 0
- For j := out_file [1] To out_file [2]
- total := total + runs (file j)
- Total_Runs := total
Algorithm Smallest_Key
- If End_Of_Run_On_File (in_file [1])
- Or Eof (in_file [1])
- Smallest_Key := in_file [2]
- Else
- If End_Of_Run_On_File (in_file [2])
- Or Eof (in_file [2])
- Smallest_Key := in_file [1]
- Else
- If key( in_file[1] ) < key( in_file[2] )
- Smallest_Key := in_file[1]
- Else
- Smallest_Key := in_file [2]
Algorithm End_Of_Run_On_Both_Files
- test_end := TRUE
- For I := in_file [1] To in_file [2]
- test_end := test_end And End_Of_Run_On_File (I)
- End_Of_Run_On_Both_Files := test_end
Algorithm End_Of_Both_Files
- test_end := TRUE
- For I := in_file [1] To in_file [2]
- test_end := test_end And Eof (file I)
- End_Of_Both_Files := test_end
Balanced K-Way Sort-Merge
- { Sort Phase }
- initial distribution
-
- { Merge Phase }
- number_of_output_files := k
- i := 0
- Repeat
- runs := 0
- Repeat
- j := 1 - i
- {
- perform k-way merge on file (i * k + 1)
- thru (i * k + k) evenly distributing output
- to file (j * k + 1) thru (j * k + k)
- }
- input first record from each input file
- runs := runs + 1
- outfile := (runs Mod number_of_output_files) + 1
- Repeat
- {
- output record with smaller key
- }
- file_k := Smallest_Key
- output record from file_k to file (j * k + outfile)
- input next record from file_k
- Until End_Of_Run_On_All_Files
- Until End_Of_All_Files
- If Total_Runs > 1
- i := 1 - i
- Until Total_Runs = 1
-
- Algorithm Total_Runs
- total := 0
- For lcv := 1 To k
- total := total + runs (file (i * k + lcv) )
- Total_Runs := total
-
- Algorithm Smallest_Key
- lcv := 1
- first_file := Find_Nonempty_File
- small := first_file
- While first_file <> 0
- second_file := Find_Nonempty_File
- If second_file <> 0
- If key (small) > key (second_file)
- small := second_file
- first_file := second_file
- Smallest_Key := small
-
-
- Algorithm Find_Nonempty_File
- nonempty := 0
- While nonempty = 0
- And lcv <= k
- If Not End_Of_Run_On_File (i * k + lcv)
- And Not Eof (file (i * k + lcv))
- nonempty := i * k + lcv
- lcv := lcv + 1
- Find_Nonempty_File := nonempty
-
- Algorithm End_Of_Run_On_All_Files
- test_end := TRUE
- For lcv := 1 To k
- test_end := test_end And End_Of_Run_On_File (i * k + lcv)
- End_Of_Run_On_All_Files := test_end
-
- Algorithm End_Of_All_Files
- test_end := TRUE
- For lcv := 1 To k
- test_end := test_end And Eof (file (i * k + lcv))
- End_Of_All_Files := test_end
-
Polyphase Sort-Merge
- { Sort Phase }
- initial distribution
-
- { Merge Phase }
- outfile := k + 1
- Repeat
- runs := 0
- Repeat
- {
- perform k-way merge on k input files
- with output to outfile
- }
- input first record from each input file
- runs := runs + 1
- Repeat
- {
- output record with smaller key
- }
- file_k := Smallest_Key
- output record from file_k to outfile
- input next record from file_k
- Until End_Of_Run_On_All_Files
- Until End_Of_A_File (empty_file)
- rewind outfile
- outfile := empty_file
- Until Total_Runs = 1
-
- Algorithm Total_Runs
- total := 0
- For lcv := 1 To k + 1
- total := total + runs (file (lcv) )
- Total_Runs := total
-
- Algorithm Smallest_Key
- lcv := 1
- first_file := Find_Nonempty_File
- small := first_file
- While first_file <> 0
- second_file := Find_Nonempty_File
- If second_file <> 0
- If key (small) > key (second_file)
- small := second_file
- first_file := second_file
- Smallest_Key := small
-
- Algorithm Find_Nonempty_File
- nonempty := 0
- While nonempty = 0
- And lcv <= k + 1
- If lcv <> outfile
- If Not End_Of_Run_On_File (lcv)
- And Not Eof (file (lcv))
- nonempty := lcv
- lcv := lcv + 1
- Find_Nonempty_File := nonempty
-
- Algorithm End_Of_Run_On_All_Files
- test_end := TRUE
- For lcv := 1 To k + 1
- If lcv <> outfile
- test_end := test_end And End_Of_Run_On_File (lcv)
- End_Of_Run_On_All_Files := test_end
-
- Algorithm End_Of_A_File
- lcv := 1
- test_end := FALSE
- While lcv <= k + 1
- And Not test_end
- If lcv <> outfile
- test_end := test_end Or Eof (file (lcv))
- lcv := lcv + 1
- empty_file := lcv - 1 { lcv was already advanced past the file that reported Eof }
- End_Of_A_File := test_end
-
-
CPSC 461: Copyright (C) 2003 Katrin Becker Last Modified June 18, 2003 09:35 PM