00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
#ifndef _MERGE_SORT
00024
#define _MERGE_SORT
00025
00026
00027
00028
00029
00030
#include <memory>
00031
#include <algorithm>
00032
#include <fstream>
00033
#include <cassert>
00034
00035
using namespace std;
00036
00037
00038
00041
template<
typename T>
00042 class CStreamPos{
00044 T mContent;
00045
public:
00046
CStreamPos(T inContent):mContent(inContent){
00047
00048 }
00049
00050
CStreamPos(
long int inContent):mContent(inContent){
00051
00052 }
00053
00054 operator T()
const{
00055
return mContent;
00056 }
00057 operator long int()
const{
00058
return mContent;
00059 }
00060
00061
CStreamPos<T>& operator++ (){
00062 mContent=mContent+static_cast<T>(1);
00063
return *
this;
00064 }
00065
CStreamPos<T> operator++ (
int){
00066
CStreamPos<T> lReturnValue=*
this;
00067 mContent=mContent+static_cast<T>(1);
00068
return lReturnValue;
00069 }
00070
00071
CStreamPos<T> operator% (
int inMod)
const{
00072
return mContent % inMod;
00073 }
00074
CStreamPos<T> operator/ (
int inMod)
const{
00075
return mContent / inMod;
00076 }
00077
bool operator< (CStreamPos<T>& inThan)
const{
00078
return this->mContent < inThan.mContent;
00079 }
00080
bool operator! ()
const{
00081
return !(this->mContent);
00082 }
00083
CStreamPos<T> operator+ (
CStreamPos<T>& inSummand){
00084
return CStreamPos<T>(mContent + inSummand.
mContent);
00085 }
00086 };
00087
00088
#if __GNUC__==2
00089
#define STREAMPOS_T long long int
00090
#else
00091
#define STREAMPOS_T CStreamPos<fstream::pos_type>
00092
#endif
00093
00094
00105
template<
class T>
00106
void merge_streams(istream& in1,
const STREAMPOS_T inCount1,
00107 istream& in2,
const STREAMPOS_T inCount2,
00108 ostream& out,
00109
int inNumberOfPageElements=1){
00110
00111 {
00112
00113
00114
00115
00116
00117
const STREAMPOS_T lPageSize(
sizeof(T)*inNumberOfPageElements);
00118
00119
00120
00121
if(!(inCount1)){
00122
for(STREAMPOS_T i=0;
00123 i<inCount2;
00124 i++){
00125 T l2;
00126 in2.read((
char*)&l2,
00127
sizeof(l2));
00128 assert(in2);
00129
00130 out.write((
char*)&l2,
00131
sizeof(l2));
00132 assert(out);
00133 }
00134
return;
00135 }
00136
if(!inCount2){
00137
for(STREAMPOS_T i=0;
00138 i<inCount1;
00139 i++){
00140 T l1;
00141 in1.read((
char*)&l1,
00142
sizeof(l1));
00143 assert(in1);
00144 out.write((
char*)&l1,
00145
sizeof(l1));
00146 assert(out);
00147 }
00148
return;
00149 }
00150
00151 STREAMPOS_T lI1(0);
00152 STREAMPOS_T lI2(0);
00153
00154
00155 T l1;
00156 in1.read((
char*)&l1,
00157
sizeof(l1));
00158 assert(in1);
00159 T l2;
00160 in2.read((
char*)&l2,
00161
sizeof(l2));
00162 assert(in2);
00163
00164
while((lI1<inCount1)
00165 &&(lI2<inCount2)){
00166
if(l1<l2){
00167
00168 out.write((
char*)&l1,
00169
sizeof(l1));
00170 assert(out);
00171
00172
00173 lI1++;
00174
if(lI1<inCount1){
00175 in1.read((
char*)&l1,
00176
sizeof(l1));
00177 assert(in1);
00178 }
00179 }
else{
00180
00181 out.write((
char*)&l2,
00182
sizeof(l2));
00183
00184 lI2++;
00185
if(lI2<inCount2){
00186 in2.read((
char*)&l2,
00187
sizeof(l2));
00188 assert(in2);
00189 }
00190 }
00191 }
00192
while(lI1<inCount1){
00193
00194 out.write((
char*)&l1,
00195
sizeof(l1));
00196
00197 lI1++;
00198
if(lI1<inCount1){
00199 in1.read((
char*)&l1,
00200
sizeof(l1));
00201 assert(in1);
00202 }
00203 }
00204
while(lI2<inCount2){
00205
00206 out.write((
char*)&l2,
00207
sizeof(l2));
00208 assert(out);
00209
00210 lI2++;
00211
if(lI2<inCount2){
00212 in2.read((
char*)&l2,
00213
sizeof(l2));
00214 assert(in2);
00215 }
00216 }
00217 }
00218 }
00219
template<
typename T>
00220
void first_level_quicksort(
int inNumberOfPageElements,
00221
const char* inFileToBeSortedName,
00222
const char* inTemporaryName){
00223
00224 cout <<
"Starting quicksort: "
00225 << inNumberOfPageElements
00226 <<
" elements per page." << endl
00227 <<
"Sorting files " << inFileToBeSortedName << endl
00228 <<
"to " << inTemporaryName << endl;
00229 cout <<
"NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl;
00230 auto_ptr<T> lPage(
new T[inNumberOfPageElements]);
00231
00232 cout <<
"H" << flush;
00233
00234
const STREAMPOS_T lPageSize(
sizeof(T)*inNumberOfPageElements);
00235
00236 cout <<
"I" << flush;
00237
00238 STREAMPOS_T lFileSize(0);
00239 ifstream lToBeSorted1(inFileToBeSortedName);
00240 assert(lToBeSorted1);
00241 lToBeSorted1.seekg(0,
00242 ios::end);
00243 lFileSize=lToBeSorted1.tellg();
00244 lToBeSorted1.clear();
00245 lToBeSorted1.seekg(0,
00246 ios::beg);
00247 cout <<
"E" << flush;
00248
00249 ofstream lTemporary(inTemporaryName);
00250 assert(lTemporary);
00251 cout <<
"R" << flush;
00252
00253 STREAMPOS_T lSum(0);
00254
00255 T* lBegin(lPage.get());
00256 T* lEnd(lPage.get());
00257
00258 cout <<
"FIRSTLEVELQUICK" << lFileSize <<
";" << lSum<< endl;
00259
while((lSum<lFileSize)
00260 && lToBeSorted1){
00261
00262 cout <<
"." << flush;
00263
00264
int lRead(0);
00265
if(lSum+lPageSize < lFileSize){
00266 lToBeSorted1.read((
char*)lPage.get(),
00267 lPageSize);
00268 lRead=lPageSize;
00269 }
else{
00270 lToBeSorted1.read((
char*)lPage.get(),
00271 lFileSize-lSum);
00272 lRead=lFileSize-lSum;
00273 }
00274
if(lRead){
00275 lEnd=lBegin+(lRead)/
sizeof(T);
00276 sort(lBegin,lEnd);
00277 lTemporary.write((
char*)lPage.get(),lRead);
00278 assert(lTemporary);
00279 }
00280 }
00281 cout <<
"." << endl;
00282
00283 }
00284
00295
template<
class T>
00296
char* merge_sort_streams(
const char* inFileToBeSortedName,
00297
const char* inTemporaryName,
00298
int inNumberOfPageElements=(1 << 20)
00299 ){
00300
00301
const char* lFileToBeSortedName(inFileToBeSortedName);
00302
const char* lTemporaryName(inTemporaryName);
00303
00304 STREAMPOS_T lFileSize(0);
00305 ifstream lToBeSorted1(inFileToBeSortedName);
00306 lToBeSorted1.seekg(0,
00307 ios::end);
00308 lFileSize=lToBeSorted1.tellg();
00309 lToBeSorted1.close();
00310
00311 ofstream lTemporary;
00312 ifstream lToBeSorted2;
00313
00314
#ifdef first_level_quick
00315
first_level_quicksort<T>(inNumberOfPageElements,
00316 inFileToBeSortedName,
00317 inTemporaryName);
00318 swap(lFileToBeSortedName,
00319 lTemporaryName);
00320
#else
00321
cout <<
"STARTING mit MERGESIZE1" << endl;
00322 inNumberOfPageElements=1;
00323
#endif
00324
STREAMPOS_T lCount(0);
00325
for(STREAMPOS_T iMergeSize(
sizeof(T)*inNumberOfPageElements);
00326 (iMergeSize < lFileSize)
00327 || !(lCount%2)
00328
00329
00330
00331
00332 ;
00333 (iMergeSize = iMergeSize << 1),
00334 (lCount=lCount+static_cast<STREAMPOS_T>(1))){
00335
00336 cout <<
"MERGESORT MergeSize "
00337 << iMergeSize
00338 << endl;
00339
00340 lToBeSorted1.open(lFileToBeSortedName);
00341 lToBeSorted1.clear();
00342
00343 lToBeSorted2.open(lFileToBeSortedName);
00344 lToBeSorted2.clear();
00345
00346 lTemporary.open(lTemporaryName);
00347 lTemporary.clear();
00348
00349
00350
for(STREAMPOS_T i(0);
00351 i<lFileSize;
00352 i = i + iMergeSize*2){
00353 lToBeSorted1.seekg(i);
00354
00355
if(!lToBeSorted1){
00356 cerr << __FILE__ <<
":" << __LINE__ <<
" lToBeSorted false, after seekg("
00357 << static_cast<long int>(i)
00358 <<
")"
00359 << endl;
00360 }
00361
00362 assert(lToBeSorted1);
00363
00364
if(i+iMergeSize<lFileSize){
00365 lToBeSorted2.seekg(i+iMergeSize);
00366 assert(lToBeSorted2);
00367 }
00368
00369 STREAMPOS_T lMergeSize1=lFileSize-i;
00370
if(lFileSize < i){
00371 lMergeSize1=0;
00372 }
00373 STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize;
00374
if(lFileSize < i + iMergeSize){
00375 lMergeSize2=0;
00376 }
00377
00378
00379
00380
00381
if(lMergeSize1>iMergeSize){
00382 lMergeSize1=iMergeSize;
00383 }
00384
if(lMergeSize2>iMergeSize){
00385 lMergeSize2=iMergeSize;
00386 }
00387
00388
#if __GNUC__==2
00389
merge_streams<T>(lToBeSorted1,
00390 lMergeSize1/
sizeof(T),
00391 lToBeSorted2,
00392 lMergeSize2/
sizeof(T),
00393 lTemporary,
00394 inNumberOfPageElements
00395 );
00396
#else
00397
merge_streams<T>(lToBeSorted1,
00398 lMergeSize1.operator/(
sizeof(T)),
00399 lToBeSorted2,
00400 lMergeSize2.operator/(
sizeof(T)),
00401 lTemporary,
00402 inNumberOfPageElements
00403 );
00404
#endif
00405
}
00406
00407 lTemporary.close();
00408 lToBeSorted1.close();
00409 lToBeSorted2.close();
00410 swap(lFileToBeSortedName,
00411 lTemporaryName);
00412 cout <<
"endmerge" << endl;
00413 }
00414
return strdup(lFileToBeSortedName);
00415 }
00416
#endif