System documentation of the GNU Image-Finding Tool

Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members

merge_sort_streams.h

00001 /* -*- mode: c++ -*- 00002 */ 00003 /* 00004 00005 GIFT, a flexible content based image retrieval system. 00006 Copyright (C) 1998, 1999, 2000, 2001, 2002, CUI University of Geneva, University of Bayreuth 00007 00008 This program is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 2 of the License, or 00011 (at your option) any later version. 00012 00013 This program is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with this program; if not, write to the Free Software 00020 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00021 00022 */ 00023 #ifndef _MERGE_SORT 00024 #define _MERGE_SORT 00025 00026 //#warning FIXME: this needs to be added to the GIFT distro. It speeds up the generation 00027 //#warning of inverted files considerably 00028 //(done) 00029 00030 #include <memory> // auto_ptr 00031 #include <algorithm> // using swap 00032 #include <fstream> // file access 00033 #include <cassert> 00034 00035 using namespace std; 00036 00037 //#define STREAMPOS_T fstream::pos_type 00038 00041 template<typename T> 00042 class CStreamPos{ 00044 T mContent; 00045 public: 00046 CStreamPos(T inContent):mContent(inContent){ 00047 00048 } 00049 00050 CStreamPos(long int inContent):mContent(inContent){ 00051 00052 } 00053 00054 operator T()const{ 00055 return mContent; 00056 } 00057 operator long int()const{ 00058 return mContent; 00059 } 00060 00061 CStreamPos<T>& operator++ (){ 00062 mContent=mContent+static_cast<T>(1); 00063 return *this; 00064 } 00065 CStreamPos<T> operator++ (int){ 00066 CStreamPos<T> lReturnValue=*this; 00067 mContent=mContent+static_cast<T>(1); 00068 return lReturnValue; 00069 } 00070 00071 CStreamPos<T> operator% (int inMod)const{ 00072 return mContent % inMod; 00073 } 00074 CStreamPos<T> operator/ (int inMod)const{ 00075 return mContent / inMod; 00076 } 00077 bool operator< (CStreamPos<T>& inThan)const{ 00078 return this->mContent < inThan.mContent; 00079 } 00080 bool operator! ()const{ 00081 return !(this->mContent); 00082 } 00083 CStreamPos<T> operator+ (CStreamPos<T>& inSummand){ 00084 return CStreamPos<T>(mContent + inSummand.mContent); 00085 } 00086 }; 00087 00088 #if __GNUC__==2 00089 #define STREAMPOS_T long long int 00090 #else 00091 #define STREAMPOS_T CStreamPos<fstream::pos_type> 00092 #endif 00093 // ex long long int 00094 00105 template<class T> 00106 void merge_streams(istream& in1, const STREAMPOS_T inCount1, 00107 istream& in2, const STREAMPOS_T inCount2, 00108 ostream& out, 00109 int inNumberOfPageElements=1){ 00110 00111 { 00112 // cout << "Merging: " 00113 // << inCount1 00114 // << "," 00115 // << inCount2 00116 // << endl; 00117 const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements); 00118 00119 00120 00121 if(!(inCount1)){// if there is nothing to merge 00122 for(STREAMPOS_T i=0;//...copy 00123 i<inCount2; 00124 i++){ 00125 T l2; 00126 in2.read((char*)&l2, 00127 sizeof(l2)); 00128 assert(in2); 00129 00130 out.write((char*)&l2, 00131 sizeof(l2)); 00132 assert(out); 00133 } 00134 return;//and return 00135 } 00136 if(!inCount2){//if there is nothing to merge 00137 for(STREAMPOS_T i=0;//...copy 00138 i<inCount1; 00139 i++){ 00140 T l1; 00141 in1.read((char*)&l1, 00142 sizeof(l1)); 00143 assert(in1); 00144 out.write((char*)&l1, 00145 sizeof(l1)); 00146 assert(out); 00147 } 00148 return;//and return 00149 } 00150 00151 STREAMPOS_T lI1(0); 00152 STREAMPOS_T lI2(0); 00153 00154 //read the first record from both files 00155 T l1; 00156 in1.read((char*)&l1, 00157 sizeof(l1)); 00158 assert(in1); 00159 T l2; 00160 in2.read((char*)&l2, 00161 sizeof(l2)); 00162 assert(in2); 00163 00164 while((lI1<inCount1) 00165 &&(lI2<inCount2)){ 00166 if(l1<l2){ 00167 // 00168 out.write((char*)&l1, 00169 sizeof(l1)); 00170 assert(out); 00171 00172 // 00173 lI1++; 00174 if(lI1<inCount1){ 00175 in1.read((char*)&l1, 00176 sizeof(l1)); 00177 assert(in1); 00178 } 00179 }else{ 00180 // 00181 out.write((char*)&l2, 00182 sizeof(l2)); 00183 // 00184 lI2++; 00185 if(lI2<inCount2){ 00186 in2.read((char*)&l2, 00187 sizeof(l2)); 00188 assert(in2); 00189 } 00190 } 00191 } 00192 while(lI1<inCount1){ 00193 // 00194 out.write((char*)&l1, 00195 sizeof(l1)); 00196 // 00197 lI1++; 00198 if(lI1<inCount1){ 00199 in1.read((char*)&l1, 00200 sizeof(l1)); 00201 assert(in1); 00202 } 00203 } 00204 while(lI2<inCount2){ 00205 // 00206 out.write((char*)&l2, 00207 sizeof(l2)); 00208 assert(out); 00209 // 00210 lI2++; 00211 if(lI2<inCount2){ 00212 in2.read((char*)&l2, 00213 sizeof(l2)); 00214 assert(in2); 00215 } 00216 } 00217 } 00218 } 00219 template<typename T> 00220 void first_level_quicksort(int inNumberOfPageElements, 00221 const char* inFileToBeSortedName, 00222 const char* inTemporaryName){ 00223 00224 cout << "Starting quicksort: " 00225 << inNumberOfPageElements 00226 << " elements per page." << endl 00227 << "Sorting files " << inFileToBeSortedName << endl 00228 << "to " << inTemporaryName << endl; 00229 cout << "NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl; 00230 auto_ptr<T> lPage(new T[inNumberOfPageElements]); 00231 00232 cout << "H" << flush; 00233 00234 const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements); 00235 00236 cout << "I" << flush; 00237 00238 STREAMPOS_T lFileSize(0); 00239 ifstream lToBeSorted1(inFileToBeSortedName); 00240 assert(lToBeSorted1); 00241 lToBeSorted1.seekg(0, 00242 ios::end); 00243 lFileSize=lToBeSorted1.tellg(); 00244 lToBeSorted1.clear(); 00245 lToBeSorted1.seekg(0, 00246 ios::beg); 00247 cout << "E" << flush; 00248 00249 ofstream lTemporary(inTemporaryName); 00250 assert(lTemporary); 00251 cout << "R" << flush; 00252 00253 STREAMPOS_T lSum(0); 00254 00255 T* lBegin(lPage.get()); 00256 T* lEnd(lPage.get()); 00257 00258 cout << "FIRSTLEVELQUICK" << lFileSize << ";" << lSum<< endl; 00259 while((lSum<lFileSize) 00260 && lToBeSorted1){ 00261 00262 cout << "." << flush; 00263 00264 int lRead(0); 00265 if(lSum+lPageSize < lFileSize){ 00266 lToBeSorted1.read((char*)lPage.get(), 00267 lPageSize); 00268 lRead=lPageSize; 00269 }else{ 00270 lToBeSorted1.read((char*)lPage.get(), 00271 lFileSize-lSum); 00272 lRead=lFileSize-lSum; 00273 } 00274 if(lRead){ 00275 lEnd=lBegin+(lRead)/sizeof(T); 00276 sort(lBegin,lEnd); 00277 lTemporary.write((char*)lPage.get(),lRead); 00278 assert(lTemporary); 00279 } 00280 } 00281 cout << "." << endl; 00282 00283 } 00284 00295 template<class T> 00296 char* merge_sort_streams(const char* inFileToBeSortedName, 00297 const char* inTemporaryName, 00298 int inNumberOfPageElements=(1 << 20) 00299 ){ 00300 00301 const char* lFileToBeSortedName(inFileToBeSortedName); 00302 const char* lTemporaryName(inTemporaryName); 00303 00304 STREAMPOS_T lFileSize(0); 00305 ifstream lToBeSorted1(inFileToBeSortedName); 00306 lToBeSorted1.seekg(0, 00307 ios::end); 00308 lFileSize=lToBeSorted1.tellg(); 00309 lToBeSorted1.close(); 00310 00311 ofstream lTemporary; 00312 ifstream lToBeSorted2; 00313 00314 #ifdef first_level_quick 00315 first_level_quicksort<T>(inNumberOfPageElements, 00316 inFileToBeSortedName, 00317 inTemporaryName); 00318 swap(lFileToBeSortedName, 00319 lTemporaryName); 00320 #else 00321 cout << "STARTING mit MERGESIZE1" << endl; 00322 inNumberOfPageElements=1; 00323 #endif 00324 STREAMPOS_T lCount(0); 00325 for(STREAMPOS_T iMergeSize(sizeof(T)*inNumberOfPageElements); 00326 (iMergeSize < lFileSize) 00327 || !(lCount%2) 00328 // ||(lCount%2) makes sure that we will get 00329 // the result in inFileToBeSorted 00330 // the ! is, because we have have already 00331 // the quicksort sorting pass behind us 00332 ; 00333 (iMergeSize = iMergeSize << 1), 00334 (lCount=lCount+static_cast<STREAMPOS_T>(1))){ 00335 00336 cout << "MERGESORT MergeSize " 00337 << iMergeSize 00338 << endl; 00339 00340 lToBeSorted1.open(lFileToBeSortedName); 00341 lToBeSorted1.clear(); 00342 00343 lToBeSorted2.open(lFileToBeSortedName); 00344 lToBeSorted2.clear(); 00345 00346 lTemporary.open(lTemporaryName); 00347 lTemporary.clear(); 00348 00349 00350 for(STREAMPOS_T i(0); 00351 i<lFileSize; 00352 i = i + iMergeSize*2){ 00353 lToBeSorted1.seekg(i); 00354 00355 if(!lToBeSorted1){ 00356 cerr << __FILE__ << ":" << __LINE__ << " lToBeSorted false, after seekg(" 00357 << static_cast<long int>(i) 00358 << ")" 00359 << endl; 00360 } 00361 00362 assert(lToBeSorted1); 00363 00364 if(i+iMergeSize<lFileSize){ 00365 lToBeSorted2.seekg(i+iMergeSize); 00366 assert(lToBeSorted2); 00367 } 00368 00369 STREAMPOS_T lMergeSize1=lFileSize-i; 00370 if(lFileSize < i){// underflow 00371 lMergeSize1=0;//correct underflow 00372 } 00373 STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize; 00374 if(lFileSize < i + iMergeSize){// underflow of lMergeSize2 00375 lMergeSize2=0;//correct underflow 00376 } 00377 00378 // lToBeSorted1.clear(); 00379 // lToBeSorted2.clear(); 00380 00381 if(lMergeSize1>iMergeSize){ 00382 lMergeSize1=iMergeSize; 00383 } 00384 if(lMergeSize2>iMergeSize){ 00385 lMergeSize2=iMergeSize; 00386 } 00387 00388 #if __GNUC__==2 00389 merge_streams<T>(lToBeSorted1, 00390 lMergeSize1/sizeof(T), 00391 lToBeSorted2, 00392 lMergeSize2/sizeof(T), 00393 lTemporary, 00394 inNumberOfPageElements 00395 ); 00396 #else 00397 merge_streams<T>(lToBeSorted1, 00398 lMergeSize1.operator/(sizeof(T)), 00399 lToBeSorted2, 00400 lMergeSize2.operator/(sizeof(T)), 00401 lTemporary, 00402 inNumberOfPageElements 00403 ); 00404 #endif 00405 } 00406 00407 lTemporary.close(); 00408 lToBeSorted1.close(); 00409 lToBeSorted2.close(); 00410 swap(lFileToBeSortedName, 00411 lTemporaryName); 00412 cout << "endmerge" << endl; 00413 } 00414 return strdup(lFileToBeSortedName); 00415 } 00416 #endif

Need for discussion? Want to contribute? Contact
help-gift@gnu.org Generated using Doxygen