#include #include #include #include #include #include "e:\opt\common\timings.h" int CalculateKeySegment(char* InputLine) { int KeySegment = 0; unsigned char LowChar = (InputLine[2] - ' '); unsigned char MiddleChar = InputLine[1] - ' '; unsigned char HighChar = InputLine[0] - ' '; KeySegment = HighChar * 96 * 96 + MiddleChar * 96 + LowChar; return KeySegment; } int main(int argc, char *argv[]) { const int KEY_PREFIX_LENGTH = 2; const int MAXPASSCOUNT = 100; const int BUFCOUNT = 96*96*96; const int TOTAL_BUFFER = 16*1048576; const int INPUTLINESIZE = 100; char InputLine[INPUTLINESIZE]; int* BufferOffset = new int [BUFCOUNT+1]; int* BufferCharCount = new int[BUFCOUNT]; int KeySegment; char* InputFileName; char* OutputFileName; ifstream InputFile; ofstream OutputFile; int PassCount; int CurrentLength; int NewLength; int LineLength; int TotalKeys = 0; bool StatisticsDisplayed = false; int TotalWrites = 0; int i; int j; double BufferRatio; int PartialLength; int TotalBufferSize; int KeyLength; if (argc < 4) { printf("Usage: zen07 keylength infile outfile\n"); exit(1); } else { KeyLength = atoi(argv[1]); InputFileName = argv[2]; OutputFileName = argv[3]; } char temp[100]; start_timing(); InputFile.open(InputFileName,ios::in|ios::binary); //start counting pass int* BufferCapacity = new int[BUFCOUNT]; for (i = 0; i < BUFCOUNT; i ++) { BufferCapacity[i] = 0; } for (i = 0; ; i ++) { InputFile.read(InputLine,INPUTLINESIZE); if (!InputFile) break; TotalKeys ++; LineLength = INPUTLINESIZE; if (LineLength < KeyLength) { printf("Illegal record: %s",InputLine); exit(1); } KeySegment = CalculateKeySegment(InputLine); BufferCapacity[KeySegment] += LineLength; } int Split[MAXPASSCOUNT]; // possible number of passes int SplitTotalSize[MAXPASSCOUNT]; // bytes per pass int SplitData; int ThisDisplacement; int TotalData = 0; Split[0] = 0; i = 0; for (j = 1; j < MAXPASSCOUNT; j ++) { SplitData = 0; BufferOffset[i] = 0; for (; i < BUFCOUNT; i ++) { ThisDisplacement = BufferCapacity[i]; if (SplitData + ThisDisplacement > TOTAL_BUFFER) break; SplitData += ThisDisplacement; BufferOffset[i+1] = SplitData; } Split[j] = i; SplitTotalSize[j-1] = SplitData; TotalData += SplitData; if (i == BUFCOUNT) break; } delete [] BufferCapacity; PassCount = j; printf("Total buffer space: %d\n",TOTAL_BUFFER); printf("Total keys: %d\n", TotalKeys); printf("Total data: %d\n", TotalData); sprintf(temp,"Finished counting"); timing(temp); OutputFile.open(OutputFileName,ios::out|ios::binary); char* BigBuffer = new char [TOTAL_BUFFER]; memset(BigBuffer,0,TOTAL_BUFFER); for (int Pass = 0; Pass < PassCount ; Pass ++) { for (i = Split[Pass]; i < Split[Pass+1]; i ++) { BufferCharCount[i] = 0; } InputFile.clear(); InputFile.seekg(0); int CompareResult; for (i = 0; ; i ++) { InputFile.read(InputLine,INPUTLINESIZE); if (!InputFile) break; TotalKeys ++; LineLength = INPUTLINESIZE; KeySegment = CalculateKeySegment(InputLine); char* Where; if (KeySegment >= Split[Pass] && KeySegment < Split[Pass+1]) { CurrentLength = BufferCharCount[KeySegment]; char* CurrentPosition = BufferOffset[KeySegment]+BigBuffer; char* EndOfBuffer = CurrentPosition + CurrentLength; for (Where = CurrentPosition; Where < EndOfBuffer; Where += LineLength) { CompareResult = memcmp(InputLine,Where,KeyLength); if (CompareResult < 0) break; } memmove(Where+LineLength,Where,EndOfBuffer-Where); memcpy(Where,InputLine,LineLength); BufferCharCount[KeySegment] += LineLength; } } OutputFile.write(BigBuffer,SplitTotalSize[Pass]); TotalWrites ++; sprintf(temp,"Finished distributing on pass %d",PassCount-Pass); timing(temp); } InputFile.close(); OutputFile.close(); printf("Total writes: %d\n", TotalWrites); end_timing(); return 0; }