// -*- mode: C++ -*- // This header file is devided 5 files originally. // So, the author wants that the reader divide this file appropriately. // The concatination point include Emacs Header like " -*- mode: C++ -*- " #include #include #include "interface.h" #ifndef __BASE_H__ #define __BASE_H__ #define ASSERT(X) //#define ASSERT(X) assert(X) static const int BLK_WIDTH = 6; static const int IDX_WIDTH = 8; static const int TAG_WIDTH = 18; static const int BLK_BITS = BLK_WIDTH; static const int IDX_BITS = IDX_WIDTH + BLK_BITS; static const int TAG_BITS = TAG_WIDTH + IDX_BITS; static const CacheAddr_t BLK_SIZE = ((CacheAddr_t)1) << BLK_BITS; static const CacheAddr_t IDX_SIZE = ((CacheAddr_t)1) << IDX_BITS; static const CacheAddr_t TAG_SIZE = ((CacheAddr_t)1) << TAG_BITS; static const CacheAddr_t BLK_MASK = (((CacheAddr_t)1) << BLK_WIDTH)-1; static const CacheAddr_t IDX_MASK = (((CacheAddr_t)1) << IDX_WIDTH)-1; static const CacheAddr_t TAG_MASK = (((CacheAddr_t)1) << TAG_WIDTH)-1; static const int PREF_MSHR_SIZE = 32; #endif // -*- mode: C++ -*- // // Default MSHR (16 entries) // We assume that this MSHR is implemente as a part of Toolkit. // So, we do not count this module as a 32K bit prefetch storage. // // #include "interface.h" // #include "base.h" #ifndef __MSHR_H__ #define __MSHR_H__ static const int MSHR_SIZE = 16; class MSHR_Entry{ private: public: // Budget Count // Valid bit : 1 bit // Address bit: 26 bit // Total Count: 27 bit bool Valid; CacheAddr_t Addr; public: // Budget() returns an amount of storage for // implementing prefetch algorithm. int Budget(){ return 0; } // Initialize MSHR entry void Init(){ Valid = false; Addr = (CacheAddr_t)0; } // Returns hit or miss with MSHR entry. bool Hit(CacheAddr_t addr) { // NULL pointer handling if(addr==0) return true; // Check Address addr ^= Addr; addr &= ~BLK_MASK; return Valid && (addr == (CacheAddr_t)0); } // Assign Entry void Entry(CacheAddr_t addr){ Valid = true; Addr = addr; } // Housekeeping for MSHR entry void Housekeeping(COUNTER cycle){ // When V bit is not assigned, this entry is free. ASSERT(Valid); // When GetPrefetchBit() returns a not negative value, // the fill of the entry will be finished. // In realistic processor, this judge should not based on prefetch bit. // (This might be done with the setting of the VALID bit of a tag array) // So, it is not so high complexity. if(GetPrefetchBit(1, Addr) >= 0) { // Release MSHR entry Valid = false; } } }; class MissStatusHandlingRegister { private: // MSHR Entry Size is 16 int ptr; // 4 bits (Added control register) MSHR_Entry mshr[MSHR_SIZE]; // 27 bits x 16 (But only 16 bits are counted as a budget) // Total Budget Count: 436 // Total Budget Count for Contest Storage: 20 bits public: MissStatusHandlingRegister() { ptr = 0; for(int i=0; i= 0) { // Release MSHR entry Valid = false; } } }; class PrefetchMissStatusHandlingRegister { private: // Total Budget Size : 901 bits // MSHR Entry Size is 32 int ptr; // 5 bits PrefetchMissStatusHandlingRegisterEntry mshr[PREF_MSHR_SIZE]; // 28 bits x 32 public: PrefetchMissStatusHandlingRegister() { ptr = 0; for(int i=0; iSearch(mshr[p].Addr)) { mshr[p].Init(); } else { mshr[p].IssuePrefetch(cycle); } return; } } } }; #endif // -*- mode:c++ -*- /////////////////////////////////////////////////////////////// // Adaptive Stream Prefetcher // // One of novel stream prefetcher // This prefetcher is used as a L1 prefetcher in our work. // // About detail of this prefetcher refer to following paper. // "Memory Prefetching Using Adaptive Stream Detection (Micro2006)" // Ibrahim Hur, Calvin Lin /////////////////////////////////////////////////////////////// #ifndef __ADAPTIVE_H__ #define __ADAPTIVE_H__ // #include "interface.h" // #include "base.h" static const int LIFETIME = 1023; static const int LHT_SIZE = 16; static const int SLOTSIZE = 16; static const COUNTER EPOCH_MASK = ((COUNTER)1 << 16) - 1; class AdaptiveStreamFilterSlot { private: // Budget Size // Address : 26 bit // Lifetime : 10 bit // Length : 4 bit // Direction : 1 bit // Valid : 1 bit public: CacheAddr_t Address; int Lifetime; int Length; bool Direction; // T->Positive, F->Negative bool Valid; AdaptiveStreamFilterSlot( ) { Valid = false; } void Init() { Valid = false; } void Assign(CacheAddr_t addr) { ASSERT(!(addr & BLK_MASK)); Address = addr; Lifetime = LIFETIME; Length = 0; Valid = true; Direction = true; } // // Hit / Miss judge // // When the stream was hit, // the address was already accessed position or // the address was neighbor last accessed address. // bool Hit(CacheAddr_t addr) { ASSERT(!(addr & BLK_MASK)); if(!Valid) return false; if(Length == 0) { return (addr == Address) || ((addr + BLK_SIZE) == Address) || ((addr - BLK_SIZE) == Address) ; } else { if(Direction) { CacheAddr_t BaseAddress = Address - Length * BLK_SIZE; return (BaseAddress <= addr && (Address + BLK_SIZE) >= addr); } else { CacheAddr_t BaseAddress = Address + Length * BLK_SIZE; return (BaseAddress >= addr && (Address - BLK_SIZE) <= addr); } } } // // Update Slot // // Return Value // // 0 : Missed // 1-7: 1st-7th stream address was detected // -1 : Already accessed position was detected. // int Update(CacheAddr_t addr) { ASSERT(!(addr & BLK_MASK)); if(!Hit(addr)) return 0; if(Length == 0) { if((Address - BLK_SIZE) == addr) { Direction = false; } } CacheAddr_t StreamAddr = Address + (Direction ? 1 : -1) * BLK_SIZE; if(StreamAddr == addr) { Lifetime = LIFETIME; Address = addr; Length = Length + 1; return Length; } return -1; } // Housekeeping Slot Info. void Housekeeping( ) { if(Valid) { // Too long stream is released since // prefetcher cannot handling such stream. if(Length >= LHT_SIZE) { Valid = false; return; } // When lifetime became 0, // The entry is released. if(Lifetime == 0) { Valid = false; return; } Lifetime--; ASSERT(Lifetime >= 0); } } }; class AdaptiveStreamPrefetcher { private: // Budget Count // Total Count = 672 + 1024 = 1696 bit // 16 bit * 2 direction * 16 entry * 2 series = 1024 bit int LHTnext[LHT_SIZE][2]; // Each counter holds 16 bit data int LHTcurr[LHT_SIZE][2]; // Each counter holds 16 bit data // 42 bit * 16 = 672 bit AdaptiveStreamFilterSlot slot[SLOTSIZE]; public: AdaptiveStreamPrefetcher() { for(int i=0; i 0){ if(idx <= LHT_SIZE) { LHTnext[idx][dir] += 1; for(int j=idx; j= 2 * LHTcurr[j][dir]) && (LHTcurr[idx-0][dir] < 2 * LHTcurr[j][dir]) ) { IssuePrefetchSub(cycle,addr+(j-idx)*(dir?-1:1)*BLK_SIZE); } } } } return; } } // When the searching was failed, // prefetcher try to assign new slot for this request. if(!hit) { for(int i=0; i> IDX_BITS) & TAG_MASK; } CacheAddr_t GenIdx(CacheAddr_t addr){ return (addr >> BLK_BITS) & IDX_MASK; } class MemoryAccessMap { public: int LRU; // 6 bits LRU information COUNTER LastAccess; // Timer(16 bits) COUNTER NumAccess; // Access counter(4 bits) CacheAddr_t Tag; // 18 bit address tag(18 bits) // 2 bit state machine x 256 = 512 bit Map enum MemoryAccessMapState AccessMap[MAP_Size]; // Total Budget Count : 556 bits public: ///////////////////////////////////////////////////// // Constructer & Copy Constructer ///////////////////////////////////////////////////// MemoryAccessMap(CacheAddr_t t=0) : Tag(t) { } MemoryAccessMap(const MemoryAccessMap& ent) : Tag(ent.Tag) { } ~MemoryAccessMap(){} ///////////////////////////////////////////////////// // Read prefetch infomation ///////////////////////////////////////////////////// bool Hit(CacheAddr_t addr) { return Tag == GenTag(addr); } bool AlreadyAccess(CacheAddr_t addr) { return (Hit(addr) && (AccessMap[GenIdx(addr)] != INIT)); } bool PrefetchHit(CacheAddr_t addr) { return (Hit(addr) && (AccessMap[GenIdx(addr)] == INIT)); } void Read(enum MemoryAccessMapState *ReadMap) { for(int i=0; i= 15) { NumAccess = 8; LastAccess = LastAccess >> 1; } else { NumAccess++; } return; } ///////////////////////////////////////////////////// // Profiling methods ///////////////////////////////////////////////////// int NumInit() { int Succ = 0; for(int i=0; i req ? req : max; } ///////////////////////////////////////////////////// // Housekeeping ///////////////////////////////////////////////////// // Updates some counters. void Housekeeping(COUNTER cycle){ LastAccess++; if(LastAccess > 65536) { LastAccess = 65536; } } }; class MemoryAccessMapTable { public: // These flags uses 3 bits bool Aggressive; bool SaveEntry; bool ConflictAvoid; // These counter uses 128 bits int NeedEntry; int Conflict; int PrefFail; int PrefSucc; // 54 entries table (556 x 52) MemoryAccessMap Table[NUM_SET][NUM_WAY]; // Total Budget Count = 29147 bits ///////////////////////////////////////////// // Constructor & Copy Constructor ///////////////////////////////////////////// MemoryAccessMapTable() { NeedEntry = 64; Conflict = 16; SaveEntry = false; ConflictAvoid = false; Aggressive = false; for(int Idx=0; Idx= MAP_Size && index < (2*MAP_Size)) || !passivemode)); } ///////////////////////////////////////////// // Read & Update memory access map ///////////////////////////////////////////// // Read Memory Access Map from Table MemoryAccessMap* AccessEntry(COUNTER cycle, CacheAddr_t addr) { int Oldest = 0; int Idx = GenTag(addr) % NUM_SET; ASSERT(Idx >= 0 && Idx < NUM_SET); for(int Way=0; Way Table[Idx][Oldest].LRU) { Oldest = Way; } } ASSERT(Table[Idx][Oldest].LRU == (NUM_WAY-1)); // Find Entry for(int i=0; i= 0 && Idx < NUM_SET); for(int Way=0; Way There are not enough Entry Size // 2. L2 Miss && Already Accessed && MSHR Miss (Finished filling) // -> L2 Cache conflict miss is detected. // // Our prefetcher detect these situation and profiling these case. // When the frequenty exceed some threshold, // the profiler changes the prefetch mode. ////////////////////////////////////////////////////////////////// bool PF_succ = GetPrefetchBit(1, addr) == 1; // Prefetch successed bool L2_miss = GetPrefetchBit(1, addr) < 0; // L2 miss detection UnSetPrefetchBit(1, addr); // Clear Prefetch Bit bool NE_INC = PF_succ && ent_m->PrefetchHit(addr); bool CF_INC = L2_miss && ent_m->AlreadyAccess(addr) && !MSHR_hit; NeedEntry += NE_INC ? 1 : 0; Conflict += CF_INC ? 1 : 0; ////////////////////////////////////////////////////////////////// // Prefetching Part... ////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////// // Read Entry (Read & Concatenate Accessed Maps) ////////////////////////////////////////////////////////////////// for(int i=0; iRead(&ReadMap[0 * MAP_Size]); if(ent_m) ent_m->Read(&ReadMap[1 * MAP_Size]); if(ent_h) ent_h->Read(&ReadMap[2 * MAP_Size]); ////////////////////////////////////////////////////////////////// // Generating Forwarding Prefetch ////////////////////////////////////////////////////////////////// if(ConflictAvoid) { NumFwdPrefTmp = 2; } else { NumFwdPrefTmp = 2 + (ent_m ? ent_m->MaxAccess() : 0); + (PassiveMode && ent_l ? 0 : ent_l->MaxAccess()); } // This block should be implemented in simple shifter for(int i=0; iMaxAccess() : 0); + (PassiveMode && ent_h ? 0 : ent_h->MaxAccess()); } for(int i=0; iUpdate(cycle, addr); ////////////////////////////////////////////////////////////////// // Setting Output Values (Copy to Arguments) ////////////////////////////////////////////////////////////////// *NumFwdPref = NumFwdPrefTmp; *NumBwdPref = NumBwdPrefTmp; for(int i=0; i 8 * prefetch fail count) is detected. // If this mode is enabled, // the prefetcher assume PREFETCH state as the ACCESS state. if((cycle & (((COUNTER)1 << 18)-1)) == 0) { if(PrefSucc > AP_THRESHOLD * PrefFail) { // printf("Aggressive Prefetch Mode\n"); Aggressive = true; } else if(PrefSucc < (AP_THRESHOLD/2) * PrefFail) { // printf("Normal Prefetch Mode\n"); Aggressive = false; } PrefSucc >>= 1; PrefFail >>= 1; } // Profiling Execution Status // This mode is enabled when the replacement of an access table entry // occurs frequently. When this mode is enabled, // the prefetcher stops reading neighbor entries. // This feature reduces the unprefered access entry replacements. if((cycle & (((COUNTER)1 << 18)-1)) == 0) { if(NeedEntry > SE_THRESHOLD) { // printf("Save Entry Mode\n"); SaveEntry = true; } else if(NeedEntry > (SE_THRESHOLD/4)) { // printf("Normal Entry Mode\n"); SaveEntry = false; } NeedEntry >>= 1; } // Profiling Execution Status // This mode is enabled when the L2 conflict misses are detected frequentry. // When the conflict avoidance mode is enabled, // the prefetcher reduces the number of prefetch requests. if((cycle & (((COUNTER)1 << 18)-1)) == 0) { if(Conflict > CA_THRESHOLD) { // printf("Conflict Avoidance Mode\n"); ConflictAvoid = true; } else if(Conflict > (CA_THRESHOLD/4)) { // printf("Normal Conflict Mode\n"); ConflictAvoid = false; } Conflict >>= 1; } } }; #endif