SESC配置文件及测试
1.SESC配置文件解析
# SESC configuration for a 4-core CMP with an on-chip mesh network (NOC).
# NOTE(review): presumably this is the cmp4-noc.conf passed to sesc.opt -c; confirm.
# Lines starting with '#' are comments. One key per line; comments sit on
# their own line above the key they describe.

# number of cores
procsPerNode = 4
# cache block (line) size in bytes
cacheLineSize = 64
# at most 2 instructions are issued per cycle
issue = 2

##################################################################
# cpucore declares four identical cores, numbered 0 to 3
# (procsPerNode is defined as 4 above). Each core is described by the
# [issueX] section. The cores communicate over the on-chip mesh network.
##################################################################
cpucore[0:$(procsPerNode)-1] = 'issueX'

############################
###  Processor settings  ###
############################
[issueX]
# 1 GHz
frequency = 1e9
##################################################################
# inorder = false makes this an out-of-order core that fetches, issues
# and retires up to 2 instructions per cycle ("issue" is set to 2 above).
##################################################################
inorder = false
fetchWidth = $(issue)
issueWidth = $(issue)
retireWidth = $(issue)
# number of basic blocks that can be fetched per cycle, i.e. roughly how
# many branches can be fetched in one cycle
bb4Cycle = 1
# max number of outstanding instruction requests
maxIRequests = 4
interClusterLat = 2
intraClusterLat = 1
# cluster that executes integer instructions
cluster[0] = 'FXClusterIssueX'
# cluster that executes floating-point instructions
cluster[1] = 'FPClusterIssueX'
robSize = 64
intRegs = 64
fpRegs = 64
# section that configures the branch predictor
bpred = 'BPredIssueX'
enableICache = true
dtlb = 'FXDTLB'
itlb = 'FXITLB'
dataSource = "DMemory DL1"
instrSource = "IMemory IL1"
# memory OS, either Dummy or std
OSType = 'dummy'

# Integer functional units
[FXClusterIssueX]
# number of entries in window
winSize = 12*$(issue)+32
recycleAt = 'Execute'
schedNumPorts = 4
schedPortOccp = 1
wakeUpNumPorts = 4
wakeUpPortOccp = 1
wakeupDelay = 2
# minimum latency, like intraClusterLat
schedDelay = 1
iStoreLat = 1
iStoreUnit = 'LDSTIssueX'
iLoadLat = 1
iLoadUnit = 'LDSTIssueX'
iALULat = 1
iALUUnit = 'ALUIssueX'
iBJLat = 1
iBJUnit = 'ALUIssueX'
iDivLat = 12
iDivUnit = 'ALUIssueX'
iMultLat = 4
iMultUnit = 'ALUIssueX'

[LDSTIssueX]
Num = $(issue)/3+1
Occ = 1

[ALUIssueX]
Num = $(issue)/3+1
Occ = 1

# Floating-point functional units
[FPClusterIssueX]
winSize = 8*$(issue)
recycleAt = 'Execute'
schedNumPorts = 4
schedPortOccp = 1
wakeUpNumPorts = 4
wakeUpPortOccp = 1
wakeupDelay = 2
# minimum latency, like intraClusterLat
schedDelay = 1
fpALULat = 1
fpALUUnit = 'FPIssueX'
fpMultLat = 8
fpMultUnit = 'FPIssueX'
fpDivLat = 20
fpDivUnit = 'FPIssueX'

[FPIssueX]
Num = $(issue)/2+1
Occ = 1

# Branch predictor configuration
[BPredIssueX]
##################################################################
# "hybrid" is a combined predictor with a meta-predictor (2048 entries);
# each entry is a saturating counter.
##################################################################
# predictor types tried in turn: hybrid --> taken --> oracle
type = "oracle"
# Branch Taken ACcess delay: 0 means no added delay, a non-zero value
# adds a constant delay per branch
BTACDelay = 0
# the values below are used by the different predictor types
l1size = 1
l2size = 2*1024
l2Bits = 1
historySize = 8
Metasize = 2*1024
MetaBits = 2
localSize = 2*1024
localBits = 2
btbSize = 256
btbBsize = 1
btbAssoc = 2
btbReplPolicy = 'LRU'
btbHistory = 0
rasSize = 32

# Memory translation mechanism
[FXDTLB]
size = 64*8
assoc = 4
bsize = 8
numPorts = 2
replPolicy = 'LRU'
# libcore/GMemorySystem, options: dummy, cache, icache, smpcache
deviceType = 'cache'

[FXITLB]
size = 64*8
assoc = 4
bsize = 8
numPorts = 2
replPolicy = 'LRU'
deviceType = 'cache'

############################
###   MEMORY SUBSYSTEM   ###
############################

# instruction source -- IL1
[IMemory]
deviceType = 'icache'
size = 32*1024
assoc = 4
bsize = $(cacheLineSize)
writePolicy = 'WB'
replPolicy = 'LRU'
protocol = 'DMESI'
numPorts = 1
portOccp = 1
hitDelay = 1
# this number is added to the hitDelay
missDelay = 1
#displNotify = false
MSHR = "iMSHR"
# format: [Type] [UserDefinedName] [ShareOption]
lowerLevel = "Router RTR sharedBy 1"
# another lower level
sideLowerLevel = ""

[iMSHR]
type = 'single'
size = 32
bsize = $(cacheLineSize)

# data source -- DL1
[DMemory]
# SMP-type cache
deviceType = 'smpcache'
# holds 16 KBytes of data (16*1024)
size = 16*1024
# 4-way set associative
assoc = 4
# 64-byte block/line size (cacheLineSize is defined as 64 above)
bsize = $(cacheLineSize)
# write-back cache (write policy)
writePolicy = 'WB'
# LRU replacement policy
replPolicy = 'LRU'
protocol = 'DMESI'
# two ports, so two accesses can be handled per cycle
numPorts = 2
# occupancy per port. 0: UnlimitedPort, 1: FullyPipelinedPort,
# other value: PortPipe
portOccp = 1
# a hit takes one cycle
hitDelay = 1
# detecting a miss takes one cycle
missDelay = 1
# On a miss the processor tracks it in the DMSHR (data miss handling
# registers) structure described in the [DMSHR] section: a 64-entry
# structure in which each entry can track a miss for a whole 64-byte block.
# On a miss the L1 cache requests the data either from the local core's
# L2 cache slice or, through the on-chip router, from an L2 slice of
# another core.
MSHR = "DMSHR"
lowerLevel = "Router RTR sharedBy 1"
# another lower level
sideLowerLevel = "L2Slice L2S"

[DMSHR]
# options: none, nodeps, full, single, banked -- check libsuc/MSHR
type = 'single'
size = 64
bsize = $(cacheLineSize)

[Router]
deviceType = 'router'
delay = 1
# read / write
numPorts = 2
portOccp = 1
# dimX/dimY need to be the same as the NOC dimension.
# NOTE(review): NOCdim is not defined anywhere in this listing; it must be
# defined (e.g. at top level) before $(NOCdim) can resolve -- confirm.
dimX = $(NOCdim)
dimY = $(NOCdim)
lowerLevel = "NOC NOC shared"

[NOC]
deviceType = 'booksim'
booksim_config = 'mesh22.booksim'
booksim_output = 'booksim.log'
booksim_sample = 1000000
lowerLevel = "MemoryCtrl MemCtrl shared"

# L2 cache
[L2Slice]
deviceType = 'slicecache'
inclusive = false
# one slice is 1 MB (so the total L2 cache size here is 4 MB)
size = 1*1024*1024
# 16-way set associative
assoc = 16
# 64-byte block size
bsize = $(cacheLineSize)
# write-back policy
writePolicy = 'WB'
# LRU replacement policy
replPolicy = 'LRU'
# 2 ports, one for L1, one for snooping
numPorts = 2
# port occupancy (cache throughput)
portOccp = 1
# a hit takes 12 cycles
hitDelay = 12
# detecting a miss takes 12 cycles
missDelay = 12
numPortsDir = 1
portOccpDir = 1
hitDelayDir = 1
# a 64-entry MSHR tracks the misses
MSHR = 'L2MSHR'
# On a miss, the miss is handled by the local on-chip router, which uses
# the on-chip network (NOC) to send a message to the memory controller.
# The memory controller then accesses main memory over the off-chip
# processor-memory bus. Main memory is configured in the [Memory] section
# and is modelled as a very large cache with a 200-cycle latency.
lowerLevel = "Router RTR sharedBy 1"

[L2MSHR]
size = 64
type = 'single'
bsize = $(cacheLineSize)

[MemoryCtrl]
deviceType = 'memoryController'
# 8 channels
numPorts = 8
portOccp = 1
delay = 1
lowerLevel = "MemoryBus MemoryBus"

[MemoryBus]
deviceType = 'bus'
numPorts = 8
# assuming 8*8 Gbyte/s
portOccp = $(cacheLineSize) / 8
delay = 5
lowerLevel = "Memory Memory"

[Memory]
deviceType = 'niceCache'
size = 64
assoc = 1
bsize = 64
writePolicy = 'WB'
replPolicy = 'LRU'
numPorts = 1
portOccp = 1
hitDelay = 200
missDelay = 10000
MSHR = NoMSHR
lowerLevel = 'voidDevice'

[NoMSHR]
type = 'none'
size = 128
bsize = 64

[voidDevice]
deviceType = 'void'

############################
#      BEGIN MIPSEMUL      #
############################
[FileSys]
mount = "/bin=/mipsroot/tools/bin:/lib=/mipsroot/tools/lib:/tools=/mipsroot/tools"
2.分支预测器的类型测试(hybrid–>taken–>oracle)
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1# File : sesc_lu.mipseb.b4QrwJ : Sat Apr 16 03:33:27 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)389.963 KIPS 0.5564 MHz 0.830 secs 0.462 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 55.215 hybrid 91.05% ( 99.89% of 11.34%) 89.92% ( 95.81% of 57.74%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.66% 302 inst/repl : ALUIssueX 2.80Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.70 461822 35.0 0.0 0.0 0.1 18.0 0.0 0.0 0.0 0.0 45.2 0.1 1.6################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.46% 0.05GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.45% ( 0.5%, 0.0%) 192.05% 0.11GB/s : MemoryBus 0 MB/s :# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1# File : sesc_lu.mipseb.ZOV04r : Sat Apr 16 03:40:03 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)363.673 KIPS 0.5692 MHz 0.890 secs 0.507 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 26.849 taken 67.56% ( 99.89% of 11.34%) 63.42% ( 94.16% of 59.71%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 5.48% 306 inst/repl : ALUIssueX 2.18Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.64 506617 31.9 0.0 0.0 0.1 4.1 0.0 0.0 0.0 0.0 62.8 0.0 1.1################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 0.39% ( 0.1%, 0.3%) 98.40% 0.04GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ 
MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.45% ( 0.4%, 0.0%) 193.55% 0.10GB/s : MemoryBus 0 MB/s :# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1# File : sesc_lu.mipseb.9o5lQb : Sat Apr 16 03:42:42 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)404.586 KIPS 0.5474 MHz 0.800 secs 0.438 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 98.266 oracle 96.50% ( 99.89% of 11.34%) 96.07% ( 94.16% of 59.71%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.80% 300 inst/repl : ALUIssueX 3.15Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.74 437914 37.0 0.0 0.0 0.1 21.2 0.0 0.0 0.0 0.0 39.6 0.1 2.1################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.41% 0.05GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.45% ( 0.5%, 0.0%) 192.02% 0.11GB/s : MemoryBus 0 MB/s :
3.指令发射宽度测试(issue:2–>4)
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1# File : sesc_lu.mipseb.W213wq : Sat Apr 16 04:01:07 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)449.540 KIPS 0.5527 MHz 0.720 secs 0.398 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 67.584 hybrid 91.05% ( 99.89% of 11.34%) 89.92% ( 95.81% of 57.74%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.68% 302 inst/repl : ALUIssueX 0.25Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.81 397915 20.3 0.0 0.0 0.0 29.1 0.0 0.0 0.0 0.0 48.9 0.1 1.6################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.59% 0.05GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.85% ( 0.9%, 0.0%) 100.64% 0.12GB/s : MemoryBus 0 MB/s :# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1# File : sesc_lu.mipseb.hwtxXj : Sat Apr 16 04:01:27 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)431.559 KIPS 0.5780 MHz 0.750 secs 0.433 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 30.247 taken 67.56% ( 99.89% of 11.34%) 63.42% ( 94.16% of 59.71%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 5.50% 305 inst/repl : ALUIssueX 0.22Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.75 433486 18.7 0.0 0.0 0.0 11.4 0.0 0.0 0.0 0.0 68.6 0.0 1.3################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 0.40% ( 0.1%, 0.3%) 97.37% 0.05GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ 
MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.83% ( 0.8%, 0.0%) 102.55% 0.11GB/s : MemoryBus 0 MB/s :# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1# File : sesc_lu.mipseb.Y8YKpM : Sat Apr 16 04:01:40 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)469.086 KIPS 0.5336 MHz 0.690 secs 0.368 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 126.080 oracle 96.50% ( 99.89% of 11.34%) 96.07% ( 94.16% of 59.71%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.93% 301 inst/repl : ALUIssueX 0.25Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.88 368151 22.0 0.0 0.0 0.0 33.4 0.0 0.0 0.0 0.0 42.6 0.1 1.9################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.49% 0.06GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.85% ( 0.9%, 0.0%) 100.45% 0.13GB/s : MemoryBus 0 MB/s :
4. Cache测试(size:32KB–>16KB,numPorts:2–>4)
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -oocean.out -eocean.err ocean.mipseb -n 258 -p 1# File : sesc_ocean.mipseb.KYbtOE : Sat Apr 16 05:31:48 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)250.623 KIPS 0.4201 MHz 1956.200 secs 821.856 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 98.401 oracle 99.94% ( 99.99% of 0.32%) 99.94% ( 99.93% of 88.33%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 490268438 4.61% 22.88% 6.18% 31.74% 34.59% : 0.80% 582959 inst/repl : LDSTIssueX 0.23Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.60 821855801 14.9 0.0 0.0 0.3 84.5 0.0 0.0 0.0 0.0 0.0 0.2 0.0################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.2 5.76% ( 4.7%, 1.1%) 124.25% 0.79GB/s : MemoryBus 0 MB/s :################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.00% ( 0.0%, 0.0%) 92.02% 0.00GB/s : MemoryBus 0 MB/s :# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -oocean.out -eocean.err ocean.mipseb -n 258 -p 1# File : sesc_ocean.mipseb.KAxWgJ : Sat Apr 16 07:19:24 2016Exe Speed Exe MHz Exe Time Sim Time (1000MHz)228.784 KIPS 0.4045 MHz 2142.930 secs 866.814 msecProc Avg.Time BPType Total RAS BPred BTB BTAC0 98.629 oracle 99.94% ( 99.99% of 0.32%) 99.94% ( 99.93% of 88.33%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)0 490268438 4.61% 22.88% 6.18% 31.74% 34.59% : 0.80% 576786 inst/repl : LDSTIssueX 0.23Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other0 0.57 866813880 14.1 0.0 0.0 0.3 85.4 0.0 0.0 0.0 0.0 0.0 0.2 0.0################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 DL1 0.0 7.03% ( 5.8%, 1.2%) 125.16% 0.93GB/s : MemoryBus 0 MB/s 
:################################################################################Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...0 IL1 0.0 0.00% ( 0.0%, 0.0%) 92.02% 0.00GB/s : MemoryBus 0 MB/s :
5.利用异构多核运行程序
SESC本来大多用来研究SMP、CMP问题,很少用来研究AMP问题。但是徐友军师兄对其配置文件及源码进行了一些更改,使其可以用于研究AMP问题。
我们运行模拟器时需要使用配置文件(如前解析),使用功耗功能时需要执行
make sesc.conf和make power.conf命令:
make sesc.conf命令将源码中的模板配置文件sesc.conf和shared.conf复制到当前文件夹(sesc.conf会将shared.conf包含进来);make power.conf命令将生成wattchify和cactify文件:
- wattchify利用sesc.conf生成tmp.conf;
- cactify 利用tmp.conf生成power.conf。
- 最后删除tmp.conf。
正如前面所述,'issueX'段对核进行了配置。我们只需要先生成一种核的power.conf文件(文件1),然后在另外一个文件夹中将issueX更改为其他标志(如issueY),并修改里面相应段的配置,利用修改后的文件再生成另外一种核的power.conf文件(文件2);最后合并文件1和文件2,并修改相同下标处(使得下标连贯),即可生成AMP的配置文件。
- 这里使用的配置文件如下(cpucore[0:4]为小核issueX_S,cpucore[5:8]为大核issueX_L,nCPUs=9,共9个核)
#BEGIN Configuration used. Extracted from "S.conf":procsPerNode=1thermal ='SescTherm'gNetwork ='m3tnetwork'L2ll ="AdvMem MemBus shared"depth_S =2pageSize =4096memSizing_S=1STUnits =1technology='techParam'wattchDataCacheEnergy=1.065153e+00cpucore[0:4]='issueX_S'cpucore[5:8]='issueX_L'LDUnits =1nCPUs =9NoMigration=falseUseTLS =0traceMode ='qemusparc'AdvMemMap ='M3TMemMap'thermSpot ='SescSpot'issue_S =2floorplan ='layoutDescr'[LDIssueX_S]Num =1Occ =1[techParam]numberOfFanouts=1padCapacitance=1tech =70microstripLength=10clockTreeStyle='htree'skewBudget=20optimalNumberOfBuffer=3loadInClockNode=20randomLogicStyle=1loadCapacitance=1areaOfChip=200numberOfClusters=1numberOfioBufferStage=5numberOfFunctions=4numberofGates=30000numberOfFanins=4frequency =2.400000e+09[DataL1_S]portOccp =1blockName ='Dcache'missDelay =1RdHitEnergy=1.065153e+00WrHitEnergy=1.065153e+00size =32768assoc =4WrMissEnergy=2.130306e+00writePolicy='WB'hitDelay =2bsize =64lowerLevel="CommonBus Bus shared"MSHR ='DL1MSHR_S'replPolicy='RANDOM'skew =falsenumPorts =1RdMissEnergy=2.130306e+00deviceType='cache'[issueX_S]windowCheckEnergy=4.349804e-02instQueueSize=12maxStores =38btbEnergy =2.156033e-01intRegs =64iALUEnergy=3.417957e-01LSQBanks =1ldqCheckEnergy=1.044431e-01renameEnergy=1.732633e-01clockEnergy=3.142553e+00windowRdWrEnergy=2.224685e-01windowSelEnergy=5.741436e-03inorder =trueresultBusEnergy=4.949979e-02wrRegEnergy=1.833822e-01dataSource="DataL1_S DL1_S"areaFactor=2.562500e-01bb4Cycle =1robSize =100maxBranches=16totEnergy =1.306076e+01interClusterLat=2cluster ='FXClusterIssueX_S'cluster[1:1]='FPClusterIssueX_S'bpredEnergy=8.987931e-02forwardBusEnergy=4.949979e-02dtlb ='FXDTLB_S'issueWrongPath=truearchBits =32decodeDelay=3rasEnergy =0.000000e+00fpRegs =64minTLBMissDelay=16itlb ='FXITLB_S'stqRdWrEnergy=1.462314e+00fpALUEnergy=1.047439e+00instrSource="InstL1_S IL1_S"bpred ='BPredIssueX_S'fetchWidth=6maxLoads =42issueWidth=2rdRegEnergy=1.833822e-01robEnergy 
=6.256428e-02stqCheckEnergy=9.549810e-02retireWidth=2renameDelay=3stForwardDelay=1regFileDelay=3OSType ='std'maxIRequests=3ldqRdWrEnergy=1.429250e+00enableICache=truebpredDelay=1[L2Cache]bsize =64writePolicy='WB'blockName ='L2'hitDelay =10RdMissEnergy=7.798407e+00missDelay =4WrHitEnergy=3.899203e+00numPorts =1portOccp =1size =524288replPolicy='LRU'assoc =8WrMissEnergy=7.798407e+00RdHitEnergy=3.899203e+00MSHR ='MSHRL2_S'lowerLevel="AdvMem MemBus shared"deviceType='cache'[BestBPred]BTACDelay =0btbReplPolicy='LRU'btbAssoc =2tbits =5rasSize =0btbSize =2048type ='ogehl'tcbits =7tsize =2048mtables =6btbBsize =1[PBuff]hitDelay =3missDelay =2buffCache ='PBuffBuff_S'learnHitDelay=4streamCache='PBuffStream_S'maxStride =512deviceType='prefbuff'depth =1learnMissDelay=6lowerLevel="AdvMem MemBus shared"missWindow=16[SimParams]hotspotLogFile='scooreX.out'setBinaryLog=1sampleRate=10dtmUsed =0floorPlanFile='scooreX.flp'omitLateralR=0[TaskScalar]SyncOnRestart=3VersionSize=32bsize =64MFThreshold=4IDP ='IntPred1_S'MLThreshold=32[InstL1_S]hitDelay =2WrHitEnergy=1.135176e+00size =32768portOccp =1deviceType='icache'WrMissEnergy=2.270351e+00numPorts =2MSHR ='InstL1MSHR_S'RdHitEnergy=1.135176e+00lowerLevel="L2Cache L2 shared"assoc =2writePolicy='WB'RdMissEnergy=2.270351e+00replPolicy='LRU'bsize =64blockName ='Icache'missDelay =0[STIssueX_S]Occ =1Num =1[IntPred1_S]RdMissEnergy=1.303039e+00WrMissEnergy=1.303039e+00RdHitEnergy=6.515196e-01ReplPolicy='LRU'Assoc =32deviceType='cache'IDPnChildMax=4WrHitEnergy=6.515196e-01portOccp =1bSize =8size =256numPorts =1[BPredIssueX_S]btbAssoc =2btbBsize =1historySize=11btbReplPolicy='LRU'localSize =16384Metasize =16384BTACDelay =0localBits =2l2Bits =1btbSize =2048type ='hybrid'MetaBits =2rasSize =0bpred4Cycle=1l2size =16384l1size =1[FXDTLB_S]deviceType='tlb'numPorts =1replPolicy='LRU'RdMissEnergy=2.762933e+00WrHitEnergy=1.381467e+00RdHitEnergy=1.381467e+00bsize =8assoc =64size =512WrMissEnergy=2.762933e+00[PBuffStream_S]portOccp 
=3RdHitEnergy=1.052776e+00WrHitEnergy=1.052776e+00ReplPolicy='LRU'WrMissEnergy=2.105553e+00RdMissEnergy=2.105553e+00Assoc =16numPorts =2Size =128BSize =8deviceType='cache'[SescSpot]InterfaceMaterialThickness=7.500000e-05SpreaderThickness=1.000000e-03ConvectionCapacitance=1.404000e+02SpreaderLength=3.000000e-02DTMUsed =falseChipThickness=5.000000e-04HeatsinkThinkness=6.900000e-03ConvectionResistance=1.000000e-01HeatsinkLength=6.000000e-02DTMTempThreshhold=1.118000e+02[AdvMem]numPorts =1busWidth =64delay =1deviceType='bus'lowerLevel='BigMem_S'iopins =trueportOccp =32[BigMem_S]WrMissEnergy=0.000000e+00hitDelay =490MSHR ='BigMemMSHR_S'numPorts =1size =1024assoc =1deviceType='niceCache'bsize =64writePolicy='WB'RdMissEnergy=0.000000e+00WrHitEnergy=0.000000e+00RdHitEnergy=0.000000e+00lowerLevel='voidDevice'replPolicy='LRU'missDelay =1600000portOccp =1[layer1]thickness =1.000000e-03material ='Silicon'[AmbientTemperature]initialTemp=60offsetConstant=2.731500e+02ambientTemp=40[Virtual]specHeat =0conductance=0density =0alpha =0[FXClusterIssueX_S]iDivUnit ='ALUIssueX_S'schedNumPorts=4schedPortOccp=1recycleAt ='Execute'iLoadUnit ='LDIssueX_S'iMultUnit ='ALUIssueX_S'winSize =56iDivLat =207iMultLat =10iStoreUnit='STIssueX_S'schedDelay=1iLoadLat =1blockName ='IntWin'windowRdWrEnergy=5.560074e-01iALULat =1wakeUpNumPorts=4iBJUnit ='ALUIssueX_S'iALUUnit ='ALUIssueX_S'iBJLat =1wakeupDelay=3wakeUpPortOccp=1iStoreLat =1[FXITLB_S]size =256RdHitEnergy=6.696175e-01RdMissEnergy=1.339235e+00assoc =32deviceType='tlb'bsize =8WrHitEnergy=6.696175e-01numPorts =1replPolicy='LRU'WrMissEnergy=1.339235e+00[CommonBus]busWidth =32buffWCReqs=1deviceType='bus'lowerLevel="L2Cache L2 shared"delay =3portOccp =1numPorts =2busLength =7500[layer5]heat_sink_width=1.000000e-01heat_sink_resistance=1.300000e-01heat_sink_fins=5thickness =1.000000e-01heat_sink_height=1.000000e-01heat_sink =truematerial ='Copper'[ucoolConf]current =4.000000e-01seebeck =2.400000e-04coupledDevices=12Resistivity=2.857000e-06Height 
=1.000000e-04crossSection=5.000000e-02Width =1.000000e-04conductivity=3.000000e+00[PBuffBuff_S]ReplPolicy='LRU'RdMissEnergy=3.573363e+00deviceType='cache'numPorts =2Size =16384RdHitEnergy=1.786681e+00Assoc =4WrMissEnergy=3.573363e+00BSize =32WrHitEnergy=1.786681e+00portOccp =3[FPClusterIssueX_S]schedPortOccp=1blockName ='FPWin'schedNumPorts=4schedDelay=1fpDivUnit ='FP0IssueX_S'wakeUpPortOccp=1recycleAt ='Execute'fpMultUnit='FP0IssueX_S'fpDivLat =60windowRdWrEnergy=4.776964e-01fpMultLat =4wakeupDelay=3winSize =24fpALUUnit ='FP0IssueX_S'fpALULat =5wakeUpNumPorts=4[SescTherm]CyclesPerSample=20000TimeIncrement=2.500000e-01ucool ='ucoolConf'MeshResolutionSpreader=1.000000e-01MeshResolutionChip=3.000000e-03initialTemp=2.500000e+01MeshResolutionSink=5.000000e-01FanVelocity=2.000000e+00material[2:2]='Virtual'material[1:1]='Copper'material ='Silicon'ambientTemp=40layer[5:5]='layer5'layer[4:4]='layer4'layer[3:3]='layer3'layer[2:2]='layer2'layer[1:1]='layer1'[HeatSink]convecR =1.000000e-01convecC =1.404000e+02heatsinkThick=6.900000e-02heatsinkSide=6.000000e-02[MSHRL2_S]type ='full'bsize =64size =32[layer3]thickness =2.000000e-03material ='Copper'[HeatSpreader]spreaderThick=1.000000e-03spreaderSide=3.000000e-02[Copper]alpha =1.170000e-04density =8933conductance=401specHeat =385[BPredTaken]btbSize =1btbBsize =1rasSize =1btbReplPolicy='LRU'btbAssoc =1type ='Static'[Silicon]alpha =8.920000e-05density =2330specHeat =712conductance=148[FileSys]mount =''[ALUIssueX_S]Occ =1Num =2[BigMemMSHR_S]bsize =64size =32type ='none'[InterfaceMaterial]interfaceThick=7.500000e-05[FP0IssueX_S]Occ =1Num =1[InstL1MSHR_S]type ='full'size =4bsize =64[miscEnergy]combWriteEnergy=3.952196e-02[DL1MSHR_S]type ='full'size =32bsize =64[ChipSpecs]tempThreshold=1.118000e+02chipThickness=5.000000e-04[voidDevice]deviceType='void'#END Configuration used. Extracted from "S.conf":#BEGIN Configuration used. 
Extracted from "L.conf":procsPerNode=1thermal ='SescTherm'gNetwork ='m3tnetwork'L2ll ="AdvMem MemBus shared"pageSize =4096STUnits =3memSizing_L=1wattchDataCacheEnergy=1.065153e+00LDUnits =3nCPUs =4NoMigration=falseUseTLS =0traceMode ='qemusparc'AdvMemMap ='M3TMemMap'issue_L =3depth_L =3thermSpot ='SescSpot'floorplan ='layoutDescr'[FXDTLB_L]WrHitEnergy=1.246505e+00bsize =8assoc =64replPolicy='LRU'WrMissEnergy=2.493009e+00deviceType='tlb'numPorts =1RdHitEnergy=1.246505e+00RdMissEnergy=2.493009e+00size =512[layoutDescr]blockDescr="FPWin 0.0025 0.001 0.000 0.000"blockDescr[1:1]="FPReg 0.0025 0.001 0.0025 0.000"blockDescr[3:3]="IntWin 0.003 0.0015 0.0035 0.001"blockDescr[4:4]="IntReg 0.002 0.0015 0.0065 0.001"blockDescr[6:6]="FPRAT 0.0035 0.0005 0.000 0.0010"blockDescr[7:7]="IntRAT 0.0035 0.0010 0.000 0.0015"blockDescr[10:10]="ROB 0.0025 0.0015 0.0035 0.0025"blockDescr[11:11]="Icache 0.0045 0.0015 0.000 0.004"blockDescr[12:12]="Dcache 0.005 0.0015 0.005 0.004"blockDescr[13:13]="MSHR 0.0005 0.0015 0.0045 0.004"blockDescr[14:14]="L2 0.0100 0.0055 0.000 0.0055"blockDescr[2:2]="FPUnitMult 0.0015 0.001 0.0085 0.000"blockDescr[18:18]="FPUnitAlu 0.0020 0.001 0.0065 0.000"blockDescr[19:19]="FPUnitDiv 0.0015 0.001 0.005 0.000"blockDescr[5:5]="IntUnitMult 0.0015 0.0005 0.0085 0.0020"blockDescr[20:20]="IntUnitAlu 0.0015 0.0005 0.0085 0.0015"blockDescr[21:21]="IntUnitDiv 0.0015 0.0005 0.0085 0.001"blockDescr[9:9]="LDQ 0.002 0.0015 0.006 0.0025"blockDescr[17:17]="STQ 0.002 0.0015 0.008 0.0025"blockDescr[8:8]="RAS 0.0005 0.00075 0.003 0.00325"blockDescr[16:16]="Bpred 0.0030 0.00075 0.000 0.00325"blockDescr[15:15]="BTB 0.0035 0.00075 0.000 0.0025"blockMatch='Proc(0)_FPClusterIssueX'blockMatch[1:1]="Proc(0):rdFPRegEnergy Proc(0):wrFPRegEnergy"blockMatch[3:3]='Proc(0)_FXClusterIssueX*'blockMatch[4:4]="Proc(0):rdIRegEnergy Proc(0):wrIRegEnergy"blockMatch[6:6]='Proc(0):renameEnergy'blockMatch[7:7]='Proc(0):renameEnergy'blockMatch[10:10]='Proc(0):robEnergy'blockMatch[11:11]="P(0)_IL1* 
P(0)_ITLB*"blockMatch[12:12]='P(0)_DTLB*'blockMatch[13:13]="P(0)_DL1_MSHR P(0)_PBuff"blockMatch[14:14]="L2 niceCache"blockMatch[2:2]='Cluster(0):fpMult'blockMatch[18:18]='Cluster(0):fpALU'blockMatch[19:19]='Cluster(0):fpDiv'blockMatch[5:5]='Cluster(0):iMult'blockMatch[20:20]='Cluster(0):iALU'blockMatch[21:21]='Cluster(0):iDiv'blockMatch[9:9]="FULoad(0) FUMemory(0)"blockMatch[17:17]="FUStore(0) FUMemory(0)"blockMatch[8:8]='BPred(0)_RAS'blockMatch[16:16]='BPred(0)_hybrid'blockMatch[15:15]='BPred(0)_BTB'[layer4]heat_spreader_height=2.000000e-02material ='Copper'thickness =2.000000e-02heat_spreader_width=2.000000e-02heat_spreader=true[issueX_L]retireWidth=3renameDelay=3OSType ='std'dataSource="DataL1_L DL1_L"resultBusEnergy=1.528119e-01windowSelEnergy=1.107277e-02bpredDelay=1issueWrongPath=truerenameEnergy=2.000427e-01minTLBMissDelay=16interClusterLat=2stForwardDelay=1fpRegs =80forwardBusEnergy=1.528119e-01bb4Cycle =1robEnergy =7.539995e-01wrRegEnergy=3.438024e-01maxIRequests=3areaFactor=5.687500e-01robSize =128maxLoads =48decodeDelay=3itlb ='FXITLB_L'inorder =falseintRegs =96instrSource="InstL1_L IL1_L"regFileDelay=3stqCheckEnergy=1.044431e-01cluster ='FXClusterIssueX_L'cluster[1:1]='FPClusterIssueX_L'dtlb ='FXDTLB_L'windowCheckEnergy=8.454319e-02ldqCheckEnergy=1.178606e-01enableICache=truebpred ='BPredIssueX_L'rasEnergy =0.000000e+00stqRdWrEnergy=1.507999e+00ldqRdWrEnergy=1.474934e+00totEnergy =1.464537e+01instQueueSize=12rdRegEnergy=3.438024e-01fpALUEnergy=1.047439e+00maxStores =42bpredEnergy=8.987931e-02btbEnergy =2.156033e-01issueWidth=3iALUEnergy=3.417957e-01fetchWidth=6clockEnergy=3.823849e+00maxBranches=22windowRdWrEnergy=3.958168e-01archBits =32LSQBanks =1[IntPred1_L]numPorts =1WrHitEnergy=5.346558e-01WrMissEnergy=1.069312e+00Assoc =32size =256RdHitEnergy=5.346558e-01IDPnChildMax=4bSize =8ReplPolicy='LRU'portOccp =1RdMissEnergy=1.069312e+00deviceType='cache'[FXClusterIssueX_L]iMultUnit ='ALUIssueX_L'iMultLat =8wakeUpPortOccp=1recycleAt 
='Execute'wakeupDelay=3schedNumPorts=4iLoadLat =1schedPortOccp=1iStoreLat =1iBJUnit ='ALUIssueX_L'iLoadUnit ='LDIssueX_L'iDivLat =23iDivUnit ='ALUIssueX_L'iALUUnit ='ALUIssueX_L'blockName ='IntWin'winSize =68iALULat =1schedDelay=1iBJLat =1iStoreUnit='STIssueX_L'windowRdWrEnergy=6.876807e-01wakeUpNumPorts=4[FPClusterIssueX_L]winSize =32recycleAt ='Execute'fpDivLat =21schedPortOccp=1wakeUpNumPorts=4fpMultUnit='FP0IssueX_L'fpALULat =3schedNumPorts=4blockName ='FPWin'fpALUUnit ='FP0IssueX_L'fpDivUnit ='FP0IssueX_L'windowRdWrEnergy=5.641608e-01fpMultLat =5schedDelay=1wakeUpPortOccp=1wakeupDelay=3[DataL1_L]bsize =64replPolicy='RANDOM'portOccp =1MSHR ='DL1MSHR_L'numPorts =1size =32768RdMissEnergy=2.130306e+00WrHitEnergy=1.065153e+00assoc =4hitDelay =2missDelay =1WrMissEnergy=2.130306e+00lowerLevel="CommonBus Bus shared"blockName ='Dcache'RdHitEnergy=1.065153e+00deviceType='cache'skew =falsewritePolicy='WB'[FXITLB_L]WrHitEnergy=6.515196e-01bsize =8RdMissEnergy=1.303039e+00numPorts =1deviceType='tlb'RdHitEnergy=6.515196e-01assoc =32size =256replPolicy='LRU'WrMissEnergy=1.303039e+00[BPredIssueX_L]l2Bits =1btbReplPolicy='LRU'localBits =2BTACDelay =0l1size =1localSize =16384btbSize =2048rasSize =0btbBsize =1MetaBits =2bpred4Cycle=1Metasize =16384historySize=11l2size =16384type ='hybrid'btbAssoc =2[DL1MSHR_L]size =32type ='full'bsize =64[BigMemMSHR_L]type ='none'size =32bsize =64[InstL1_L]blockName ='Icache'missDelay =0lowerLevel="L2Cache L2 shared"RdHitEnergy=1.135176e+00portOccp =1MSHR ='InstL1MSHR_L'hitDelay =2assoc =2RdMissEnergy=2.270351e+00deviceType='icache'size =32768bsize =64WrMissEnergy=2.270351e+00writePolicy='WB'WrHitEnergy=1.135176e+00replPolicy='LRU'numPorts =2[BigMem_L]RdMissEnergy=0.000000e+00assoc =1WrMissEnergy=0.000000e+00bsize =64portOccp =1RdHitEnergy=0.000000e+00hitDelay =490numPorts =1MSHR ='BigMemMSHR_L'replPolicy='LRU'lowerLevel='voidDevice'missDelay =1600000WrHitEnergy=0.000000e+00deviceType='niceCache'size =1024writePolicy='WB'[PBuffStream_L]BSize 
=8RdMissEnergy=2.105553e+00numPorts =2ReplPolicy='LRU'deviceType='cache'WrHitEnergy=1.052776e+00Assoc =16WrMissEnergy=2.105553e+00RdHitEnergy=1.052776e+00portOccp =3Size =128[PBuffBuff_L]Assoc =4WrMissEnergy=3.573363e+00WrHitEnergy=1.786681e+00RdMissEnergy=3.573363e+00ReplPolicy='LRU'deviceType='cache'BSize =32portOccp =3Size =16384numPorts =2RdHitEnergy=1.786681e+00[MSHRL2_L]bsize =64type ='full'size =32[STIssueX_L]Occ = 1Num = 3[LDIssueX_L]Occ =1Num =3[InstL1MSHR_L]size =4type ='full'bsize =64[layer2]material ='Virtual'thickness =0[ALUIssueX_L]Num = 8Occ = 1[FP0IssueX_L]Num =1Occ =1
- 这里使用的基于SESC API的多进程程序如下:
#include <unistd.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include"sescapi.h"struct thread_data{int argc;char **argv;int done;};int basicmath_main(void *);int bitcount_main(void *);int cjpeg_main(void *);int djpeg_main(void *);int dijkstra_large_main(void *);int patricia_main(void *);int stringsearch_large_main(void *);int sha_main(void *);int rawdaudio_main(void *);int rawcaudio_main(void *);int CRC32_main(void *);int fft_main(void *);int fft_main(void *);#define numP 9int main(int argc, char *argv[]) {int i,j,k;struct thread_data ** thread_args = (struct thread_data**)malloc(numP * sizeof(struct thrad_data*));for(j = 0; j < numP; j++){thread_args[j] = (struct thread_data *) malloc(sizeof(struct thread_data));}i = 0 ;thread_args[i]->argc = 8;thread_args[i]->argv = (char **)malloc(8*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"cjpeg_mian",20) ;thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[1],"-dct",20);thread_args[i]->argv[2] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[2],"int",20);thread_args[i]->argv[3] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[3],"-progressive",20);thread_args[i]->argv[4] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[4],"-opt",20);thread_args[i]->argv[5] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[5],"-outfile",20);thread_args[i]->argv[6] = (char *)malloc(30 *sizeof(char));memcpy(thread_args[i]->argv[6],"output_large_encode.jpeg",30);thread_args[i]->argv[7] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[7],"input_large.ppm",20);i = 1 ;thread_args[i]->argc = 2;thread_args[i]->argv = (char **)malloc(2*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(30 *sizeof(char));memcpy(thread_args[i]->argv[0],"dijkstra_large_mian",30);thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[1],"input.dat",20);i = 
2 ;thread_args[i]->argc = 2;thread_args[i]->argv = (char **)malloc(2*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"patricia_mian",20);thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[1],"large.udp",20);i = 3 ;thread_args[i]->argc = 1;thread_args[i]->argv = (char **)malloc(sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"search_large_mian",20);i = 4 ;thread_args[i]->argc = 2;thread_args[i]->argv = (char **)malloc(2*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"sha_mian",20);thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[1],"input_large.asc",20);i = 5 ;thread_args[i]->argc = 1;thread_args[i]->argv = (char **)malloc(2*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"rawcaudio_mian",20);i = 6 ;thread_args[i]->argc = 3;thread_args[i]->argv = (char **)malloc(3*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"fft_mian",20);thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[1],"8",20);thread_args[i]->argv[2] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[2],"32768",20);i = 7 ;thread_args[i]->argc = 4;thread_args[i]->argv = (char **)malloc(4*sizeof(char *));thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[0],"fft_mian",20);thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[1],"8",20);thread_args[i]->argv[2] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[2],"32768",20);thread_args[i]->argv[3] = (char *)malloc(20 *sizeof(char));memcpy(thread_args[i]->argv[3],"-i",20);i = 0 ;sesc_spawn((void*)cjpeg_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 1 
;sesc_spawn((void*)dijkstra_large_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 2 ;sesc_spawn((void*)patricia_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 3 ;sesc_spawn((void*)stringsearch_large_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 4 ;sesc_spawn((void*)sha_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 5 ;sesc_spawn((void*)rawcaudio_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 6 ;sesc_spawn((void*)fft_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);i = 7 ;sesc_spawn((void*)fft_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);}
- 上述程序运行结果如下:
# Bench : ./sesc.mem -ccombina.conf -w100000 combina_1212# File : sesc_combina_1212.v6kaqg : Sun Apr 17 10:27:59 2016Exe Speed Exe MHz Exe Time Sim Time (2400MHz)1369.351 KIPS 0.7969 MHz 549.180 secs 182.350 msec (rabbit)Proc Avg.Time BPType Total RAS BPred BTB BTAC1 156.985 hybrid 59.80% (100.00% of 9.04%) 55.80% ( 59.87% of 58.62%) 0.00%2 230.504 hybrid 99.18% (100.00% of 0.77%) 99.18% ( 98.60% of 36.16%) 0.00%3 236.339 hybrid 47.88% (100.00% of 8.02%) 43.33% ( 41.61% of 32.31%) 0.00%4 123.601 hybrid 92.16% (100.00% of 3.23%) 91.89% ( 88.71% of 42.08%) 0.00%5 209.454 hybrid 54.61% (100.00% of 7.59%) 50.88% ( 45.81% of 32.37%) 0.00%6 976.000 hybrid 25.00% ( 0.00% of 0.00%) 25.00% ( 0.00% of 12.50%) 0.00%7 35.086 hybrid 86.45% (100.00% of 8.55%) 85.18% ( 78.66% of 51.11%) 0.00%8 35.870 hybrid 87.05% (100.00% of 8.92%) 85.78% ( 79.32% of 47.09%) 0.00%nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)1 11096 16.05% 18.56% 15.97% 49.41% 0.01% : 0.24% ???? inst/repl : LDIssueX_S 0.452 67271313 10.80% 20.26% 4.67% 64.28% 0.00% : 0.10% ???? inst/repl : LDIssueX_S 0.023 2063 20.55% 19.24% 15.08% 45.08% 0.05% : 2.27% ???? inst/repl : LDIssueX_S 0.394 9735226 15.35% 22.02% 11.69% 50.93% 0.00% : 0.68% ???? inst/repl : LDIssueX_S 0.145 2645 20.91% 20.79% 14.44% 43.82% 0.04% : 13.82% 1322 inst/repl : STIssueX_L 0.046 56 14.29% 5.36% 25.00% 53.57% 1.79% : 66.67% ???? 
inst/repl : STIssueX_L 0.047 408228696 15.31% 24.60% 12.35% 41.18% 6.56% : 33.62% 88 inst/repl : FP0IssueX_L 0.258 266768970 14.17% 24.68% 11.59% 39.45% 10.11% : 29.07% 150 inst/repl : FP0IssueX_L 0.25Proc IPC Active Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other1 0.08 0.03 133191 4.2 0.0 0.0 56.1 0.0 0.0 0.0 4.5 0.0 35.1 0.0 0.12 0.16 93.22 407946995 8.2 0.0 0.0 91.4 0.0 0.0 0.0 0.0 0.0 0.3 0.0 0.03 0.03 0.01 59650 1.7 0.0 0.0 29.3 0.0 0.0 0.0 8.0 0.0 60.9 0.0 0.04 0.18 12.43 54394504 8.9 0.0 0.0 87.5 0.0 0.0 0.0 0.0 0.0 3.5 0.0 0.05 0.05 0.01 58207 1.5 0.0 0.0 0.1 0.0 0.0 0.0 8.1 0.0 90.2 0.0 0.16 0.01 0.00 6935 0.3 0.0 0.0 0.0 0.0 0.0 0.0 52.3 0.0 47.4 0.0 0.07 0.93 100.00 437640780 31.1 2.9 0.0 1.9 6.5 0.3 0.0 0.2 0.0 54.3 0.1 2.68 0.97 62.79 274810653 32.4 4.8 0.0 3.1 3.8 0.0 0.0 0.4 0.0 53.0 0.1 2.4################################################################################Proc CacheName LVID revLVID Energy : ...################################################################################Proc Fetch Issue Mem Exec Clock Total (watts)0 0.001 0.000 0.087 0.000 1.582 1.6701 0.001 0.000 0.001 0.000 1.571 1.5732 0.086 0.393 0.273 0.524 1.725 3.0003 0.001 0.000 0.001 0.000 1.571 1.5734 0.014 0.057 0.051 0.094 1.597 1.8135 0.001 0.000 0.001 0.000 1.912 1.9146 0.001 0.000 0.001 0.000 1.912 1.9147 0.616 7.048 1.799 5.174 3.823 18.4588 0.390 4.609 1.186 3.377 3.160 12.722Total 1.109 12.106 3.399 9.169 18.854 44.636
6.下周任务
- 对SESC的使用进一步学习;
- 进一步了解如何在SESC上进行AMP试验;
- 向师兄请教SESC支持AMP需要修改哪些文件,初步涉猎源码;
- 师兄所给资料信息量巨大,需要自己整理后慢慢消化。