SESC配置文件及测试
1.SESC配置文件解析
# Lines starting with '#' are comments.
procsPerNode = 4 # 4 cores
cacheLineSize = 64 # cache block (line) size
issue = 2 # at most 2 instructions issued per cycle
# Mesh dimension read by [Router] (dimX/dimY) through $(NOCdim); the reference
# existed but no definition was present in this file.
# NOTE(review): 2 is inferred from procsPerNode = 4 and 'mesh22.booksim' - confirm.
NOCdim = 2
##################################################################
# cpucore declares four identical cores, numbered 0 to 3 (procsPerNode is
# defined as 4 above). Every core is described by the [issueX] section. The
# cores communicate with each other over the on-board mesh network.
##################################################################
cpucore[0:$(procsPerNode)-1] = 'issueX'
##############################
# 处理器的配置 #
##############################
[issueX]
frequency = 1e9 # 1 GHz
##################################################################
# inorder = false makes this an out-of-order core that fetches, issues and
# retires up to 2 instructions per cycle (the global "issue" parameter is set
# to 2 above).
##################################################################
inorder = false
fetchWidth = $(issue)
issueWidth = $(issue)
retireWidth = $(issue)
bb4Cycle = 1 # basic blocks that can be fetched per cycle, i.e. roughly how many branches can be fetched per cycle
maxIRequests = 4 # Max number of outstanding instruction requests
interClusterLat = 2
intraClusterLat = 1
cluster[0] = 'FXClusterIssueX' # integer instruction cluster
cluster[1] = 'FPClusterIssueX' # floating-point instruction cluster
robSize = 64
intRegs = 64
fpRegs = 64
bpred = 'BPredIssueX' # branch predictor configuration
enableICache = true
dtlb = 'FXDTLB'
itlb = 'FXITLB'
dataSource = "DMemory DL1"
instrSource = "IMemory IL1"
OSType = 'dummy' # Memory OS, either Dummy or std
# Integer functional units
[FXClusterIssueX]
# The global parameter is spelled "issue"; use the same spelling here as in
# every other $(issue) reference.
winSize = 12*$(issue)+32 # number of entries in window
recycleAt = 'Execute'
schedNumPorts = 4
schedPortOccp = 1
wakeUpNumPorts = 4
wakeUpPortOccp = 1
wakeupDelay = 2
schedDelay = 1 # Minimum latency, like intraClusterLat
# <op>Lat / <op>Unit pairs: loads and stores use [LDSTIssueX], every other
# integer operation class uses [ALUIssueX].
iStoreLat = 1
iStoreUnit = 'LDSTIssueX'
iLoadLat = 1
iLoadUnit = 'LDSTIssueX'
iALULat = 1
iALUUnit = 'ALUIssueX'
iBJLat = 1
iBJUnit = 'ALUIssueX'
iDivLat = 12
iDivUnit = 'ALUIssueX'
iMultLat = 4
iMultUnit = 'ALUIssueX'
[LDSTIssueX]
# Unit named by iLoadUnit and iStoreUnit in [FXClusterIssueX].
# NOTE(review): presumably Num is the unit count and Occ its occupancy - confirm.
Occ = 1
Num = $(issue)/3+1
[ALUIssueX]
# Unit named by iALUUnit, iBJUnit, iDivUnit and iMultUnit in [FXClusterIssueX].
# NOTE(review): presumably Num is the unit count and Occ its occupancy - confirm.
Occ = 1
Num = $(issue)/3+1
# Floating-point functional units
[FPClusterIssueX]
recycleAt = 'Execute'
# Window size, derived from the global issue width
winSize = 8*$(issue)
# Scheduler ports
schedNumPorts = 4
schedPortOccp = 1
# Minimum latency, like intraClusterLat
schedDelay = 1
# Wake-up ports
wakeUpNumPorts = 4
wakeUpPortOccp = 1
wakeupDelay = 2
# <op>Unit / <op>Lat pairs; every floating-point operation class uses [FPIssueX]
fpALUUnit = 'FPIssueX'
fpALULat = 1
fpMultUnit = 'FPIssueX'
fpMultLat = 8
fpDivUnit = 'FPIssueX'
fpDivLat = 20
[FPIssueX]
# Unit named by fpALUUnit, fpMultUnit and fpDivUnit in [FPClusterIssueX].
Occ = 1
Num = $(issue)/2+1
# Branch predictor configuration
[BPredIssueX]
##################################################################
# "hybrid" is in fact a combined predictor with a meta-predictor (2048
# entries); every entry is a saturating counter.
##################################################################
# Values tried for this key: hybrid --> taken --> oracle
type = "oracle"
# Branch Taken ACcess Delay: 0 means execution is not held up, a non-zero
# value is a constant delay per branch
BTACDelay = 0
# The values below are used by the different branch predictors
l1size = 1
l2size = 2*1024
l2Bits = 1
historySize = 8
# Meta-predictor table
Metasize = 2*1024
MetaBits = 2
# Local predictor table
localSize = 2*1024
localBits = 2
# BTB parameters
btbSize = 256
btbBsize = 1
btbAssoc = 2
btbReplPolicy = 'LRU'
btbHistory = 0
# RAS parameters
rasSize = 32
# memory translation mechanism
[FXDTLB]
# libcore/GMemorySystem, options: dummy, cache, icache, smpcache
deviceType = 'cache'
size = 64*8
bsize = 8
assoc = 4
replPolicy = 'LRU'
numPorts = 2
[FXITLB]
# Referenced by itlb in [issueX]; same values as [FXDTLB].
deviceType = 'cache'
size = 64*8
bsize = 8
assoc = 4
replPolicy = 'LRU'
numPorts = 2
##############################
# MEMORY SUBSYSTEM #
##############################
# instruction source -- IL1
[IMemory]
deviceType = 'icache'
# Geometry
size = 32*1024
bsize = $(cacheLineSize)
assoc = 4
# Policies
replPolicy = 'LRU'
writePolicy = 'WB'
protocol = 'DMESI'
# Ports and timing
numPorts = 1
portOccp = 1
hitDelay = 1
# this number is added to the hitDelay
missDelay = 1
#displNotify = false
# Miss handling and lower level(s)
MSHR = "iMSHR"
# Format: [Type] [UserDefinedName] [ShareOption]
lowerLevel = "Router RTR sharedBy 1"
# Another lower level
sideLowerLevel = ""
[iMSHR]
# MSHR description referenced by [IMemory].
type = 'single'
bsize = $(cacheLineSize)
size = 32
# data source -- DL1
[DMemory]
deviceType = 'smpcache' # SMP-type cache
size = 16*1024 # holds 16 KBytes of data (the 32KB->16KB cache test in this document changes this value)
assoc = 4 # 4-way set associative
# 64-byte block/line size (cacheLineSize is defined as 64 above)
bsize = $(cacheLineSize)
writePolicy = 'WB' # a writeback cache (write policy)
replPolicy = 'LRU' # LRU replacement policy
protocol = 'DMESI'
numPorts = 2 # two ports, so 2 accesses can be handled per cycle
portOccp = 1 # Number of occupancy per port. 0: UnlimitedPort, 1:FullyPipelinedPort, other value: PortPipe
hitDelay = 1 # a hit takes one cycle
missDelay = 1 # detecting a miss takes 1 cycle
# On a miss, the processor tracks it with the DMSHR (data miss handling registers) structure,
# described in the [DMSHR] section: a 64-entry structure in which each entry can track a miss for a whole 64-byte block.
# On a miss the L1 cache requests the data either from the local core's L2 cache slice or, through the
# on-chip router, from the L2 slices attached to the other cores.
MSHR = "DMSHR"
lowerLevel = "Router RTR sharedBy 1"
sideLowerLevel = "L2Slice L2S" # Another lower level
[DMSHR]
# Options: none, nodeps, full, single, banked. Check libsuc/MSHR
type = 'single'
bsize = $(cacheLineSize)
size = 64
[Router]
deviceType = 'router'
delay = 1
# read / write
numPorts = 2
portOccp = 1
# Both dimensions need to be the same as the NOC dimension; $(NOCdim) has to
# be supplied as a global parameter.
dimX = $(NOCdim)
dimY = $(NOCdim)
lowerLevel = "NOC NOC shared"
[NOC]
deviceType = 'booksim'
lowerLevel = "MemoryCtrl MemCtrl shared"
# booksim_* keys: configuration file, output log and sample value
booksim_config = 'mesh22.booksim'
booksim_output = 'booksim.log'
booksim_sample = 1000000
# L2 Cache
[L2Slice]
deviceType = 'slicecache'
inclusive = false
# one slice is 1 MB (so the total L2 cache size here is 4 MB)
size = 1*1024*1024
# 16-way set associative
assoc = 16
# 64-byte block size
bsize = $(cacheLineSize)
# write-back policy
writePolicy = 'WB'
# LRU replacement policy
replPolicy = 'LRU'
# 2 ports, one for L1, one for snooping
numPorts = 2
# throughput of the cache
portOccp = 1
# a hit takes 12 cycles
hitDelay = 12
# detecting a miss takes 12 cycles
missDelay = 12
# Directory ports and timing
numPortsDir = 1
portOccpDir = 1
hitDelayDir = 1
# uses a 64-entry MSHR to track misses
MSHR = 'L2MSHR'
# A miss is handled by the local on-chip router, which uses the on-board
# network (NOC) to send a message to the memory controller. The memory
# controller then accesses main memory over the off-chip processor-memory bus.
# Main memory is configured in the [Memory] section and is modelled as a very
# large cache with a 200-cycle latency.
lowerLevel = "Router RTR sharedBy 1"
[L2MSHR]
# MSHR description referenced by [L2Slice].
type = 'single'
bsize = $(cacheLineSize)
size = 64
[MemoryCtrl]
deviceType = 'memoryController'
delay = 1
# 8 channel
numPorts = 8
portOccp = 1
lowerLevel = "MemoryBus MemoryBus"
[MemoryBus]
deviceType = 'bus'
delay = 5
numPorts = 8
# assuming 8*8 Gbyte/s
portOccp = $(cacheLineSize) / 8
lowerLevel = "Memory Memory"
[Memory]
# Main memory, described as a 'niceCache' device; its lowerLevel is [voidDevice].
deviceType = 'niceCache'
# Geometry
size = 64
bsize = 64
assoc = 1
# Policies
replPolicy = 'LRU'
writePolicy = 'WB'
# Ports and timing
numPorts = 1
portOccp = 1
hitDelay = 200
missDelay = 10000
# Miss handling and lower level
MSHR = NoMSHR
lowerLevel = 'voidDevice'
[NoMSHR]
# MSHR description referenced by [Memory].
type = 'none'
bsize = 64
size = 128
[voidDevice]
# End of the hierarchy: [Memory] names this section as its lowerLevel.
deviceType = 'void'
############################
# BEGIN MIPSEMUL #
############################
[FileSys]
# Colon-separated list of <path>=<path> pairs.
# NOTE(review): presumably <path seen by the simulated program>=<host directory> - confirm.
mount = "/bin=/mipsroot/tools/bin:/lib=/mipsroot/tools/lib:/tools=/mipsroot/tools"
2.分支预测器的类型测试(hybrid–>taken–>oracle)
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1
# File : sesc_lu.mipseb.b4QrwJ : Sat Apr 16 03:33:27 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
389.963 KIPS 0.5564 MHz 0.830 secs 0.462 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 55.215 hybrid 91.05% ( 99.89% of 11.34%) 89.92% ( 95.81% of 57.74%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.66% 302 inst/repl : ALUIssueX 2.80
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.70 461822 35.0 0.0 0.0 0.1 18.0 0.0 0.0 0.0 0.0 45.2 0.1 1.6
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.46% 0.05GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.45% ( 0.5%, 0.0%) 192.05% 0.11GB/s : MemoryBus 0 MB/s :
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1
# File : sesc_lu.mipseb.ZOV04r : Sat Apr 16 03:40:03 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
363.673 KIPS 0.5692 MHz 0.890 secs 0.507 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 26.849 taken 67.56% ( 99.89% of 11.34%) 63.42% ( 94.16% of 59.71%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 5.48% 306 inst/repl : ALUIssueX 2.18
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.64 506617 31.9 0.0 0.0 0.1 4.1 0.0 0.0 0.0 0.0 62.8 0.0 1.1
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 0.39% ( 0.1%, 0.3%) 98.40% 0.04GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.45% ( 0.4%, 0.0%) 193.55% 0.10GB/s : MemoryBus 0 MB/s :
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1
# File : sesc_lu.mipseb.9o5lQb : Sat Apr 16 03:42:42 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
404.586 KIPS 0.5474 MHz 0.800 secs 0.438 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 98.266 oracle 96.50% ( 99.89% of 11.34%) 96.07% ( 94.16% of 59.71%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.80% 300 inst/repl : ALUIssueX 3.15
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.74 437914 37.0 0.0 0.0 0.1 21.2 0.0 0.0 0.0 0.0 39.6 0.1 2.1
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.41% 0.05GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.45% ( 0.5%, 0.0%) 192.02% 0.11GB/s : MemoryBus 0 MB/s :
3.指令发射宽度测试(issue:2–>4)
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1
# File : sesc_lu.mipseb.W213wq : Sat Apr 16 04:01:07 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
449.540 KIPS 0.5527 MHz 0.720 secs 0.398 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 67.584 hybrid 91.05% ( 99.89% of 11.34%) 89.92% ( 95.81% of 57.74%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.68% 302 inst/repl : ALUIssueX 0.25
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.81 397915 20.3 0.0 0.0 0.0 29.1 0.0 0.0 0.0 0.0 48.9 0.1 1.6
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.59% 0.05GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.85% ( 0.9%, 0.0%) 100.64% 0.12GB/s : MemoryBus 0 MB/s :
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1
# File : sesc_lu.mipseb.hwtxXj : Sat Apr 16 04:01:27 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
431.559 KIPS 0.5780 MHz 0.750 secs 0.433 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 30.247 taken 67.56% ( 99.89% of 11.34%) 63.42% ( 94.16% of 59.71%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 5.50% 305 inst/repl : ALUIssueX 0.22
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.75 433486 18.7 0.0 0.0 0.0 11.4 0.0 0.0 0.0 0.0 68.6 0.0 1.3
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 0.40% ( 0.1%, 0.3%) 97.37% 0.05GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.83% ( 0.8%, 0.0%) 102.55% 0.11GB/s : MemoryBus 0 MB/s :
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -olu.out -elr.err lu.mipseb -n32 -p 1
# File : sesc_lu.mipseb.Y8YKpM : Sat Apr 16 04:01:40 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
469.086 KIPS 0.5336 MHz 0.690 secs 0.368 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 126.080 oracle 96.50% ( 99.89% of 11.34%) 96.07% ( 94.16% of 59.71%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 323669 9.49% 18.01% 9.25% 55.68% 7.57% : 12.93% 301 inst/repl : ALUIssueX 0.25
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.88 368151 22.0 0.0 0.0 0.0 33.4 0.0 0.0 0.0 0.0 42.6 0.1 1.9
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 0.42% ( 0.1%, 0.3%) 92.49% 0.06GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.85% ( 0.9%, 0.0%) 100.45% 0.13GB/s : MemoryBus 0 MB/s :
4. Cache测试(size:32KB–>16KB,numPorts:2–>4)
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -oocean.out -eocean.err ocean.mipseb -n 258 -p 1
# File : sesc_ocean.mipseb.KYbtOE : Sat Apr 16 05:31:48 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
250.623 KIPS 0.4201 MHz 1956.200 secs 821.856 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 98.401 oracle 99.94% ( 99.99% of 0.32%) 99.94% ( 99.93% of 88.33%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 490268438 4.61% 22.88% 6.18% 31.74% 34.59% : 0.80% 582959 inst/repl : LDSTIssueX 0.23
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.60 821855801 14.9 0.0 0.0 0.3 84.5 0.0 0.0 0.0 0.0 0.0 0.2 0.0
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.2 5.76% ( 4.7%, 1.1%) 124.25% 0.79GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.00% ( 0.0%, 0.0%) 92.02% 0.00GB/s : MemoryBus 0 MB/s :
# Bench : sesc.opt -c /home/ud233user/sesc/confs/cmp4-noc.conf -oocean.out -eocean.err ocean.mipseb -n 258 -p 1
# File : sesc_ocean.mipseb.KAxWgJ : Sat Apr 16 07:19:24 2016
Exe Speed Exe MHz Exe Time Sim Time (1000MHz)
228.784 KIPS 0.4045 MHz 2142.930 secs 866.814 msec
Proc Avg.Time BPType Total RAS BPred BTB BTAC
0 98.629 oracle 99.94% ( 99.99% of 0.32%) 99.94% ( 99.93% of 88.33%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
0 490268438 4.61% 22.88% 6.18% 31.74% 34.59% : 0.80% 576786 inst/repl : LDSTIssueX 0.23
Proc IPC Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
0 0.57 866813880 14.1 0.0 0.0 0.3 85.4 0.0 0.0 0.0 0.0 0.0 0.2 0.0
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 DL1 0.0 7.03% ( 5.8%, 1.2%) 125.16% 0.93GB/s : MemoryBus 0 MB/s :
################################################################################
Proc Cache Occ MissRate (RD, WR) %DMemAcc MB/s : ...
0 IL1 0.0 0.00% ( 0.0%, 0.0%) 92.02% 0.00GB/s : MemoryBus 0 MB/s :
5.利用异构多核运行程序
SESC本来大多用来研究SMP、CMP问题,很少用来研究AMP问题。但是徐友军师兄对其配置文件及源码进行了一些更改,使其可以用于研究AMP问题。
我们运行模拟器时需要使用配置文件(如前解析),使用功耗功能时需要执行 `make sesc.conf` 和 `make power.conf` 命令:
- `make sesc.conf` 命令将源码中的模板配置文件sesc.conf和shared.conf复制到当前文件夹(sesc.conf会将shared.conf包含进来);
- `make power.conf` 命令将生成wattchify和cactify文件:
  - wattchify利用sesc.conf生成tmp.conf;
  - cactify利用tmp.conf生成power.conf;
  - 最后删除tmp.conf。
正如前面所述,'issueX'段对核进行了配置。我们只需要先生成一种核的power.conf文件(文件1),然后在另外一个文件夹中将issueX更改为其他标志(如issueY),并修改里面相应段的配置,利用修改后的文件再生成另外一种核的power.conf文件(文件2);最后合并文件1和文件2,修改相同下标处(使得下标连贯),即可生成AMP的配置文件。
- 这里使用的配置文件如下(8核,四大核四小核;注意:下面的配置实际定义了9个核——cpucore[0:4]为issueX_S,cpucore[5:8]为issueX_L,nCPUs=9,与后面程序中的numP 9一致)
#BEGIN Configuration used. Extracted from "S.conf":
# Global keys of the part extracted from "S.conf" (the *_S sections follow).
procsPerNode=1
thermal ='SescTherm'
gNetwork ='m3tnetwork'
L2ll ="AdvMem MemBus shared"
depth_S =2
pageSize =4096
memSizing_S=1
STUnits =1
technology='techParam'
wattchDataCacheEnergy=1.065153e+00
# Core-to-section mapping: cores 0-4 use [issueX_S] and cores 5-8 use
# [issueX_L], i.e. nine cores in total, matching nCPUs below.
cpucore[0:4]='issueX_S'
cpucore[5:8]='issueX_L'
LDUnits =1
nCPUs =9
NoMigration=false
UseTLS =0
traceMode ='qemusparc'
AdvMemMap ='M3TMemMap'
thermSpot ='SescSpot'
issue_S =2
floorplan ='layoutDescr'
[LDIssueX_S]
Num =1
Occ =1
[techParam]
numberOfFanouts=1
padCapacitance=1
tech =70
microstripLength=10
clockTreeStyle='htree'
skewBudget=20
optimalNumberOfBuffer=3
loadInClockNode=20
randomLogicStyle=1
loadCapacitance=1
areaOfChip=200
numberOfClusters=1
numberOfioBufferStage=5
numberOfFunctions=4
numberofGates=30000
numberOfFanins=4
frequency =2.400000e+09
[DataL1_S]
portOccp =1
blockName ='Dcache'
missDelay =1
RdHitEnergy=1.065153e+00
WrHitEnergy=1.065153e+00
size =32768
assoc =4
WrMissEnergy=2.130306e+00
writePolicy='WB'
hitDelay =2
bsize =64
lowerLevel="CommonBus Bus shared"
MSHR ='DL1MSHR_S'
replPolicy='RANDOM'
skew =false
numPorts =1
RdMissEnergy=2.130306e+00
deviceType='cache'
[issueX_S]
windowCheckEnergy=4.349804e-02
instQueueSize=12
maxStores =38
btbEnergy =2.156033e-01
intRegs =64
iALUEnergy=3.417957e-01
LSQBanks =1
ldqCheckEnergy=1.044431e-01
renameEnergy=1.732633e-01
clockEnergy=3.142553e+00
windowRdWrEnergy=2.224685e-01
windowSelEnergy=5.741436e-03
inorder =true
resultBusEnergy=4.949979e-02
wrRegEnergy=1.833822e-01
dataSource="DataL1_S DL1_S"
areaFactor=2.562500e-01
bb4Cycle =1
robSize =100
maxBranches=16
totEnergy =1.306076e+01
interClusterLat=2
cluster ='FXClusterIssueX_S'
cluster[1:1]='FPClusterIssueX_S'
bpredEnergy=8.987931e-02
forwardBusEnergy=4.949979e-02
dtlb ='FXDTLB_S'
issueWrongPath=true
archBits =32
decodeDelay=3
rasEnergy =0.000000e+00
fpRegs =64
minTLBMissDelay=16
itlb ='FXITLB_S'
stqRdWrEnergy=1.462314e+00
fpALUEnergy=1.047439e+00
instrSource="InstL1_S IL1_S"
bpred ='BPredIssueX_S'
fetchWidth=6
maxLoads =42
issueWidth=2
rdRegEnergy=1.833822e-01
robEnergy =6.256428e-02
stqCheckEnergy=9.549810e-02
retireWidth=2
renameDelay=3
stForwardDelay=1
regFileDelay=3
OSType ='std'
maxIRequests=3
ldqRdWrEnergy=1.429250e+00
enableICache=true
bpredDelay=1
[L2Cache]
bsize =64
writePolicy='WB'
blockName ='L2'
hitDelay =10
RdMissEnergy=7.798407e+00
missDelay =4
WrHitEnergy=3.899203e+00
numPorts =1
portOccp =1
size =524288
replPolicy='LRU'
assoc =8
WrMissEnergy=7.798407e+00
RdHitEnergy=3.899203e+00
MSHR ='MSHRL2_S'
lowerLevel="AdvMem MemBus shared"
deviceType='cache'
[BestBPred]
BTACDelay =0
btbReplPolicy='LRU'
btbAssoc =2
tbits =5
rasSize =0
btbSize =2048
type ='ogehl'
tcbits =7
tsize =2048
mtables =6
btbBsize =1
[PBuff]
hitDelay =3
missDelay =2
buffCache ='PBuffBuff_S'
learnHitDelay=4
streamCache='PBuffStream_S'
maxStride =512
deviceType='prefbuff'
depth =1
learnMissDelay=6
lowerLevel="AdvMem MemBus shared"
missWindow=16
[SimParams]
hotspotLogFile='scooreX.out'
setBinaryLog=1
sampleRate=10
dtmUsed =0
floorPlanFile='scooreX.flp'
omitLateralR=0
[TaskScalar]
SyncOnRestart=3
VersionSize=32
bsize =64
MFThreshold=4
IDP ='IntPred1_S'
MLThreshold=32
[InstL1_S]
hitDelay =2
WrHitEnergy=1.135176e+00
size =32768
portOccp =1
deviceType='icache'
WrMissEnergy=2.270351e+00
numPorts =2
MSHR ='InstL1MSHR_S'
RdHitEnergy=1.135176e+00
lowerLevel="L2Cache L2 shared"
assoc =2
writePolicy='WB'
RdMissEnergy=2.270351e+00
replPolicy='LRU'
bsize =64
blockName ='Icache'
missDelay =0
[STIssueX_S]
Occ =1
Num =1
[IntPred1_S]
RdMissEnergy=1.303039e+00
WrMissEnergy=1.303039e+00
RdHitEnergy=6.515196e-01
ReplPolicy='LRU'
Assoc =32
deviceType='cache'
IDPnChildMax=4
WrHitEnergy=6.515196e-01
portOccp =1
bSize =8
size =256
numPorts =1
[BPredIssueX_S]
btbAssoc =2
btbBsize =1
historySize=11
btbReplPolicy='LRU'
localSize =16384
Metasize =16384
BTACDelay =0
localBits =2
l2Bits =1
btbSize =2048
type ='hybrid'
MetaBits =2
rasSize =0
bpred4Cycle=1
l2size =16384
l1size =1
[FXDTLB_S]
deviceType='tlb'
numPorts =1
replPolicy='LRU'
RdMissEnergy=2.762933e+00
WrHitEnergy=1.381467e+00
RdHitEnergy=1.381467e+00
bsize =8
assoc =64
size =512
WrMissEnergy=2.762933e+00
[PBuffStream_S]
portOccp =3
RdHitEnergy=1.052776e+00
WrHitEnergy=1.052776e+00
ReplPolicy='LRU'
WrMissEnergy=2.105553e+00
RdMissEnergy=2.105553e+00
Assoc =16
numPorts =2
Size =128
BSize =8
deviceType='cache'
[SescSpot]
InterfaceMaterialThickness=7.500000e-05
SpreaderThickness=1.000000e-03
ConvectionCapacitance=1.404000e+02
SpreaderLength=3.000000e-02
DTMUsed =false
ChipThickness=5.000000e-04
HeatsinkThinkness=6.900000e-03
ConvectionResistance=1.000000e-01
HeatsinkLength=6.000000e-02
DTMTempThreshhold=1.118000e+02
[AdvMem]
numPorts =1
busWidth =64
delay =1
deviceType='bus'
lowerLevel='BigMem_S'
iopins =true
portOccp =32
[BigMem_S]
WrMissEnergy=0.000000e+00
hitDelay =490
MSHR ='BigMemMSHR_S'
numPorts =1
size =1024
assoc =1
deviceType='niceCache'
bsize =64
writePolicy='WB'
RdMissEnergy=0.000000e+00
WrHitEnergy=0.000000e+00
RdHitEnergy=0.000000e+00
lowerLevel='voidDevice'
replPolicy='LRU'
missDelay =1600000
portOccp =1
[layer1]
thickness =1.000000e-03
material ='Silicon'
[AmbientTemperature]
initialTemp=60
offsetConstant=2.731500e+02
ambientTemp=40
[Virtual]
specHeat =0
conductance=0
density =0
alpha =0
[FXClusterIssueX_S]
iDivUnit ='ALUIssueX_S'
schedNumPorts=4
schedPortOccp=1
recycleAt ='Execute'
iLoadUnit ='LDIssueX_S'
iMultUnit ='ALUIssueX_S'
winSize =56
iDivLat =207
iMultLat =10
iStoreUnit='STIssueX_S'
schedDelay=1
iLoadLat =1
blockName ='IntWin'
windowRdWrEnergy=5.560074e-01
iALULat =1
wakeUpNumPorts=4
iBJUnit ='ALUIssueX_S'
iALUUnit ='ALUIssueX_S'
iBJLat =1
wakeupDelay=3
wakeUpPortOccp=1
iStoreLat =1
[FXITLB_S]
size =256
RdHitEnergy=6.696175e-01
RdMissEnergy=1.339235e+00
assoc =32
deviceType='tlb'
bsize =8
WrHitEnergy=6.696175e-01
numPorts =1
replPolicy='LRU'
WrMissEnergy=1.339235e+00
[CommonBus]
busWidth =32
buffWCReqs=1
deviceType='bus'
lowerLevel="L2Cache L2 shared"
delay =3
portOccp =1
numPorts =2
busLength =7500
[layer5]
heat_sink_width=1.000000e-01
heat_sink_resistance=1.300000e-01
heat_sink_fins=5
thickness =1.000000e-01
heat_sink_height=1.000000e-01
heat_sink =true
material ='Copper'
[ucoolConf]
current =4.000000e-01
seebeck =2.400000e-04
coupledDevices=12
Resistivity=2.857000e-06
Height =1.000000e-04
crossSection=5.000000e-02
Width =1.000000e-04
conductivity=3.000000e+00
[PBuffBuff_S]
ReplPolicy='LRU'
RdMissEnergy=3.573363e+00
deviceType='cache'
numPorts =2
Size =16384
RdHitEnergy=1.786681e+00
Assoc =4
WrMissEnergy=3.573363e+00
BSize =32
WrHitEnergy=1.786681e+00
portOccp =3
[FPClusterIssueX_S]
schedPortOccp=1
blockName ='FPWin'
schedNumPorts=4
schedDelay=1
fpDivUnit ='FP0IssueX_S'
wakeUpPortOccp=1
recycleAt ='Execute'
fpMultUnit='FP0IssueX_S'
fpDivLat =60
windowRdWrEnergy=4.776964e-01
fpMultLat =4
wakeupDelay=3
winSize =24
fpALUUnit ='FP0IssueX_S'
fpALULat =5
wakeUpNumPorts=4
[SescTherm]
CyclesPerSample=20000
TimeIncrement=2.500000e-01
ucool ='ucoolConf'
MeshResolutionSpreader=1.000000e-01
MeshResolutionChip=3.000000e-03
initialTemp=2.500000e+01
MeshResolutionSink=5.000000e-01
FanVelocity=2.000000e+00
material[2:2]='Virtual'
material[1:1]='Copper'
material ='Silicon'
ambientTemp=40
layer[5:5]='layer5'
layer[4:4]='layer4'
layer[3:3]='layer3'
layer[2:2]='layer2'
layer[1:1]='layer1'
[HeatSink]
convecR =1.000000e-01
convecC =1.404000e+02
heatsinkThick=6.900000e-02
heatsinkSide=6.000000e-02
[MSHRL2_S]
type ='full'
bsize =64
size =32
[layer3]
thickness =2.000000e-03
material ='Copper'
[HeatSpreader]
spreaderThick=1.000000e-03
spreaderSide=3.000000e-02
[Copper]
alpha =1.170000e-04
density =8933
conductance=401
specHeat =385
[BPredTaken]
btbSize =1
btbBsize =1
rasSize =1
btbReplPolicy='LRU'
btbAssoc =1
type ='Static'
[Silicon]
alpha =8.920000e-05
density =2330
specHeat =712
conductance=148
[FileSys]
mount =''
[ALUIssueX_S]
Occ =1
Num =2
[BigMemMSHR_S]
bsize =64
size =32
type ='none'
[InterfaceMaterial]
interfaceThick=7.500000e-05
[FP0IssueX_S]
Occ =1
Num =1
[InstL1MSHR_S]
type ='full'
size =4
bsize =64
[miscEnergy]
combWriteEnergy=3.952196e-02
[DL1MSHR_S]
type ='full'
size =32
bsize =64
[ChipSpecs]
tempThreshold=1.118000e+02
chipThickness=5.000000e-04
[voidDevice]
deviceType='void'
#END Configuration used. Extracted from "S.conf":
#BEGIN Configuration used. Extracted from "L.conf":
# Global keys of the part extracted from "L.conf" (the *_L sections follow).
# NOTE(review): several names here repeat keys already set in the "S.conf" part
# (procsPerNode, thermal, nCPUs, ...), and nCPUs is 4 here but 9 there.
# NOTE(review): these lines come right after the [voidDevice] header, so a parser
# reading this listing as one file may attribute them to that section - confirm
# how the merged file is actually laid out.
procsPerNode=1
thermal ='SescTherm'
gNetwork ='m3tnetwork'
L2ll ="AdvMem MemBus shared"
pageSize =4096
STUnits =3
memSizing_L=1
wattchDataCacheEnergy=1.065153e+00
LDUnits =3
nCPUs =4
NoMigration=false
UseTLS =0
traceMode ='qemusparc'
AdvMemMap ='M3TMemMap'
issue_L =3
depth_L =3
thermSpot ='SescSpot'
floorplan ='layoutDescr'
[FXDTLB_L]
WrHitEnergy=1.246505e+00
bsize =8
assoc =64
replPolicy='LRU'
WrMissEnergy=2.493009e+00
deviceType='tlb'
numPorts =1
RdHitEnergy=1.246505e+00
RdMissEnergy=2.493009e+00
size =512
[layoutDescr]
blockDescr="FPWin 0.0025 0.001 0.000 0.000"
blockDescr[1:1]="FPReg 0.0025 0.001 0.0025 0.000"
blockDescr[3:3]="IntWin 0.003 0.0015 0.0035 0.001"
blockDescr[4:4]="IntReg 0.002 0.0015 0.0065 0.001"
blockDescr[6:6]="FPRAT 0.0035 0.0005 0.000 0.0010"
blockDescr[7:7]="IntRAT 0.0035 0.0010 0.000 0.0015"
blockDescr[10:10]="ROB 0.0025 0.0015 0.0035 0.0025"
blockDescr[11:11]="Icache 0.0045 0.0015 0.000 0.004"
blockDescr[12:12]="Dcache 0.005 0.0015 0.005 0.004"
blockDescr[13:13]="MSHR 0.0005 0.0015 0.0045 0.004"
blockDescr[14:14]="L2 0.0100 0.0055 0.000 0.0055"
blockDescr[2:2]="FPUnitMult 0.0015 0.001 0.0085 0.000"
blockDescr[18:18]="FPUnitAlu 0.0020 0.001 0.0065 0.000"
blockDescr[19:19]="FPUnitDiv 0.0015 0.001 0.005 0.000"
blockDescr[5:5]="IntUnitMult 0.0015 0.0005 0.0085 0.0020"
blockDescr[20:20]="IntUnitAlu 0.0015 0.0005 0.0085 0.0015"
blockDescr[21:21]="IntUnitDiv 0.0015 0.0005 0.0085 0.001"
blockDescr[9:9]="LDQ 0.002 0.0015 0.006 0.0025"
blockDescr[17:17]="STQ 0.002 0.0015 0.008 0.0025"
blockDescr[8:8]="RAS 0.0005 0.00075 0.003 0.00325"
blockDescr[16:16]="Bpred 0.0030 0.00075 0.000 0.00325"
blockDescr[15:15]="BTB 0.0035 0.00075 0.000 0.0025"
blockMatch='Proc(0)_FPClusterIssueX'
blockMatch[1:1]="Proc(0):rdFPRegEnergy Proc(0):wrFPRegEnergy"
blockMatch[3:3]='Proc(0)_FXClusterIssueX*'
blockMatch[4:4]="Proc(0):rdIRegEnergy Proc(0):wrIRegEnergy"
blockMatch[6:6]='Proc(0):renameEnergy'
blockMatch[7:7]='Proc(0):renameEnergy'
blockMatch[10:10]='Proc(0):robEnergy'
blockMatch[11:11]="P(0)_IL1* P(0)_ITLB*"
blockMatch[12:12]='P(0)_DTLB*'
blockMatch[13:13]="P(0)_DL1_MSHR P(0)_PBuff"
blockMatch[14:14]="L2 niceCache"
blockMatch[2:2]='Cluster(0):fpMult'
blockMatch[18:18]='Cluster(0):fpALU'
blockMatch[19:19]='Cluster(0):fpDiv'
blockMatch[5:5]='Cluster(0):iMult'
blockMatch[20:20]='Cluster(0):iALU'
blockMatch[21:21]='Cluster(0):iDiv'
blockMatch[9:9]="FULoad(0) FUMemory(0)"
blockMatch[17:17]="FUStore(0) FUMemory(0)"
blockMatch[8:8]='BPred(0)_RAS'
blockMatch[16:16]='BPred(0)_hybrid'
blockMatch[15:15]='BPred(0)_BTB'
[layer4]
heat_spreader_height=2.000000e-02
material ='Copper'
thickness =2.000000e-02
heat_spreader_width=2.000000e-02
heat_spreader=true
[issueX_L]
retireWidth=3
renameDelay=3
OSType ='std'
dataSource="DataL1_L DL1_L"
resultBusEnergy=1.528119e-01
windowSelEnergy=1.107277e-02
bpredDelay=1
issueWrongPath=true
renameEnergy=2.000427e-01
minTLBMissDelay=16
interClusterLat=2
stForwardDelay=1
fpRegs =80
forwardBusEnergy=1.528119e-01
bb4Cycle =1
robEnergy =7.539995e-01
wrRegEnergy=3.438024e-01
maxIRequests=3
areaFactor=5.687500e-01
robSize =128
maxLoads =48
decodeDelay=3
itlb ='FXITLB_L'
inorder =false
intRegs =96
instrSource="InstL1_L IL1_L"
regFileDelay=3
stqCheckEnergy=1.044431e-01
cluster ='FXClusterIssueX_L'
cluster[1:1]='FPClusterIssueX_L'
dtlb ='FXDTLB_L'
windowCheckEnergy=8.454319e-02
ldqCheckEnergy=1.178606e-01
enableICache=true
bpred ='BPredIssueX_L'
rasEnergy =0.000000e+00
stqRdWrEnergy=1.507999e+00
ldqRdWrEnergy=1.474934e+00
totEnergy =1.464537e+01
instQueueSize=12
rdRegEnergy=3.438024e-01
fpALUEnergy=1.047439e+00
maxStores =42
bpredEnergy=8.987931e-02
btbEnergy =2.156033e-01
issueWidth=3
iALUEnergy=3.417957e-01
fetchWidth=6
clockEnergy=3.823849e+00
maxBranches=22
windowRdWrEnergy=3.958168e-01
archBits =32
LSQBanks =1
[IntPred1_L]
numPorts =1
WrHitEnergy=5.346558e-01
WrMissEnergy=1.069312e+00
Assoc =32
size =256
RdHitEnergy=5.346558e-01
IDPnChildMax=4
bSize =8
ReplPolicy='LRU'
portOccp =1
RdMissEnergy=1.069312e+00
deviceType='cache'
[FXClusterIssueX_L]
iMultUnit ='ALUIssueX_L'
iMultLat =8
wakeUpPortOccp=1
recycleAt ='Execute'
wakeupDelay=3
schedNumPorts=4
iLoadLat =1
schedPortOccp=1
iStoreLat =1
iBJUnit ='ALUIssueX_L'
iLoadUnit ='LDIssueX_L'
iDivLat =23
iDivUnit ='ALUIssueX_L'
iALUUnit ='ALUIssueX_L'
blockName ='IntWin'
winSize =68
iALULat =1
schedDelay=1
iBJLat =1
iStoreUnit='STIssueX_L'
windowRdWrEnergy=6.876807e-01
wakeUpNumPorts=4
[FPClusterIssueX_L]
winSize =32
recycleAt ='Execute'
fpDivLat =21
schedPortOccp=1
wakeUpNumPorts=4
fpMultUnit='FP0IssueX_L'
fpALULat =3
schedNumPorts=4
blockName ='FPWin'
fpALUUnit ='FP0IssueX_L'
fpDivUnit ='FP0IssueX_L'
windowRdWrEnergy=5.641608e-01
fpMultLat =5
schedDelay=1
wakeUpPortOccp=1
wakeupDelay=3
[DataL1_L]
bsize =64
replPolicy='RANDOM'
portOccp =1
MSHR ='DL1MSHR_L'
numPorts =1
size =32768
RdMissEnergy=2.130306e+00
WrHitEnergy=1.065153e+00
assoc =4
hitDelay =2
missDelay =1
WrMissEnergy=2.130306e+00
lowerLevel="CommonBus Bus shared"
blockName ='Dcache'
RdHitEnergy=1.065153e+00
deviceType='cache'
skew =false
writePolicy='WB'
[FXITLB_L]
WrHitEnergy=6.515196e-01
bsize =8
RdMissEnergy=1.303039e+00
numPorts =1
deviceType='tlb'
RdHitEnergy=6.515196e-01
assoc =32
size =256
replPolicy='LRU'
WrMissEnergy=1.303039e+00
[BPredIssueX_L]
l2Bits =1
btbReplPolicy='LRU'
localBits =2
BTACDelay =0
l1size =1
localSize =16384
btbSize =2048
rasSize =0
btbBsize =1
MetaBits =2
bpred4Cycle=1
Metasize =16384
historySize=11
l2size =16384
type ='hybrid'
btbAssoc =2
[DL1MSHR_L]
size =32
type ='full'
bsize =64
[BigMemMSHR_L]
type ='none'
size =32
bsize =64
[InstL1_L]
blockName ='Icache'
missDelay =0
lowerLevel="L2Cache L2 shared"
RdHitEnergy=1.135176e+00
portOccp =1
MSHR ='InstL1MSHR_L'
hitDelay =2
assoc =2
RdMissEnergy=2.270351e+00
deviceType='icache'
size =32768
bsize =64
WrMissEnergy=2.270351e+00
writePolicy='WB'
WrHitEnergy=1.135176e+00
replPolicy='LRU'
numPorts =2
[BigMem_L]
RdMissEnergy=0.000000e+00
assoc =1
WrMissEnergy=0.000000e+00
bsize =64
portOccp =1
RdHitEnergy=0.000000e+00
hitDelay =490
numPorts =1
MSHR ='BigMemMSHR_L'
replPolicy='LRU'
lowerLevel='voidDevice'
missDelay =1600000
WrHitEnergy=0.000000e+00
deviceType='niceCache'
size =1024
writePolicy='WB'
[PBuffStream_L]
BSize =8
RdMissEnergy=2.105553e+00
numPorts =2
ReplPolicy='LRU'
deviceType='cache'
WrHitEnergy=1.052776e+00
Assoc =16
WrMissEnergy=2.105553e+00
RdHitEnergy=1.052776e+00
portOccp =3
Size =128
[PBuffBuff_L]
Assoc =4
WrMissEnergy=3.573363e+00
WrHitEnergy=1.786681e+00
RdMissEnergy=3.573363e+00
ReplPolicy='LRU'
deviceType='cache'
BSize =32
portOccp =3
Size =16384
numPorts =2
RdHitEnergy=1.786681e+00
[MSHRL2_L]
bsize =64
type ='full'
size =32
[STIssueX_L]
Occ = 1
Num = 3
[LDIssueX_L]
Occ =1
Num =3
[InstL1MSHR_L]
size =4
type ='full'
bsize =64
[layer2]
material ='Virtual'
thickness =0
[ALUIssueX_L]
Num = 8
Occ = 1
[FP0IssueX_L]
Num =1
Occ =1
- 这里使用的基于SESC API的多进程程序如下:
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include"sescapi.h"
/* Argument bundle; main() allocates numP of these and fills in argc/argv for each. */
struct thread_data
{
int argc; /* number of entries in argv */
char **argv; /* heap-allocated argument strings; main() stores a name in argv[0] followed by the options */
int done; /* not touched in the visible code - presumably a completion flag; TODO confirm */
};
/*
 * Entry points of the benchmark kernels, defined elsewhere. Each takes one
 * opaque pointer - presumably a struct thread_data *; TODO confirm against
 * the definitions.
 */
int basicmath_main(void *);
int bitcount_main(void *);
int cjpeg_main(void *);
int djpeg_main(void *);
int dijkstra_large_main(void *);
int patricia_main(void *);
int stringsearch_large_main(void *);
int sha_main(void *);
int rawdaudio_main(void *);
int rawcaudio_main(void *);
int CRC32_main(void *);
/* NOTE(review): fft_main was declared twice in a row; the redundant copy was
   dropped. If the second line was meant to name another kernel, declare it here. */
int fft_main(void *);
/* Number of thread_data slots main() allocates. */
#define numP 9
int main(int argc, char *argv[]) {
int i,j,k;
struct thread_data ** thread_args = (struct thread_data**)malloc(numP * sizeof(struct thrad_data*));
for(j = 0; j < numP; j++){
thread_args[j] = (struct thread_data *) malloc(sizeof(struct thread_data));
}
i = 0 ;
thread_args[i]->argc = 8;
thread_args[i]->argv = (char **)malloc(8*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"cjpeg_mian",20) ;
thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[1],"-dct",20);
thread_args[i]->argv[2] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[2],"int",20);
thread_args[i]->argv[3] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[3],"-progressive",20);
thread_args[i]->argv[4] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[4],"-opt",20);
thread_args[i]->argv[5] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[5],"-outfile",20);
thread_args[i]->argv[6] = (char *)malloc(30 *sizeof(char));
memcpy(thread_args[i]->argv[6],"output_large_encode.jpeg",30);
thread_args[i]->argv[7] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[7],"input_large.ppm",20);
i = 1 ;
thread_args[i]->argc = 2;
thread_args[i]->argv = (char **)malloc(2*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(30 *sizeof(char));
memcpy(thread_args[i]->argv[0],"dijkstra_large_mian",30);
thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[1],"input.dat",20);
i = 2 ;
thread_args[i]->argc = 2;
thread_args[i]->argv = (char **)malloc(2*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"patricia_mian",20);
thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[1],"large.udp",20);
i = 3 ;
thread_args[i]->argc = 1;
thread_args[i]->argv = (char **)malloc(sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"search_large_mian",20);
i = 4 ;
thread_args[i]->argc = 2;
thread_args[i]->argv = (char **)malloc(2*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"sha_mian",20);
thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[1],"input_large.asc",20);
i = 5 ;
thread_args[i]->argc = 1;
thread_args[i]->argv = (char **)malloc(2*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"rawcaudio_mian",20);
i = 6 ;
thread_args[i]->argc = 3;
thread_args[i]->argv = (char **)malloc(3*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"fft_mian",20);
thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[1],"8",20);
thread_args[i]->argv[2] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[2],"32768",20);
i = 7 ;
thread_args[i]->argc = 4;
thread_args[i]->argv = (char **)malloc(4*sizeof(char *));
thread_args[i]->argv[0] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[0],"fft_mian",20);
thread_args[i]->argv[1] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[1],"8",20);
thread_args[i]->argv[2] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[2],"32768",20);
thread_args[i]->argv[3] = (char *)malloc(20 *sizeof(char));
memcpy(thread_args[i]->argv[3],"-i",20);
i = 0 ;
sesc_spawn((void*)cjpeg_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 1 ;
sesc_spawn((void*)dijkstra_large_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 2 ;
sesc_spawn((void*)patricia_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 3 ;
sesc_spawn((void*)stringsearch_large_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 4 ;
sesc_spawn((void*)sha_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 5 ;
sesc_spawn((void*)rawcaudio_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 6 ;
sesc_spawn((void*)fft_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
i = 7 ;
sesc_spawn((void*)fft_main,(void *)thread_args[i],SESC_FLAG_MAP| i + 1);
}
- 上述程序运行结果如下:
# Bench : ./sesc.mem -ccombina.conf -w100000 combina_1212
# File : sesc_combina_1212.v6kaqg : Sun Apr 17 10:27:59 2016
Exe Speed Exe MHz Exe Time Sim Time (2400MHz)
1369.351 KIPS 0.7969 MHz 549.180 secs 182.350 msec (rabbit)
Proc Avg.Time BPType Total RAS BPred BTB BTAC
1 156.985 hybrid 59.80% (100.00% of 9.04%) 55.80% ( 59.87% of 58.62%) 0.00%
2 230.504 hybrid 99.18% (100.00% of 0.77%) 99.18% ( 98.60% of 36.16%) 0.00%
3 236.339 hybrid 47.88% (100.00% of 8.02%) 43.33% ( 41.61% of 32.31%) 0.00%
4 123.601 hybrid 92.16% (100.00% of 3.23%) 91.89% ( 88.71% of 42.08%) 0.00%
5 209.454 hybrid 54.61% (100.00% of 7.59%) 50.88% ( 45.81% of 32.37%) 0.00%
6 976.000 hybrid 25.00% ( 0.00% of 0.00%) 25.00% ( 0.00% of 12.50%) 0.00%
7 35.086 hybrid 86.45% (100.00% of 8.55%) 85.18% ( 78.66% of 51.11%) 0.00%
8 35.870 hybrid 87.05% (100.00% of 8.92%) 85.78% ( 79.32% of 47.09%) 0.00%
nInst BJ Load Store INT FP : LD Forward , Replay : Worst Unit (clk)
1 11096 16.05% 18.56% 15.97% 49.41% 0.01% : 0.24% ???? inst/repl : LDIssueX_S 0.45
2 67271313 10.80% 20.26% 4.67% 64.28% 0.00% : 0.10% ???? inst/repl : LDIssueX_S 0.02
3 2063 20.55% 19.24% 15.08% 45.08% 0.05% : 2.27% ???? inst/repl : LDIssueX_S 0.39
4 9735226 15.35% 22.02% 11.69% 50.93% 0.00% : 0.68% ???? inst/repl : LDIssueX_S 0.14
5 2645 20.91% 20.79% 14.44% 43.82% 0.04% : 13.82% 1322 inst/repl : STIssueX_L 0.04
6 56 14.29% 5.36% 25.00% 53.57% 1.79% : 66.67% ???? inst/repl : STIssueX_L 0.04
7 408228696 15.31% 24.60% 12.35% 41.18% 6.56% : 33.62% 88 inst/repl : FP0IssueX_L 0.25
8 266768970 14.17% 24.68% 11.59% 39.45% 10.11% : 29.07% 150 inst/repl : FP0IssueX_L 0.25
Proc IPC Active Cycles Busy LDQ STQ IWin ROB Regs Ports TLB maxBr MisBr Br4Clk Other
1 0.08 0.03 133191 4.2 0.0 0.0 56.1 0.0 0.0 0.0 4.5 0.0 35.1 0.0 0.1
2 0.16 93.22 407946995 8.2 0.0 0.0 91.4 0.0 0.0 0.0 0.0 0.0 0.3 0.0 0.0
3 0.03 0.01 59650 1.7 0.0 0.0 29.3 0.0 0.0 0.0 8.0 0.0 60.9 0.0 0.0
4 0.18 12.43 54394504 8.9 0.0 0.0 87.5 0.0 0.0 0.0 0.0 0.0 3.5 0.0 0.0
5 0.05 0.01 58207 1.5 0.0 0.0 0.1 0.0 0.0 0.0 8.1 0.0 90.2 0.0 0.1
6 0.01 0.00 6935 0.3 0.0 0.0 0.0 0.0 0.0 0.0 52.3 0.0 47.4 0.0 0.0
7 0.93 100.00 437640780 31.1 2.9 0.0 1.9 6.5 0.3 0.0 0.2 0.0 54.3 0.1 2.6
8 0.97 62.79 274810653 32.4 4.8 0.0 3.1 3.8 0.0 0.0 0.4 0.0 53.0 0.1 2.4
################################################################################
Proc CacheName LVID revLVID Energy : ...
################################################################################
Proc Fetch Issue Mem Exec Clock Total (watts)
0 0.001 0.000 0.087 0.000 1.582 1.670
1 0.001 0.000 0.001 0.000 1.571 1.573
2 0.086 0.393 0.273 0.524 1.725 3.000
3 0.001 0.000 0.001 0.000 1.571 1.573
4 0.014 0.057 0.051 0.094 1.597 1.813
5 0.001 0.000 0.001 0.000 1.912 1.914
6 0.001 0.000 0.001 0.000 1.912 1.914
7 0.616 7.048 1.799 5.174 3.823 18.458
8 0.390 4.609 1.186 3.377 3.160 12.722
Total 1.109 12.106 3.399 9.169 18.854 44.636
6.下周任务
- 对SESC的使用进一步学习;
- 进一步了解如何使用SESC进行AMP实验;
- 向师兄请教SESC支持AMP需要修改哪些文件,初步涉猎源码;
- 师兄所给资料信息量很大,需要自己整理后慢慢消化。