0%

gem5 ISA 扩展

object1 : 添加一条 R 型指令 add_one rd,实现寄存器 rd 的值自增 (Rd = Rd + 1)。

要求

  1. 延时可调
  2. O3CPU FuncUnit

Step

  1. @ src/arch/riscv/isa/decoder.isa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
0x0c: decode FUNCT3 {
format ROp {
0x0: decode KFUNCT5 {
0x00: decode BS {
0x0: add({{
Rd = rvSext(Rs1_sd + Rs2_sd);
}});
0x1: sub({{
Rd = rvSext(Rs1_sd - Rs2_sd);
}});
0x2: add_one({{
Rd = Rd + 1;
}}, IntAddOneOp);
}
...

FUNCT3, KFUNCT5为定义的Bitfield

gem5 会解析 decoder.isa,并对 {{ }} 内的代码进行操作数替换:出现在 = 左边的操作数被视为 DestReg,出现在 = 右边的操作数被视为 SrcReg。
注意: += 只会把操作数当作 SrcReg 使用,不会把它登记为 DestReg 并赋值(因此上面写成 Rd = Rd + 1,而不是 Rd += 1)。最终生成的 CodeBlock 如下:

@ build/RISCV/arch/riscv/generated/decoder-ns.cc.inc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// ROp::add_one(['\n                            Rd = Rd + 1;\n                        ', 'IntAddOneOp'],{})

Add_one::Add_one(ExtMachInst machInst)
: RegOp("add_one", machInst, IntAddOneOp)
{

setRegIdxArrays(
reinterpret_cast<RegIdArrayPtr>(
&std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
reinterpret_cast<RegIdArrayPtr>(
&std::remove_pointer_t<decltype(this)>::destRegIdxArr));
;

setSrcRegIdx(_numSrcRegs++, ((RD) == 0) ? RegId() : intRegClass[RD]);
setDestRegIdx(_numDestRegs++, ((RD) == 0) ? RegId() : intRegClass[RD]);
_numTypedDestRegs[intRegClass.type()]++;
flags[IsInteger] = true;;
}

1
2
3
4
5
6
7
8
9

  2. @ src/cpu/op_class.hh
static const OpClass IntAluOp = enums::IntAlu;
static const OpClass IntMultOp = enums::IntMult;
static const OpClass IntDivOp = enums::IntDiv;
static const OpClass IntAddOneOp = enums::IntAddOne;
...
  1. @src/cpu/FuncUnit.py
1
2
3
4
5
6
7
8
9
10
class OpClass(Enum):
vals = [
"No_OpClass",
"IntAlu",
"IntMult",
"IntDiv",
"IntAddOne",
"FloatAdd",
"FloatCmp",
...
  1. src/cpu/o3/FuncUnitConfig.py
1
2
3
4
5
6
7
8
# Functional-unit description that serves the IntMult and IntDiv op classes
# and, with this change, the new IntAddOne op class used by add_one.
class IntMultDiv(FUDesc):
opList = [
OpDesc(opClass="IntMult", opLat=3),
OpDesc(opClass="IntDiv", opLat=20, pipelined=False),
# New entry for add_one. opLat is the value to edit when tuning the
# instruction's delay (requirement 1: adjustable latency).
OpDesc(opClass="IntAddOne", opLat=2, pipelined=False),
]

count = 2

object2: 添加一条set_mod指令,其中mod是专用寄存器 (类似RVV的vsetvl)

指令调用

set_mod rs1

要求

  1. major code 与标准扩展不重合

Step

  1. 添加专用寄存器操作数
    @ src/arch/riscv/isa/operands.isa
1
2
3
4
5
6
7
8
9
10
def operands {{
...
# VL and VTYPE
'Vtype': PCStateOp('ud', 'vtype', (None, None, 'IsControl'), 10),
'VL': PCStateOp('uw', 'vl', (None, None, 'IsControl'), 11),
#VLENB, actually the CSR is read only.
'VlenbBits': PCStateOp('ud', 'vlenb', (None, None, 'IsControl'), 12),
#CustomMod
'CustomMod': PCStateOp('ud', 'custommod', (None, None, 'IsControl'), 10),
}};
  1. 将专用寄存器 custommod 添加到 PCState
    @ src/arch/riscv/pcstate.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
class PCState : public GenericISA::UPCState<4>
{
protected:
...
uint64_t _vlenb = 32;
VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial;
uint32_t _vl = 0;

uint64_t _custommod = 0;

public:
PCState(const PCState &other) : Base(other),
_rvType(other._rvType), _vlenb(other._vlenb),
_vtype(other._vtype), _vl(other._vl),
_custommod(other._custommod)
{}
...
void
update(const PCStateBase &other) override
{
...
_vlenb = pcstate._vlenb;
_vtype = pcstate._vtype;
_vl = pcstate._vl;
_custommod = pcstate._custommod;
}

...
void vl(uint32_t v) { _vl = v; }
uint32_t vl() const { return _vl; }

void custommod(uint64_t s) { _custommod = s; }
uint64_t custommod() const { return _custommod; }

bool
equals(const PCStateBase &other) const override
{
auto &opc = other.as<PCState>();
return Base::equals(other) &&
_vlenb == opc._vlenb &&
_vtype == opc._vtype &&
_vl == opc._vl &&
_custommod == opc._custommod;
}
void
serialize(CheckpointOut &cp) const override
{
...
SERIALIZE_SCALAR(_vl);
SERIALIZE_SCALAR(_custommod);
}

void
unserialize(CheckpointIn &cp) override
{
...
UNSERIALIZE_SCALAR(_vl);
UNSERIALIZE_SCALAR(_custommod);
}
  1. 添加 decoder
    Major Opcode 采用 0x0b (Custom-0)
    @ src/arch/riscv/isa/decoder.isa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
decode QUADRANT default Unknown::unknown() {
...
0x3: decode OPCODE5 {
...
0x02: decode FUNCT3 {
format ROp {
0x0: decode FUNCT7 {
0x0: set_mod({{
CustomMod = Rs1;
}});
}
}
}
...
}
...
}

object3: 将object2中的 set_mod 指令改为新的 CustomOP

  1. 修改 decoder
    @ src/arch/riscv/isa/decoder.isa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
decode QUADRANT default Unknown::unknown() {
...
0x3: decode OPCODE5 {
...
0x02: decode FUNCT3 {
format CustomOp {
0x0: decode FUNCT7 {
0x0: set_mod({{
CustomMod = Rs1;
}});
}
}
}
...
}
...
}
  1. 定义 CustomOP 格式
    new file @ src/arch/riscv/isa/format/custom.isa
1
2
3
4
5
6
7
// Instruction format for the custom extension. Every instruction declared
// with this format is generated with 'CustomOp' as its C++ base class and
// uses the Basic* templates for declaration, constructor, decode and execute.
def format CustomOp(code, *opt_flags) {{
# name / Name are the instruction mnemonic and class name supplied by the
# ISA parser; 'CustomOp' is the base class named in the generated code.
iop = InstObjParams(name, Name, 'CustomOp', code, opt_flags)
header_output = BasicDeclare.subst(iop)
decoder_output = BasicConstructor.subst(iop)
decode_block = BasicDecode.subst(iop)
exec_output = BasicExecute.subst(iop)
}};
  1. Include CustomOP
    @ src/arch/riscv/isa/format/format.isa
1
2
3
4
5
6
...
##include "vector_conf.isa"
##include "vector_arith.isa"
##include "vector_mem.isa"
##include "custom.isa"
...
  1. 为 CustomOP 添加声明和定义
    new file @ src/arch/riscv/insts/custom.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
namespace gem5
{

namespace RiscvISA
{

/**
 * Common base class for the instructions of the custom extension.
 *
 * It adds no state of its own: the constructors are inherited from
 * RiscvStaticInst and only the disassembly routine is overridden.
 */
class CustomOp : public RiscvStaticInst
{
  protected:
    // Reuse the RiscvStaticInst constructors unchanged.
    using RiscvStaticInst::RiscvStaticInst;

    /** Produce the textual disassembly of this instruction. */
    std::string
    generateDisassembly(Addr pc,
                        const loader::SymbolTable *symtab) const override;
};

} // namespace RiscvISA
} // namespace gem5

new file @ src/arch/riscv/insts/custom.cc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
namespace gem5
{

namespace RiscvISA
{

/**
 * Build the disassembly text
 * "<mnemonic> <dest>, <src0>[, <src1>[, <src2>]]".
 *
 * Only registers the instruction really has are printed. An instruction such
 * as set_mod writes a PCState operand (CustomMod) and so presumably has no
 * destination register at all; reading destRegIdx(0) unconditionally would
 * then index past the end of the register-id array.
 *
 * @param pc     Address of the instruction (not used by this formatter).
 * @param symtab Symbol table (not used by this formatter).
 * @return The formatted disassembly string.
 */
std::string
CustomOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    ss << mnemonic;

    // The first operand is separated from the mnemonic by a blank, every
    // following operand by ", ".
    const char *separator = " ";

    if (_numDestRegs >= 1) {
        ss << separator << registerName(destRegIdx(0));
        separator = ", ";
    }

    // As before, at most three source registers are shown.
    for (int i = 0; i < _numSrcRegs && i < 3; i++) {
        ss << separator << registerName(srcRegIdx(i));
        separator = ", ";
    }

    return ss.str();
}

} // namespace RiscvISA
} // namespace gem5
  1. Include CustomOP Header File
    @ src/arch/riscv/isa/includes.isa
1
2
3
4
5
6
output header {{
...
#include "arch/riscv/insts/unknown.hh"
#include "arch/riscv/insts/vector.hh"
#include "arch/riscv/insts/custom.hh"
...

object4: 添加一条向量三操作数相加指令vtriadd_vv

1
2
vtriadd.vv vd, vs2, vs1
vd[i] = vd[i] + vs2[i] + vs1[i]
  1. 直接在 decoder.isa 里面的 VectorIntFormat 下添加即可
1
2
3
4
// FHE Extension
// vtriadd.vv: vd[i] = vd[i] + vs2[i] + vs1[i]; Vs3 names the old value of vd.
// All three sources are vector registers, so the entry belongs to the stock
// VectorIntFormat. VectorVXXFormat takes rs1/rs2 from the integer register
// file and therefore does not fit a .vv instruction.
// NOTE(review): OPIVV is assumed as the category argument of VectorIntFormat;
// confirm it against the enclosing funct3 decode block.
0x3d: VectorIntFormat::vtriadd_vv({{
Vd_vu[i] = Vs3_vu[i] + Vs2_vu[i] + Vs1_vu[i];
}}, OPIVV, SimdAluOp);

object5: 添加一条向量三操作数(vd[i] + rs1 + rs2)相加指令vtriadd_vxx

1
2
vtriadd.vxx vd, rs2, rs1
vd[i] = vd[i] + rs2 + rs1
  1. 现有的RVV1.0没有这样的指令格式,需要先定义新的Format(仿照VectorIntFormat)
    主要修改 src2 的解析,可以删除一些不必要的逻辑
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
// Format for vector instructions of the shape "op vd, rs2, rs1": the two
// extra sources are scalar integer registers and the third source is the old
// value of vd. It reuses the VectorInt* templates and the VectorArith
// macro/micro instruction base classes.
def format VectorVXXFormat(code, *flags) {{
# Base classes of the generated macro-op and of its micro-ops.
macroop_class_name = 'VectorArithMacroInst'
microop_class_name = 'VectorArithMicroInst'

# Parameters of the macro-op: it carries the unwrapped per-element code.
iop = InstObjParams(
name,
Name,
macroop_class_name,
{'code': code,
'declare_varith_template': declareVArithTemplate(Name)},
flags
)
# Split e.g. "vtriadd_vxx" into the instruction name and its suffix.
inst_name, inst_suffix = name.split("_", maxsplit=1)
# v0 is added as a source for everything except "vmv"; the mask-condition
# wrapper is additionally skipped for the vvm/vxm/vim suffixes.
v0_required = inst_name not in ["vmv"]
mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim'])
# The element-index declaration is emitted when masking is applied or when
# the code text contains "ei".
need_elem_idx = mask_cond or code.find("ei") != -1

# Destination: vd, offset by the micro-op index.
dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"

num_src_regs = 0

# rs2 is read from the integer register class (the change relative to the
# vector-vector form, where this source is a vector register).
src2_reg_id = "intRegClass[_machInst.rs2]"
num_src_regs += 1

# rs1 is read from the integer register class as well.
src1_reg_id = "intRegClass[_machInst.rs1]"
num_src_regs += 1

# The old value of vd is the source that follows the scalar ones; its index
# is passed to copyOldVd() below.
old_vd_idx = num_src_regs
src3_reg_id = "vecRegClass[_machInst.vd + _microIdx]"

set_dest_reg_idx = setDestWrapper(dest_reg_id)

# Sources are registered in the order src1, src2, src3 (then v0). This call
# order has to stay in agreement with the operand priorities declared in
# operands.isa.
set_src_reg_idx = ""
set_src_reg_idx += setSrcWrapper(src1_reg_id)
set_src_reg_idx += setSrcWrapper(src2_reg_id)
set_src_reg_idx += setSrcWrapper(src3_reg_id)
if v0_required:
set_src_reg_idx += setSrcVm()

# Wrap the per-element code: optional mask condition, optional element-index
# declaration, and finally the element loop.
if mask_cond:
code = maskCondWrapper(code)
if need_elem_idx:
code = eiDeclarePrefix(code)
code = loopWrapper(code)

# Declaration/read of the mask data, only when v0 is a source.
vm_decl_rd = ""
if v0_required:
vm_decl_rd = vmDeclAndReadData()

set_vlenb = setVlenb()
set_vlen = setVlen() if need_elem_idx else ""

# Parameters of the micro-op: wrapped code plus the register-setup snippets
# assembled above.
microiop = InstObjParams(name + "_micro",
Name + "Micro",
microop_class_name,
{'code': code,
'set_dest_reg_idx': set_dest_reg_idx,
'set_src_reg_idx': set_src_reg_idx,
'set_vlenb' : set_vlenb,
'set_vlen' : set_vlen,
'vm_decl_rd': vm_decl_rd,
'copy_old_vd': copyOldVd(old_vd_idx),
'declare_varith_template': declareVArithTemplate(Name + "Micro")},
flags)

# Emit declarations, constructors, execute body and decode block from the
# existing VectorInt* templates.
header_output = \
VectorIntMicroDeclare.subst(microiop) + \
VectorIntMacroDeclare.subst(iop)
decoder_output = \
VectorIntMicroConstructor.subst(microiop) + \
VectorIntMacroConstructor.subst(iop)
exec_output = VectorIntMicroExecute.subst(microiop)
decode_block = VectorIntDecodeBlock.subst(iop)
}};

  1. 在 decoder.isa 中添加新的指令
1
2
3
4
// FHE Extension
// vtriadd.vxx: vd[i] = vd[i] + rs2 + rs1. Vs3 is the old value of vd, while
// Rs2 and Rs1 are the scalar integer sources set up by VectorVXXFormat.
0x3d: VectorVXXFormat::vtriadd_vxx({{
Vd_vu[i] = Vs3_vu[i] + Rs2 + Rs1;
}}, SimdAluOp);

object6: 实现自定义的指令扩展,并且指令使用自定义向量寄存器

sub object1: 实现自定义的向量寄存器

  1. 定义自定义寄存器类型 in src/cpu/reg_class.hh
1
2
3
4
5
6
7
8
9
/** Enumerate the classes of registers. */
enum RegClassType
{
...
CusVecRegClass, ///< Custom Vector Register
...
};
...
inline constexpr char CusVecRegClassName[] = "cusvec";
  1. 定义自定义寄存器以及物理寄存器数量参数 in src/cpu/o3/regfile.hh regfile.cc
1
2
3
4
5
6
7
8
/**
 * Custom vector register file. The member is named cusVecRegFile so that it
 * matches the constructor initializer and the getReg() accessor that use it.
 */
RegFile cusVecRegFile;
/** Ids of the physical custom vector registers, filled by the constructor. */
std::vector<PhysRegId> cusVecRegIds;

/**
 * Number of physical custom vector registers
 */
unsigned numPhysicalCusVecRegs;

并修改相应的函数(主要是增加新增寄存器的参数)

  • 构造函数

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
        PhysRegFile(...,
    unsigned _numPhysicalCusVecRegs,
    const BaseISA::RegClasses &classes)
    : ...
    cusVecRegFile(*reg_classes.at(CusVecRegClass), _numPhysicalCusVecRegs),
    ...,
    numPhysicalCusVecRegs(_numPhysicalCusVecRegs),
    totalNumRegs(_numPhysicalIntRegs
    + _numPhysicalFloatRegs
    + _numPhysicalVecRegs
    + numPhysicalVecElemRegs
    + _numPhysicalVecPredRegs
    + _numPhysicalMatRegs
    + _numPhysicalCCRegs
    + _numPhysicalCusVecRegs){

    ...
    // The next batch of the registers are the custom vector physical
    // registers; put them onto the custom vector free list.
    for (phys_reg = 0; phys_reg < numPhysicalCusVecRegs; phys_reg++) {
    cusVecRegIds.emplace_back(*reg_classes.at(CusVecRegClass), phys_reg,
    flat_reg_idx++);
    }
  • getReg() 函数

    1
    2
    3
    4
    5
    case CusVecRegClass:
    cusVecRegFile.get(idx, val);
    DPRINTF(IEW, "RegFile: Access to custom vector register %i, has "
    "data %s\n", idx, cusVecRegFile.regClass.valString(val));
    break;
  • getWritableReg() 函数

    1
    2
    // Custom vector registers: hand out a pointer into cusVecRegFile, the
    // same register file the constructor and getReg() operate on.
    case CusVecRegClass:
        return cusVecRegFile.ptr(idx);
  • setReg() 函数

    1
    2
    3
    4
    5
    case SpmmRegClass:
    DPRINTF(IEW, "RegFile: Setting SpMM register %i to %s\n",
    idx, spmmRegFile.regClass.valString(val));
    spmmRegFile.set(idx, val);
    break;

    注意: getReg() 和 setReg() 函数各有两个版本:一个用于标量寄存器的读写,另一个用于向量/矩阵等寄存器的读写;根据扩展寄存器的类型实现其中一种即可。

  • initFreeList() 函数

    1
    2
    3
    4
    5
    6
    // The next batch of the registers are the custom vector physical
    // registers; put them onto the custom vector free list.
    // The id vector is spelled cusVecRegIds, as in its declaration and in
    // the constructor that fills it.
    for (reg_idx = 0; reg_idx < numPhysicalCusVecRegs; reg_idx++) {
        // Each id must sit at the position equal to its own index.
        assert(cusVecRegIds[reg_idx].index() == reg_idx);
    }
    freeList->addRegs(cusVecRegIds.begin(), cusVecRegIds.end());
  1. ISA 添加寄存器
     (注意: 本文的代码片段混用了 CusVec* 与 SpMM* 两套命名,二者指的应是同一类自定义向量寄存器;实际实现时请统一为其中一种。)

    • 添加 debug flag in src/arch/SConscript
    1
    2
    3
    4
    5
    DebugFlag('SpMMRegs')
    DebugFlag('CCRegs')
    DebugFlag('MiscRegs')
    CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'VecRegs', 'VecPredRegs',
    'MatRegs', 'SpMMRegs', 'CCRegs', 'MiscRegs' ])
    • 添加 src/arch/riscv/regs/spmm.hh

    • ISA 构造函数 in src/arch/riscv/isa.cc

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    #include "arch/riscv/regs/spmm.hh"
    ...
    ISA::ISA(const Params &p) : ...
    {
    ...
    _regClasses.push_back(&matRegClass);
    _regClasses.push_back(&spmmRegClass);
    _regClasses.push_back(&ccRegClass);
    _regClasses.push_back(&miscRegClass);
    ...
    }
    • 其他函数 in src/arch/riscv/isa.cc
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    void
    ISA::copyRegsFrom(ThreadContext *src)
    {
    ...
    // Fourth loop through the spmm registers.
    RiscvISA::SpMMRegContainer spmm;
    for (auto &id: spmmRegClass) {
    src->getReg(id, &spmm);
    tc->setReg(id, &spmm);
    }
    ...
    }
  2. cpu中添加相应寄存器支持

    • CPU 参数定义 in src/cpu/o3/BaseO3CPU.py

      1
      numPhysCusVecRegs = Param.Unsigned(32, "Number of physical custom vector registers")
    • 添加统计变量

    1
    2
    3
    4
    5
    6
    7
    8
    struct ExecuteCPUStats: public statistics::Group
    {
    ...
    /* Number of spmm register file accesses */
    mutable statistics::Scalar numSpMMRegReads;
    statistics::Scalar numSpMMRegWrites;
    ...
    }
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    BaseCPU::
    ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
    : ...
    ADD_STAT(numSpMMRegReads, statistics::units::Count::get(),
    "Number of times the spmm registers were read"),
    ADD_STAT(numSpMMRegWrites, statistics::units::Count::get(),
    "Number of times the spmm registers were written"),
    ...
    {
    ...
    numSpMMRegReads
    .prereq(numSpMMRegReads);
    numSpMMRegWrites
    .prereq(numSpMMRegWrites);
    ...
    }
    • 函数修改 in src/cpu/o3/cpu.cc

      • 构造函数
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      CPU::CPU(const BaseO3CPUParams &params)
      : ..., regFile(...
      params.numPhysSpMMRegs,
      params.isa[0]->regClasses()),...{
      ...
      panic_if(params.numPhysSpMMRegs <=
      numThreads * regClasses.at(SpMMRegClass)->numRegs() &&
      regClasses.at(SpMMRegClass)->numRegs() != 0,
      "Not enough physical registers, consider increasing "
      "numPhysSpMMRegs\n");
      ...
      }
      • CPU::getReg()
      1
      2
      3
      case SpMMRegClass:
      executeStats[tid]->numSpMMRegReads++;
      break;
      • CPU::getWritableReg() & CPU::setReg()
      1
      2
      3
      case SpMMRegClass:
      executeStats[tid]->numSpMMRegWrites++;
      break;
  3. Utility 支持

in src/arch/riscv/utility.hh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include "arch/riscv/regs/spmm.hh"
...
inline std::string
registerName(RegId reg)
{
...
else if (reg.is(SpMMRegClass)) {
if (reg.index() >= NumSpMMRegs) {
std::stringstream str;
str << "?? (v" << reg.index() << ')';
return str.str();
}
return SpMMRegNames[reg.index()];
}
...
}

此处的 include 是必要的: decoder.isa 生成的代码通过 utility.hh 间接 include arch/riscv/regs/spmm.hh。

  1. 其他函数修改
    • SimpleThread 构造函数初始化 in src/cpu/simple_thread.cc
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15

    SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
    Process *_process, BaseMMU *_mmu,
    BaseISA *_isa, InstDecoder *_decoder)
    : ThreadState(_cpu, _thread_num, _process),
    regFiles{{
    {*_isa->regClasses().at(IntRegClass)},
    {*_isa->regClasses().at(FloatRegClass)},
    {*_isa->regClasses().at(VecRegClass)},
    {*_isa->regClasses().at(VecElemClass)},
    {*_isa->regClasses().at(VecPredRegClass)},
    {*_isa->regClasses().at(MatRegClass)},
    {*_isa->regClasses().at(SpMMRegClass)},
    {*_isa->regClasses().at(CCRegClass)}
    }},

sub object2: 为自定义寄存器创建操作数以便于 decoder 使用

定义 RegOperand in src/arch/isa_parser/operand_types.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

class SpMMRegOperand(RegOperand):
reg_class = "spmmRegClass"

def __init__(self, parser, full_name, ext, is_src, is_dest):
super().__init__(parser, full_name, ext, is_src, is_dest)
self.elemExt = None

def makeRead(self):
...

def makeWrite(self):
...

class SpMMRegOperandDesc(RegOperandDesc):
def __init__(self, *args, **kwargs):
super().__init__("spmmRegClass", SpMMRegOperand, *args, **kwargs)

为 ISA Parser 提供寄存器解析字典 in src/arch/isa_parser/isa_parser.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
self.exportContext.update(
{
"overrideInOperand": overrideInOperand,
"IntRegOp": IntRegOperandDesc,
"FloatRegOp": FloatRegOperandDesc,
"CCRegOp": CCRegOperandDesc,
"VecElemOp": VecElemOperandDesc,
"VecRegOp": VecRegOperandDesc,
"VecPredRegOp": VecPredRegOperandDesc,
"MatRegOp": MatRegOperandDesc,
"SpMMRegOp": SpMMRegOperandDesc,
"ControlRegOp": ControlRegOperandDesc,
"MemOp": MemOperandDesc,
"PCStateOp": PCStateOperandDesc,
}
)

添加 operands in src/arch/riscv/isa/operands.isa

1
2
3
4
5
6
7
8
9
10
11
12
13
def operand_types {{
...
'svc' : 'RiscvISA::SpMMRegContainer',
}};

def operands {{
...
#SpMM Operand
'Vr1': SpMMRegOp('svc', 'RS1', None, 1),
'Vr2': SpMMRegOp('svc', 'RS2', None, 2),
'Vr3': SpMMRegOp('svc', 'RD', None, 3),
...
}};

为 decoder.isa 添加 include 头文件 in src/arch/riscv/isa/includes.isa

1
2
3
4
5
6
7
8
9
10
output header {{
...
#include "arch/riscv/insts/spmm.hh"
...
}};
output exec {{
...
#include "arch/riscv/regs/spmm.hh"
...
}};

sub object3: 添加 ISA 参数控制寄存器

in src/arch/riscv/RiscvISA.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Parameter type for the SpMM vector register length in bits (used by the
# spmm_vlen parameter). Accepted values lie in [8, 65536] and must be a
# power of two.
class RiscvSpMMVectorLength(UInt32):
    min = 8
    max = 65536

    def _check(self):
        # Let the base class run its own checks first.
        super()._check()

        # A power of two has exactly one bit set, so clearing the lowest set
        # bit must leave zero. The value is presumably already known to be
        # non-zero at this point because of min above.
        remaining_bits = self.value & (self.value - 1)
        if remaining_bits != 0:
            raise TypeError("VLEN is not a power of 2: %d" % self.value)


# Parameter type for the SpMM vector element length in bits (used by the
# spmm_elen parameter). Accepted values lie in [8, 64] and must be a power
# of two.
class RiscvSpMMVectorElementLength(UInt32):
    min = 8
    max = 64

    def _check(self):
        # Let the base class run its own checks first.
        super()._check()

        # A power of two has exactly one bit set, so clearing the lowest set
        # bit must leave zero. The value is presumably already known to be
        # non-zero at this point because of min above.
        remaining_bits = self.value & (self.value - 1)
        if remaining_bits != 0:
            raise TypeError("ELEN is not a power of 2: %d" % self.value)

class RiscvISA(BaseISA):
...
enable_spmm = Param.Bool(True, "Enable spmm extension")
spmm_vlen = Param.RiscvSpMMVectorLength(
256,
"Length of each vector register in bits. (for SpMM)",
)
spmm_elen = Param.RiscvSpMMVectorElementLength(
64,
"Length of each vector element register in bits. (for SpMM)",
)

in src/arch/riscv/isa.hh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class ISA : public BaseISA
{
protected:
...
bool enableSpMM;

/** Length of each SpMM vector register in bits.
*/
unsigned spmm_vlen;

/** Length of each SpMM vector element in bits.
*/
unsigned spmm_elen;
...
public:
...
bool getEnableSpMM() const { return enableSpMM; }
...
/** Methods for getting SpMM_Vlen, Vlenb and elen values */
unsigned getSpMMVecLenInBits() { return spmm_vlen; }
unsigned getSpMMVecLenInBytes() { return spmm_vlen >> 3; }
unsigned getSpMMVecElemLenInBits() { return spmm_elen; }
...
};

in src/arch/riscv/isa.cc

1
2
3
4
5
ISA::ISA(const Params &p) : BaseISA(p, "riscv"),
_rvType(p.riscv_type), enableRvv(p.enable_rvv), enableSpMM(p.enable_spmm),
vlen(p.vlen), elen(p.elen), spmm_vlen(p.spmm_vlen), spmm_elen(p.spmm_elen),
_privilegeModeSet(p.privilege_mode_set),
_wfiResumeOnPending(p.wfi_resume_on_pending), _enableZcd(p.enable_Zcd)

为 Decoder 添加寄存器参数 in src/arch/riscv/decoder.hh decoder.cc

1
2
3
4
5
6
7
8
class Decoder : public InstDecoder
{
protected:
...
uint32_t spmm_vlen;
uint32_t spmm_elen;
};

1
2
3
4
5
6
7
Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst)
{
...
spmm_vlen = isa->getSpMMVecLenInBits();
spmm_elen = isa->getSpMMVecElemLenInBits();
...
}

sub object4: 修改 core 逻辑

添加 rename 统计变量: in src/cpu/o3/rename.hh

1
2
3
4
5
struct RenameStats : public statistics::Group
{
...
statistics::Scalar spmmLookups;
}
1
2
3
4
5
6
7
8
9
10
Rename::RenameStats::RenameStats(statistics::Group *parent)
:...
ADD_STAT(spmmLookups, statistics::units::Count::get(),
"Number of SpMM rename lookups"),
...
{
...
spmmLookups.prereq(spmmLookups);
...
}

rename : in src/cpu/o3/rename.cc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
void
Rename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
{
...
case SpMMRegClass:
stats.spmmLookups++;
break;
...
}

void
Rename::readFreeEntries(ThreadID tid)
{
...
renameMap[tid]->numFreeEntries(SpMMRegClass),
...
}

object7: 为特定指令执行提供 buffer

思路: 指令通过 ExecContext 提供的 read_buffer/write_buffer 接口(需要新增的函数,即下文的 readSpMMBuffer()/writeSpMMBuffer())访问 buffer;在 O3CPU 中,DynInst 继承并实现了 ExecContext,因此可以在这些函数里直接访问 cpu 的 buffer 部件。

sub object1: 为 O3CPU 定义并实现 SpMM Buffer

定义并实现 SpMMBuffer 类 in src/cpu/o3/spmm_buffer.hh spmm_buffer.cc

为 cpu 添加 SpMMBuffer 组件 in src/cpu/o3/cpu.hh

1
2
3
4
5
6
7
8
9
10
...
// Pull in the SpMMBuffer class; cpu.hh must not include itself.
#include "cpu/o3/spmm_buffer.hh"
...
class CPU : public BaseCPU
{
...
/** The SpMM buffer. */
SpMMBuffer spmm_buffer;
...
};

in src/cpu/o3/cpu.cc

1
2
3
// The SpMM buffer is sized from the SpMMBufferSize parameter and the SpMM
// vector length in bytes. The accessor is getSpMMVecLenInBytes(), matching
// its declaration in arch/riscv/isa.hh.
// NOTE(review): the accessor is declared on RiscvISA::ISA; confirm whether
// params.isa[0] needs a cast to that type before the call.
CPU::CPU(const BaseO3CPUParams &params)
:..., spmm_buffer(params.SpMMBufferSize, params.isa[0]->getSpMMVecLenInBytes()),
...

sub object2: 为 ExecContext / DynInst 定义并实现函数

in src/cpu/exec_context.hh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
class ExecContext
{
...
/**
* read the SpMM buffer.
*/
virtual Fault
readSpMMBuffer(unsigned int index, uint8_t *data)
{
panic("ExecContext::readSpMMBuffer() should be overridden\n");
}

/**
* write the SpMM buffer.
*/
virtual Fault
writeSpMMBuffer(unsigned int index, uint8_t *data)
{
panic("ExecContext::writeSpMMBuffer() should be overridden\n");
}
...
};

in src/cpu/o3/dyn_inst.hh

1
2
3
4
5
6
7
8
class DynInst : public ExecContext, public RefCounted
{
...
Fault readSpMMBuffer(unsigned int index, uint8_t *data) override;

Fault writeSpMMBuffer(unsigned int index, uint8_t *data) override;
...
};

in src/cpu/o3/dyn_inst.cc

1
2
3
4
5
6
7
8
9
10
11
/**
 * Forward a read of the SpMM buffer at position index to the owning CPU's
 * spmm_buffer, passing data through unchanged.
 *
 * @return Always NoFault.
 */
Fault
DynInst::readSpMMBuffer(unsigned int index, uint8_t *data)
{
    cpu->spmm_buffer.read(index, data);

    return NoFault;
}

/**
 * Forward a write of the SpMM buffer at position index to the owning CPU's
 * spmm_buffer, passing data through unchanged.
 *
 * @return Always NoFault.
 */
Fault
DynInst::writeSpMMBuffer(unsigned int index, uint8_t *data)
{
    cpu->spmm_buffer.write(index, data);

    return NoFault;
}

NOTE 易错点

  1. gem5 在解析 decoder.isa 中的 code 时,会按照 operands.isa 中定义的源操作数优先级,从指令的 _srcRegIdxPtr 中获取对应的寄存器;而 _srcRegIdxPtr 中寄存器的顺序由指令构造函数中 setSrcRegIdx() 的调用顺序决定。这个调用顺序必须手动与源操作数的优先级保持一致。