| //=- AArch64SchedOryon.td - Qualcomm Oryon CPU 001 ---*- tablegen -*-=// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the scheduling model for Qualcomm Oryon |
| // family of processors. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| //===----------------------------------------------------------------------===// |
| // Pipeline Description. |
| |
| // Top-level machine model for Oryon. Every resource, write type and InstRW |
| // inside the `let SchedModel = OryonModel in { ... }` scope below attaches to it. |
| def OryonModel : SchedMachineModel { |
| let IssueWidth = 14; // Max micro-ops scheduled per cycle. |
| let MicroOpBufferSize = 376; // Micro-op buffer capacity used by the scheduler. |
| let LoadLatency = 4; // Default load latency; the LSU tables below also use 4 cycles. |
| let MispredictPenalty = 13; // 13 cycles for mispredicted branch. |
| let LoopMicroOpBufferSize = 0; // Do not have a LoopMicroOpBuffer |
| let PostRAScheduler = 1; // Using PostRA sched. |
| let CompleteModel = 1; // Instructions are expected to have explicit scheduling info. |
| |
| // Predicates for ISA features that this model does not describe. |
| // NOTE(review): HasPAuth is listed here although PAC*/AUT* InstRW entries |
| // exist further down in this file -- confirm that both are intended. |
| list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, |
| SMEUnsupported.F, |
| MTEUnsupported.F, |
| PAUnsupported.F, |
| [HasPAuth, HasCSSC]); |
| } |
| |
| let SchedModel = OryonModel in { |
| |
| // Issue ports. |
| // IXU has 6 ports p0 ~ p5 |
| // LSU has 4 ports p6 ~ p9(ls0 ~ ls3), p10/p11(std0, std1) has to work with ls0~ls3 |
| // VXU has 4 ports p12 ~ p15 |
| |
| // cross IXU/LSU/VXU resource group for FMOV P41 of VXU |
| // I2V |
| def ORYONI4FP0 : ProcResource<1>; |
| def ORYONI5FP1 : ProcResource<1>; |
| // V2I |
| def ORYONFP0I4 : ProcResource<1>; |
| def ORYONFP1I5 : ProcResource<1>; |
| |
| // store 1 for normal store instructions |
| def ORYONST0 : ProcResource<1>; |
| // store 2 for normal store instructions |
| def ORYONST1 : ProcResource<1>; |
| |
| // Port 0: ALU/Indirect/Direct Branch. |
| def ORYONP0 : ProcResource<1>; |
| |
| // Port 1: ALU/Direct Branch. |
| def ORYONP1 : ProcResource<1>; |
| |
| // Port 2: ALU. |
| def ORYONP2 : ProcResource<1>; |
| |
| // Port 3: ALU. |
| def ORYONP3 : ProcResource<1>; |
| |
| // Port 4: ALU. |
| // NOTE(review): Super is assigned twice below. TableGen presumably keeps only |
| // the last assignment (ORYONFP0I4), leaving ORYONI4FP0 without effect -- confirm. |
| def ORYONP4 : ProcResource<1> { |
| let Super = ORYONI4FP0; |
| let Super = ORYONFP0I4; } |
| |
| // Port 5: ALU. |
| // NOTE(review): Super is assigned twice below. TableGen presumably keeps only |
| // the last assignment (ORYONFP1I5), leaving ORYONI5FP1 without effect -- confirm. |
| def ORYONP5 : ProcResource<1> { |
| let Super = ORYONI5FP1; |
| let Super = ORYONFP1I5; } |
| |
| // Port 6: Load/Store. LS0 |
| def ORYONP6 : ProcResource<1> { |
| let Super = ORYONST0; } |
| |
| // Port 7: Load/store. LS1 |
| def ORYONP7 : ProcResource<1> { |
| let Super = ORYONST0; } |
| |
| // Port 8: Load/Store. LS2 |
| def ORYONP8 : ProcResource<1> { |
| let Super = ORYONST1; } |
| |
| // Port 9: Load/store. LS3 |
| def ORYONP9 : ProcResource<1> { |
| let Super = ORYONST1; } |
| |
| // Port 10: Load/Store. STD0 |
| def ORYONP10SD0 : ProcResource<1> { |
| let Super = ORYONST0; } |
| |
| // Port 11: Load/store. STD1 |
| def ORYONP11SD1 : ProcResource<1> { |
| let Super = ORYONST1; } |
| |
| // Port 12: FP/Neon/SIMD/Crypto. |
| // NOTE(review): Super is assigned twice below. TableGen presumably keeps only |
| // the last assignment (ORYONFP0I4), leaving ORYONI4FP0 without effect -- confirm. |
| def ORYONP12FP0 : ProcResource<1> { |
| let Super = ORYONI4FP0; |
| let Super = ORYONFP0I4; } |
| |
| // Port 13: FP/Neon/SIMD/Crypto. |
| // NOTE(review): Super is assigned twice below. TableGen presumably keeps only |
| // the last assignment (ORYONFP1I5), leaving ORYONI5FP1 without effect -- confirm. |
| def ORYONP13FP1 : ProcResource<1> { |
| let Super = ORYONI5FP1; |
| let Super = ORYONFP1I5; } |
| |
| // Port 14: FP/Neon/SIMD/Crypto. |
| def ORYONP14FP2 : ProcResource<1>; |
| |
| // Port 15: FP/Neon/SIMD/Crypto. |
| def ORYONP15FP3 : ProcResource<1>; |
| |
| // Define groups for the functional units on each issue port. Each group |
| // created will be used by a WriteRes. |
| |
| // Integer add/shift/logical/misc. instructions on port I0/I1/I2/I3/I4/I5. |
| def ORYONI012345 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2, |
| ORYONP3, ORYONP4, ORYONP5]> { |
| let BufferSize = 120; |
| } |
| |
| // Direct Conditional Branch instructions on ports I0/I1. |
| def ORYONI01 : ProcResGroup<[ORYONP0, ORYONP1]> { |
| let BufferSize = 40; |
| } |
| |
| // Indirect/crypto Conditional Branch instructions on ports I0. |
| def ORYONI0 : ProcResGroup<[ORYONP0]> { |
| let BufferSize = 20; |
| } |
| |
| // Crypto/CRC/PAU instructions on ports I2. |
| def ORYONI2 : ProcResGroup<[ORYONP2]> { |
| let BufferSize = 20; |
| } |
| |
| // Multiply/Multiply-ADD instructions on ports I4/I5. |
| def ORYONI45 : ProcResGroup<[ORYONP4, ORYONP5]> { |
| let BufferSize = 40; |
| } |
| |
| // Divide instructions on ports I5. |
| // NOTE(review): the SDIV/UDIV InstRW entries in this file use the |
| // ORYONWrite_7Cyc_I2_RC / ORYONWrite_9Cyc_I2_RC write types, which are bound to |
| // ORYONI2 rather than to this group -- confirm which port is intended. |
| def ORYONI5 : ProcResGroup<[ORYONP5]> { |
| let BufferSize = 20; |
| } |
| |
| // Comparison instructions on ports I0/I1/I2/I3. |
| def ORYONI0123 : ProcResGroup<[ORYONP0, ORYONP1, |
| ORYONP2, ORYONP3]> { |
| let BufferSize = 80; |
| } |
| |
| // Load instructions on ports P6/P7/P8/P9. |
| def ORYONLD : ProcResGroup<[ORYONP6, ORYONP7, ORYONP8, ORYONP9]> { |
| let BufferSize = 64; |
| } |
| |
| // Store instructions on combo of STA/STD pipes |
| def ORYONST : ProcResGroup<[ORYONST0, ORYONST1]> { |
| let BufferSize = 64; |
| } |
| |
| // Arithmetic and CRYP-AED ASIMD/FP instructions on ports FP0/FP1/FP2/FP3. |
| def ORYONFP0123 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1, |
| ORYONP14FP2, ORYONP15FP3]> { |
| let BufferSize = 192; |
| } |
| |
| // FP Comparison and F/I move instructions on ports FP0/FP1. |
| def ORYONFP01 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1]> { |
| let BufferSize = 96; |
| } |
| |
| // FDIV instructions on ports FP3. |
| def ORYONFP3 : ProcResGroup<[ORYONP15FP3]> { |
| let BufferSize = 48; |
| } |
| |
| // CRYP-SHA instructions on port FP1 (issue port 13). |
| // This group must list ORYONP13FP1: ORYONP14FP2 is the FP2 port and already |
| // forms the separate ORYONFP2 group. Every other single-port FP group follows |
| // the same pairing (ORYONFP0 -> ORYONP12FP0, ORYONFP3 -> ORYONP15FP3). |
| def ORYONFP1 : ProcResGroup<[ORYONP13FP1]> { |
| let BufferSize = 48; |
| } |
| |
| // Instructions restricted to port FP2 (issue port 14). |
| def ORYONFP2 : ProcResGroup<[ORYONP14FP2]> { |
| let BufferSize = 48; |
| } |
| |
| // Reciprocal and square root instructions on port FP0. |
| def ORYONFP0 : ProcResGroup<[ORYONP12FP0]> { |
| let BufferSize = 48; |
| } |
| |
| // cross IXU/LSU/VXU resource group for FMOV P41 of VXU |
| // I2V |
| def ORYONI2V : ProcResGroup<[ORYONI4FP0, ORYONI5FP1]> { |
| let BufferSize = 40; |
| } |
| |
| // V2I |
| def ORYONV2I : ProcResGroup<[ORYONFP0I4, ORYONFP1I5]> { |
| let BufferSize = 96; |
| } |
| |
| // Define commonly used write types for InstRW specializations. |
| // All definitions follow the format: ORYONWrite_<NumCycles>Cyc_<Resources>. |
| |
| // Because of the complexity of Oryon CPU, we skip the following |
| // generic definitions and define each instruction specifically |
| |
| // These WriteRes entries are not used in the Oryon sched model. |
| // Every generic SchedWrite listed here is marked Unsupported with an empty |
| // resource list; instructions get their scheduling info from InstRW entries. |
| foreach GenericWrite = [WriteImm, WriteI, WriteISReg, WriteIEReg, WriteExtr, |
|     WriteIS, WriteID32, WriteID64, WriteIM32, WriteIM64, |
|     WriteBr, WriteBrReg, WriteLD, WriteST, WriteSTP, |
|     WriteAdr, WriteLDIdx, WriteSTIdx, WriteF, WriteFCmp, |
|     WriteFCvt, WriteFCopy, WriteFImm, WriteFMul, WriteFDiv, |
|     WriteVd, WriteVq, WriteVLD, WriteVST, WriteSys, |
|     WriteBarrier, WriteHint, WriteLDHi, WriteAtomic] in |
| def : WriteRes<GenericWrite, []> { let Unsupported = 1; } |
| |
| // Placeholder ReadAdvance entries: every generic SchedRead gets an advance |
| // of 0 cycles until real forwarding information is added. |
| foreach GenericRead = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID, |
|     ReadExtrHi, ReadAdrBase, ReadVLD, ReadST] in |
| def : ReadAdvance<GenericRead, 0>; |
| |
| |
| //IXU resource definition |
| // 1 cycles NO pipe |
| def ORYONWrite_1Cyc_NONE : SchedWriteRes<[]>; |
| |
| // 1 cycles on I01. |
| def ORYONWrite_1Cyc_I01 : SchedWriteRes<[ORYONI01]>; |
| |
| def ORYONWrite_1Cyc_2Uops_I01 : SchedWriteRes<[ORYONI01]> { |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_1Cyc_I0 : SchedWriteRes<[ORYONI0]>; |
| |
| // 7 cycles on I2. PAC*/AUT* instructions |
| def ORYONWrite_7Cyc_I2 : SchedWriteRes<[ORYONI2]> { |
| let Latency = 7; |
| } |
| |
| // 7 cycles on I2. PAC*/AUT* instructions |
| def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]> { |
| let Latency = 7; |
| let NumMicroOps = 3; |
| } |
| |
| // 9 (7+1+1) cycles on I2 and I0/I1, I0. Authentication branch instructions. |
| // These instructions are broken down into three uops: |
| // a. PtrAuth on pipe 2 taking 7 cycles |
| // b. Link Register Update on pipes 0 and 1 taking 1 cycle |
| // c. Indirect branch on pipe 0 taking 1 cycle |
| // NOTE(review): only ORYONI2 and ORYONI01 are listed as resources, so the |
| // pipe-0-only branch uop (c) has no separate resource -- confirm this is intended. |
| |
| def ORYONWrite_9Cyc_I012 : SchedWriteRes<[ORYONI2, ORYONI01]> { |
| let Latency = 9; |
| let NumMicroOps = 3; |
| } |
| |
| // 3 cycles on I2. CRC32 and CRC32C instructions |
| def ORYONWrite_3Cyc_I2 : SchedWriteRes<[ORYONI2]> { |
| let Latency = 3; |
| } |
| |
| // 1 cycle on I012345 |
| def ORYONWrite_1Cyc_I012345 : SchedWriteRes<[ORYONI012345]>; |
| |
| // 1 cycle on I0123 |
| def ORYONWrite_1Cyc_I0123 : SchedWriteRes<[ORYONI0123]>; |
| |
| // 1 cycle on 2 of I012345 |
| def ORYONWrite_1Cyc_I012345_I012345 : |
| SchedWriteRes<[ORYONI012345, ORYONI012345]> ; |
| |
| // 2 cycle on 2 of I0123 with ReleaseAtCycles |
| def ORYONWrite_2Cyc_I0123_I0123_RC : |
| SchedWriteRes<[ORYONI0123, ORYONI0123]> { |
| let Latency = 2; |
| let ReleaseAtCycles = [2,2]; |
| } |
| |
| // 2 cycle on 2 of I012345 |
| def ORYONWrite_2Cyc_I012345_I012345_RC : |
| SchedWriteRes<[ORYONI012345, ORYONI012345]> { |
| let Latency = 2; |
| let ReleaseAtCycles = [2,2]; |
| } |
| |
| // 3 cycle on 2 of I45 |
| def ORYONWrite_3Cyc_I45_I45_RC : |
| SchedWriteRes<[ORYONI45, ORYONI45]> { |
| let Latency = 3; |
| let ReleaseAtCycles = [2,2]; |
| } |
| |
| // 3 cycle on I45 |
| def ORYONWrite_3Cyc_I45 : SchedWriteRes<[ORYONI45]> { |
| let Latency = 3; |
| } |
| |
| // 7 cycle on I2 32-bit integer division |
| def ORYONWrite_7Cyc_I2_RC : SchedWriteRes<[ORYONI2]> { |
| let Latency = 7; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| // 9 cycle on I2 64-bit integer division |
| def ORYONWrite_9Cyc_I2_RC : SchedWriteRes<[ORYONI2]> { |
| let Latency = 9; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| // LSU resource definition |
| // need to define WriteLDAdr, WriteAdrAdr, WriteLDHi, WriteSTX |
| // 4 cycle on LS(P6789) |
| def ORYONWrite_4Cyc_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 4; |
| } |
| |
| // 4 cycle for Post/Pre inc/dec access, also covers all pair loads Post/Pre |
| def ORYONWrite_4Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 4; |
| } |
| |
| // 5 (4+1) for VXU SIMD access/could also include FP |
| // resource might not be correct, as VXU resource not included |
| def ORYONWrite_5Cyc_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| } |
| |
| def ORYONWrite_5Cyc_2Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_5Cyc_3Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 3; |
| } |
| |
| def ORYONWrite_5Cyc_4Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 4; |
| } |
| |
| def ORYONWrite_5Cyc_5Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 5; |
| } |
| |
| def ORYONWrite_5Cyc_6Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 6; |
| } |
| |
| def ORYONWrite_5Cyc_8Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 8; |
| } |
| |
| def ORYONWrite_5Cyc_10Uops_LD : SchedWriteRes<[ORYONLD]> { |
| let Latency = 5; |
| let NumMicroOps = 10; |
| } |
| |
| // 5 cycle for Post/Pre inc/dec access (load pipe plus one integer ALU pipe). |
| // NOTE(review): this comment previously said 6 cycles while Latency is 5 -- |
| // confirm which value is intended. |
| def ORYONWrite_5Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| } |
| |
| def ORYONWrite_5Cyc_2Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_5Cyc_3Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 3; |
| } |
| |
| def ORYONWrite_5Cyc_4Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 4; |
| } |
| |
| def ORYONWrite_5Cyc_5Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 5; |
| } |
| |
| def ORYONWrite_5Cyc_6Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 6; |
| } |
| |
| def ORYONWrite_5Cyc_8Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 8; |
| } |
| |
| def ORYONWrite_5Cyc_10Uops_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]> { |
| let Latency = 5; |
| let NumMicroOps = 10; |
| } |
| |
| // 1 cycle for all generic stores |
| def ORYONWrite_1Cyc_ST : SchedWriteRes<[ORYONST]>; |
| |
| def ORYONWrite_1Cyc_2Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_1Cyc_3Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 3; |
| } |
| |
| def ORYONWrite_1Cyc_4Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 4; |
| } |
| |
| def ORYONWrite_1Cyc_5Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 5; |
| } |
| |
| def ORYONWrite_1Cyc_6Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 6; |
| } |
| |
| def ORYONWrite_1Cyc_8Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 8; |
| } |
| |
| def ORYONWrite_1Cyc_10Uops_ST : SchedWriteRes<[ORYONST]> { |
| let NumMicroOps = 10; |
| } |
| |
| // 1 cycle for neon write: float + ASIMD with Post/Pre Inc/Dec access |
| // also includes Pair store until further informed |
| // NOTE(review): this unsuffixed variant uses 3 micro-ops, the same as |
| // ORYONWrite_1Cyc_3Uops_ST_I012345 -- confirm the count is intended. |
| def ORYONWrite_1Cyc_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 3; |
| } |
| |
| def ORYONWrite_1Cyc_2Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_1Cyc_3Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 3; |
| } |
| |
| def ORYONWrite_1Cyc_4Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 4; |
| } |
| |
| def ORYONWrite_1Cyc_5Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 5; |
| } |
| |
| def ORYONWrite_1Cyc_6Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 6; |
| } |
| |
| def ORYONWrite_1Cyc_8Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 8; |
| } |
| |
| def ORYONWrite_1Cyc_10Uops_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]> { |
| let NumMicroOps = 10; |
| } |
| |
| // VXU resource definition |
| |
| // I2V instruction has 1 uOp |
| // I2v with convert has 2 uOps |
| // all I2V, V2I's throughputs are 2 |
| // On VXU doc, p37 -- latencies and throughput |
| // P41, resource taken, P42, uOps |
| def ORYONWrite_I2V_4Cyc_I45 : SchedWriteRes<[ORYONI2V]> { |
| let Latency = 4; |
| } |
| |
| // inline a FCVT, so add one more uOp |
| def ORYONWrite_I2V_7Cyc_I45 : SchedWriteRes<[ORYONI2V]> { |
| let Latency = 7; |
| let NumMicroOps = 2; |
| } |
| |
| // V2I move instruction has 1/2 uOps, P42 in VXU doc |
| // Latency is 3, FCVT is also 3 cycle |
| // move + convert is 6 (3+3) cycles |
| // throughput is 2 |
| def ORYONWrite_V2I_3Cyc_FP01 : SchedWriteRes<[ORYONV2I]> { |
| let Latency = 3; |
| } |
| |
| // inline a FCVT, so add one more uOp |
| def ORYONWrite_V2I_6Cyc_FP01 : SchedWriteRes<[ORYONV2I]> { |
| let Latency = 6; |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_V2V_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 2; |
| } |
| |
| def ORYONWrite_V2V_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 3; |
| } |
| |
| // 6 cycle, 3 uop vector-to-vector write. |
| // NOTE(review): the name says FP01 but the resource is the ORYONFP0123 group |
| // (ORYONFP01 exists as a separate group) -- confirm which one is intended. |
| def ORYONWrite_V2V_6Cyc_FP01 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 6; |
| let NumMicroOps = 3; |
| } |
| |
| def ORYONWrite_4Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 4; |
| } |
| |
| def ORYONWrite_3Cyc_FP0 : SchedWriteRes<[ORYONFP0]> { |
| let Latency = 3; |
| } |
| |
| def ORYONWrite_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 3; |
| } |
| |
| def ORYONWrite_3Cyc_2Uops_FP0123 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 3; |
| let NumMicroOps = 2; |
| } |
| |
| def ORYONWrite_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 2; |
| } |
| |
| def ORYONWrite_2Cyc_FP01 : SchedWriteRes<[ORYONFP01]> { |
| let Latency = 2; |
| } |
| |
| // 2 cycle on FP1 |
| def ORYONWrite_2Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { |
| let Latency = 2; |
| } |
| |
| // 3 cycle on FP1 |
| def ORYONWrite_3Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { |
| let Latency = 3; |
| } |
| |
| // 4 cycle , 0.5 throughput on FP1 |
| def ORYONWrite_4Cyc_FP1_RC4 : SchedWriteRes<[ORYONFP1]> { |
| let Latency = 4; |
| let ReleaseAtCycles = [4]; |
| } |
| |
| // 5 cycle , 1 throughput on FP1 |
| def ORYONWrite_5Cyc_FP1 : SchedWriteRes<[ORYONFP1]> { |
| let Latency = 5; |
| } |
| |
| // 8 cycle , 2 throughput on FP0123 |
| def ORYONWrite_8Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 8; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| def ORYONWrite_6Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 6; |
| } |
| |
| def ORYONWrite_7Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 7; |
| } |
| |
| def ORYONWrite_8Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 8; |
| } |
| |
| def ORYONWrite_9Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 9; |
| } |
| |
| def ORYONWrite_10Cyc_FP3 : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 10; |
| } |
| |
| def ORYONWrite_8Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 8; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| def ORYONWrite_10Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 10; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| def ORYONWrite_13Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]> { |
| let Latency = 13; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| def ORYONWrite_4Cyc_FP0123_RC : |
| SchedWriteRes<[ORYONFP0123]> { |
| let Latency = 4; |
| let ReleaseAtCycles = [2]; |
| } |
| |
| def ORYONWrite_4Cyc_FP0123_FP0123_RC : |
| SchedWriteRes<[ORYONFP0123, ORYONFP0123]> { |
| let Latency = 4; |
| let NumMicroOps = 2; |
| let ReleaseAtCycles = [2,2]; |
| } |
| |
| def ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC : |
| SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123]> { |
| let Latency = 4; |
| let NumMicroOps = 3; |
| let ReleaseAtCycles = [3,3,3]; |
| } |
| |
| def ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC : |
| SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]> { |
| let Latency = 6; |
| let NumMicroOps = 4; |
| let ReleaseAtCycles = [6,6,6,6]; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Instruction Tables in IXU |
| //===----------------------------------------------------------------------===// |
| |
| //--- |
| // Arithmetic Instructions |
| //--- |
| |
| //1, 1, 6 |
| def : InstRW<[ORYONWrite_1Cyc_I012345], |
| (instregex "^ADD(W|X)r(i|r|x)", "^SUB(W|X)r(i|r|x)")>; |
| |
| //2,2,3 |
| def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], |
| (instregex "^ADD(W|X)rs", "^SUB(W|X)rs")>; |
| |
| //1,1,4 alias CMP, CMN on page 75 |
| def : InstRW<[ORYONWrite_1Cyc_I0123], |
| (instregex "^ADDS(W|X)r(i|r|x)(64)?", "^SUBS(W|X)r(i|r|x)")>; |
| |
| //2,2,2 alias CMP, CMN on page 75 |
| def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC], |
| (instregex "^ADDS(W|X)rs", "^SUBS(W|X)rs")>; |
| |
| //1,1,4 |
| def : InstRW<[ORYONWrite_1Cyc_I0123], |
| (instregex "^ADC(W|X)r","^SBC(W|X)r", |
| "^ADCS(W|X)r","^SBCS(W|X)r")>; |
| |
| //1,1,2 |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_I01], |
| (instrs ADR,ADRP)>; |
| |
| //1,1,4 |
| def : InstRW<[ORYONWrite_1Cyc_I0123], |
| (instregex "^CSEL(W|X)r", "^CSINV(W|X)r", |
| "^CSNEG(W|X)r", "^CSINC(W|X)r")>; |
| |
| //--- |
| // Compare Instructions |
| //--- |
| |
| // We have CCMP, CCMN as LLVM DAG node |
| // CMP is an alias of SUBS as above |
| // CMN is an alias of ADDS as above |
| // We also have no way to get shift compare node in LLVM |
| //2,2,1.5 CMP, CMN |
| |
| //1,1,4 |
| def : InstRW<[ORYONWrite_1Cyc_I0123], |
| (instregex "^CCMP(W|X)(i|r)", "^CCMN(W|X)(i|r)")>; |
| |
| //--- |
| // Branch |
| //--- |
| |
| def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>; |
| def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>; |
| def : InstRW<[ORYONWrite_1Cyc_I01], |
| (instrs Bcc, CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; |
| def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>; |
| def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>; |
| |
| // V8.3a PAC authenticated branches and returns: 3 uops in total -- |
| // 7 cycles for authentication, 1 cycle for updating the link register, |
| // 1 cycle for the branch. |
| def : InstRW<[ORYONWrite_9Cyc_I012], |
|     (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, |
|             BRAA, BRAAZ, BRAB, BRABZ, |
|             RETAA, RETAB, ERETAA, ERETAB)>; |
| |
| def : InstRW<[ORYONWrite_7Cyc_3Uops_I2], (instregex "^LDRAA", "^LDRAB")>; |
| |
| //--- |
| // Logical Instructions |
| //--- |
| |
| //1,1,4 TST is an alias of ANDS |
| def : InstRW<[ORYONWrite_1Cyc_I0123], |
| (instregex "^ANDS(W|X)r(i|r|x)", "^BICS(W|X)r(i|r|x)")>; |
| |
| //2,2,2 TST shift is an alias |
| def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC], |
| (instregex "^ANDS(W|X)rs", "^BICS(W|X)rs")>; |
| |
| //1,1,6 |
| def : InstRW<[ORYONWrite_1Cyc_I012345], |
| (instregex "^AND(W|X)r(i|r|x)", "^EOR(W|X)r(i|r|x)", |
| "^ORR(W|X)r(i|r|x)", "^BIC(W|X)r(i|r|x)", |
| "^EON(W|X)r(i|r|x)", "^ORN(W|X)r(i|r|x)")>; |
| |
| //2,2,3 |
| def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC], |
| (instregex "^AND(W|X)rs", "^EOR(W|X)rs", "^ORR(W|X)rs", |
| "^BIC(W|X)rs", "^EON(W|X)rs", "^ORN(W|X)rs")>; |
| |
| |
| //--- |
| // Shift Instructions |
| //--- |
| |
| //1,1,6 |
| def : InstRW<[ORYONWrite_1Cyc_I012345], |
| (instregex "^ASRV(W|X)r", "^LSLV(W|X)r", |
| "^LSRV(W|X)r", "^RORV(W|X)r", |
| "RMIF")>; |
| |
| //--- |
| // Move-Data Bit-field and Sign_Extension Instructions |
| //--- |
| |
| //1,1,6 |
| def : InstRW<[ORYONWrite_1Cyc_I012345], |
| (instregex "^MOVK(W|X)i", "^MOVN(W|X)i", |
| "^MOVZ(W|X)i", "^SBFM(W|X)ri", |
| "^UBFM(W|X)ri", "^BFM(W|X)ri", |
| "^SXT(W|B|H|X)", "^UXT(H|B)")>; |
| |
| // COPY instruction is an LLVM internal DAG node, needs further study |
| def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>; |
| |
| //--- |
| // Reverse Instructions |
| //--- |
| |
| //1,1,6 |
| def : InstRW<[ORYONWrite_1Cyc_I012345], |
| (instregex "^RBIT(W|X)r", "^REV(16|32|64)?(W|X)r")>; |
| |
| |
| //--- |
| // Flag Manipulate Instructions |
| //--- |
| |
| //1,1,4 |
| def : InstRW<[ORYONWrite_1Cyc_I0123], |
| (instregex "^SETF8", "^SETF16", "^CFINV")>; |
| |
| //--- |
| // Miscellaneous Instructions |
| //--- |
| |
| //1,1,6 |
| def : InstRW<[ORYONWrite_1Cyc_I012345], |
| (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$", "^EXTR(W|X)rri")>; |
| |
| |
| //--- |
| // Multiply Instructions |
| //--- |
| |
| //1,3,2 |
| def : InstRW<[ORYONWrite_3Cyc_I45], |
| (instregex "^MADD(W|X)rrr", "^MSUB(W|X)rrr", |
| "^(S|U)MADDLrrr", "^(S|U)MSUBLrrr", |
| "^(S|U)MULHrr")>; |
| |
| //--- |
| // Divide Instructions |
| //--- |
| |
| def : InstRW<[ORYONWrite_7Cyc_I2_RC], |
| (instregex "^(S|U)DIVWr")>; |
| |
| def : InstRW<[ORYONWrite_9Cyc_I2_RC], |
| (instregex "^(S|U)DIVXr")>; |
| |
| |
| //--- |
| // Cryptography Instructions |
| //--- |
| //1,3,1 on I2 |
| def : InstRW<[ORYONWrite_3Cyc_I2], |
| (instregex "^CRC32(B|H|W|X)rr", "^CRC32C(B|H|W|X)rr")>; |
| |
| //--- |
| // PAU instructions |
| //--- |
| |
| // on p47 of IXU document, we have 7 cycles for all PAU instructions |
| // here we just assume all signing and pauth instructions are 7 cycles |
| // assume all are 7 cycles here |
| |
| // Signing (PAC*), authentication (AUT*) and XPAC* instructions all share |
| // the 7-cycle write type on I2. |
| def : InstRW<[ORYONWrite_7Cyc_I2], |
|     (instrs PACIA, PACIB, PACDA, PACDB, |
|             PACIZA, PACIZB, PACDZA, PACDZB, PACGA, |
|             AUTIA, AUTIB, AUTDA, AUTDB, |
|             AUTIZA, AUTIZB, AUTDZA, AUTDZB, |
|             XPACI, XPACD)>; |
| |
| //===----------------------------------------------------------------------===// |
| // Instruction Tables in LSU |
| //===----------------------------------------------------------------------===// |
| |
| // 4 cycle Load-to-use from L1D$ |
| // Neon load with 5 cycle |
| // 6 cycle to STA ? |
| // STD cycle ? |
| // NEON STD + 2 |
| |
| // Load Instructions |
| // FP Load Instructions |
| |
| // Load pair, immed pre-index, normal |
| // Load pair, immed pre-index, signed words |
| // Load pair, immed post-index, normal |
| // Load pair, immed post-index, signed words |
| // NOTE: Handled by WriteLD, WriteLDHi, WriteAdr. |
| |
| // LDNP*i forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi)>; |
| |
| // LDP*i forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi)>; |
| |
| // LDR*ui forms listed here: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDRBui, LDRDui, LDRHui, LDRQui, LDRSui)>; |
| |
| // LDR*l forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDRDl, LDRQl, LDRWl, LDRXl)>; |
| |
| // LDTR{B,H,W,X}i forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDTRBi, LDTRHi, LDTRWi, LDTRXi)>; |
| |
| // LDTRS*i forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi)>; |
| |
| // LDP*pre forms listed here: 4-cycle load using ORYONLD plus an integer pipe. |
| def : InstRW<[ORYONWrite_4Cyc_LD_I012345], |
|     (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre)>; |
| |
| // LDR*pre forms: 4-cycle load using ORYONLD plus an integer pipe. |
| def : InstRW<[ORYONWrite_4Cyc_LD_I012345], |
|     (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, |
|             LDRSpre, LDRWpre, LDRXpre)>; |
| |
| // LDRSB*/LDRSH* pre- and post-index forms: 4-cycle load using ORYONLD plus |
| // an integer pipe. |
| def : InstRW<[ORYONWrite_4Cyc_LD_I012345], |
|     (instrs LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost, |
|             LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost)>; |
| |
| // LDRBB/LDRHH pre- and post-index forms: 4-cycle load using ORYONLD plus |
| // an integer pipe. |
| def : InstRW<[ORYONWrite_4Cyc_LD_I012345], |
|     (instrs LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost)>; |
| |
| // LDP*post forms: 4-cycle load using ORYONLD plus an integer pipe. |
| def : InstRW<[ORYONWrite_4Cyc_LD_I012345], |
|     (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>; |
| |
| // LDR*post forms: 4-cycle load using ORYONLD plus an integer pipe. |
| def : InstRW<[ORYONWrite_4Cyc_LD_I012345], |
|     (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, |
|             LDRSpost, LDRWpost, LDRXpost)>; |
| |
| // LDR*roW forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW, |
|             LDRSroW, LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW)>; |
| |
| // LDR*roX forms: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDRBroX, LDRDroX, LDRHHroX, LDRHroX, LDRQroX, |
|             LDRSroX, LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX)>; |
| |
| // LDUR*i forms listed here: 4-cycle load on the ORYONLD group. |
| def : InstRW<[ORYONWrite_4Cyc_LD], |
|     (instrs LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, |
|             LDURQi, LDURSi, LDURXi, |
|             LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi)>; |
| |
| |
| |
| // Store register, immed post-index |
| // NOTE: Handled by WriteST, ReadAdrBase |
| |
| // Store register, immed pre-index |
| // NOTE: Handled by WriteST |
| |
| // Store pair, immed post-index, W-form |
| // Store pair, immed post-index, X-form |
| // Store pair, immed pre-index, W-form |
| // Store pair, immed pre-index, X-form |
| // NOTE: Handled by WriteSTP. |
| |
| // STUR* stores; every one maps to ORYONWrite_1Cyc_ST. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instrs STURBi, STURBBi, STURDi, STURHi, STURHHi, |
|             STURQi, STURSi, STURWi, STURXi)>; |
| |
| // STTR* stores; every one maps to ORYONWrite_1Cyc_ST. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instrs STTRBi, STTRHi, STTRWi, STTRXi)>; |
| |
| // STNP* stores listed here map to ORYONWrite_1Cyc_ST. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instrs STNPDi, STNPQi, STNPXi, STNPWi)>; |
| |
| // STP*i stores listed here map to ORYONWrite_1Cyc_ST. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instrs STPDi, STPQi, STPXi, STPWi)>; |
| |
| // STR*ui stores listed here map to ORYONWrite_1Cyc_ST. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instrs STRBui, STRDui, STRHui, STRQui, STRXui, STRWui)>; |
| |
| // STP*pre / STP*post stores use the *_I012345 flavour of the 1-cycle store |
| // write (presumably for the base-register update; the write is defined |
| // elsewhere in this file). |
| def : InstRW<[ORYONWrite_1Cyc_ST_I012345], |
|     (instrs STPDpre, STPDpost, |
|             STPSpre, STPSpost, |
|             STPWpre, STPWpost, |
|             STPXpre, STPXpost)>; |
| |
| // STR*pre / STR*post stores use the *_I012345 flavour of the 1-cycle store |
| // write, one pre/post pair per line. |
| def : InstRW<[ORYONWrite_1Cyc_ST_I012345], |
|     (instrs STRBpre,  STRBpost, |
|             STRBBpre, STRBBpost, |
|             STRDpre,  STRDpost, |
|             STRHpre,  STRHpost, |
|             STRHHpre, STRHHpost, |
|             STRQpre,  STRQpost, |
|             STRSpre,  STRSpost, |
|             STRWpre,  STRWpost, |
|             STRXpre,  STRXpost)>; |
| |
| // STR*roW / STR*roX stores map to the plain ORYONWrite_1Cyc_ST write, |
| // one roW/roX pair per line. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instrs STRBroW,  STRBroX, |
|             STRDroW,  STRDroX, |
|             STRHroW,  STRHroX, |
|             STRHHroW, STRHHroX, |
|             STRQroW,  STRQroX, |
|             STRSroW,  STRSroX, |
|             STRWroW,  STRWroX, |
|             STRXroW,  STRXroX)>; |
| |
| // ASIMD Load instructions, 4 cycle access + 2 cycle NEON access |
| // NOTE(review): every write used below is named *_5Cyc_*; confirm how that |
| // relates to the "4 + 2" figure above. |
| // |
| // ASIMD load, 1 element, multiple (LD1 One/Two/Three/Four). uOp counts: |
| //   1 reg: D-form 1, Q-form 1 |
| //   2 reg: D-form 3, Q-form 2 |
| //   3 reg: D-form 4, Q-form 3 |
| //   4 reg: D-form 6, Q-form 4 |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_5Cyc_LD], |
|     (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], |
|     (instregex "^LD1Twov(8b|4h|2s|1d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], |
|     (instregex "^LD1Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], |
|     (instregex "^LD1Threev(8b|4h|2s|1d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], |
|     (instregex "^LD1Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], |
|     (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], |
|     (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; |
| |
| // _POST forms: same uOp counts, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_5Cyc_LD_I012345], |
|     (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], |
|     (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], |
|     (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], |
|     (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], |
|     (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], |
|     (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], |
|     (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; |
| |
| // ASIMD load, 1 element, one lane (B/H/S/D) and all lanes (D-form and |
| // Q-form): 2 uOps in every case. |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], |
|     (instregex "^LD1i(8|16|32|64)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], |
|     (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], |
|     (instregex "^LD1i(8|16|32|64)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345], |
|     (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| |
| // ASIMD load, 2 element. uOp counts: |
| //   multiple:  D-form (B/H/S) 3, Q-form (B/H/S/D) 4 |
| //   one lane:  B/H/S/D 3 |
| //   all lanes: D-form and Q-form 3 |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], |
|     (instregex "^LD2Twov(8b|4h|2s)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], |
|     (instregex "^LD2Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], |
|     (instregex "^LD2i(8|16|32|64)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], |
|     (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], |
|     (instregex "^LD2Twov(8b|4h|2s)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], |
|     (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], |
|     (instregex "^LD2i(8|16|32|64)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345], |
|     (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| |
| // ASIMD load, 3 element. uOp counts: |
| //   multiple:  D-form (B/H/S) 5, Q-form (B/H/S/D) 6 |
| //   one lane:  B/H/S 4, D 5 |
| //   all lanes: B/H/S 4, D 5 (D-form and Q-form alike) |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], |
|     (instregex "^LD3Threev(8b|4h|2s)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], |
|     (instregex "^LD3Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], |
|     (instregex "^LD3i(8|16|32)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], |
|     (instregex "^LD3i(64)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], |
|     (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], |
|     (instregex "^LD3Rv(1d|2d)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], |
|     (instregex "^LD3Threev(8b|4h|2s)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], |
|     (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], |
|     (instregex "^LD3i(8|16|32)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], |
|     (instregex "^LD3i(64)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345], |
|     (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], |
|     (instregex "^LD3Rv(1d|2d)_POST$")>; |
| |
| // ASIMD load, 4 element. uOp counts: |
| //   multiple:  D-form (B/H/S) 6, Q-form (B/H/S) 10, Q-form (D) 8 |
| //   one lane:  B/H/S 5, D 6 |
| //   all lanes: B/H/S 5, D 6 (D-form and Q-form alike) |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], |
|     (instregex "^LD4Fourv(8b|4h|2s)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_10Uops_LD], |
|     (instregex "^LD4Fourv(16b|8h|4s)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_8Uops_LD], |
|     (instregex "^LD4Fourv(2d)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], |
|     (instregex "^LD4i(8|16|32)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], |
|     (instregex "^LD4i(64)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], |
|     (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], |
|     (instregex "^LD4Rv(1d|2d)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], |
|     (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_10Uops_LD_I012345], |
|     (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_8Uops_LD_I012345], |
|     (instregex "^LD4Fourv(2d)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], |
|     (instregex "^LD4i(8|16|32)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], |
|     (instregex "^LD4i(64)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345], |
|     (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)_POST$")>; |
| def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345], |
|     (instregex "^LD4Rv(1d|2d)_POST$")>; |
| |
| // ASIMD Store Instructions |
| // |
| // ASIMD store, 1 element. uOp counts (D-form and Q-form alike): |
| //   multiple, 1 reg: 1 |
| //   multiple, 2 reg: 2 |
| //   multiple, 3 reg: 3 |
| //   multiple, 4 reg: 4 |
| //   one lane (B/H/S/D): 2 |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_1Cyc_ST], |
|     (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], |
|     (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], |
|     (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], |
|     (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], |
|     (instregex "^ST1i(8|16|32|64)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_1Cyc_ST_I012345], |
|     (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], |
|     (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345], |
|     (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], |
|     (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], |
|     (instregex "^ST1i(8|16|32|64)_POST$")>; |
| |
| // ASIMD store, 2 element. uOp counts: |
| //   multiple: D-form (B/H/S) 2, Q-form (B/H/S/D) 4 |
| //   one lane: B/H/S/D 2 |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], |
|     (instregex "^ST2Twov(8b|4h|2s)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], |
|     (instregex "^ST2Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], |
|     (instregex "^ST2i(8|16|32|64)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], |
|     (instregex "^ST2Twov(8b|4h|2s)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], |
|     (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], |
|     (instregex "^ST2i(8|16|32|64)_POST$")>; |
| |
| // ASIMD store, 3 element. uOp counts: |
| //   multiple: D-form (B/H/S) 4, Q-form (B/H/S/D) 6 |
| //   one lane: B/H/S 2, D 4 |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], |
|     (instregex "^ST3Threev(8b|4h|2s)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_6Uops_ST], |
|     (instregex "^ST3Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], |
|     (instregex "^ST3i(8|16|32)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], |
|     (instregex "^ST3i(64)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], |
|     (instregex "^ST3Threev(8b|4h|2s)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_6Uops_ST_I012345], |
|     (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345], |
|     (instregex "^ST3i(8|16|32)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], |
|     (instregex "^ST3i(64)_POST$")>; |
| |
| |
| // ASIMD store, 4 element. uOp counts: |
| //   multiple: D-form (B/H/S) 5, Q-form (B/H/S) 10, Q-form (D) 8 |
| //   one lane: B/H/S 3, D 4 |
| |
| // Forms without the _POST suffix. |
| def : InstRW<[ORYONWrite_1Cyc_5Uops_ST], |
|     (instregex "^ST4Fourv(8b|4h|2s)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_10Uops_ST], |
|     (instregex "^ST4Fourv(16b|8h|4s)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_8Uops_ST], |
|     (instregex "^ST4Fourv(2d)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], |
|     (instregex "^ST4i(8|16|32)$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], |
|     (instregex "^ST4i(64)$")>; |
| |
| // _POST forms, using the matching *_I012345 write. |
| def : InstRW<[ORYONWrite_1Cyc_5Uops_ST_I012345], |
|     (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_10Uops_ST_I012345], |
|     (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_8Uops_ST_I012345], |
|     (instregex "^ST4Fourv(2d)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345], |
|     (instregex "^ST4i(8|16|32)_POST$")>; |
| def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345], |
|     (instregex "^ST4i(64)_POST$")>; |
| |
| |
| //===----------------------------------------------------------------------===// |
| // Instruction Tables in VXU |
| //===----------------------------------------------------------------------===// |
| // all uOps are not clearly written in the VXU document |
| |
| // I2V |
| def : InstRW<[ORYONWrite_I2V_4Cyc_I45], |
|     (instregex "^FMOV[HSD][WX]r", |
|                "^FMOVDXHighr")>; |
| |
| // I2V with convert |
| def : InstRW<[ORYONWrite_I2V_7Cyc_I45], |
|     (instregex "^[SU]CVTF[SU][XW][HSD]ri")>; |
| |
| // V2I |
| // NOTE(review): "FMOVXDHighr" is written without the leading '^' that the |
| // neighbouring patterns carry. |
| def : InstRW<[ORYONWrite_V2I_3Cyc_FP01], |
|     (instregex "^FMOV[WX][HSD]r", |
|                "FMOVXDHighr")>; |
| |
| // V2I with convert 2nd [SU] necessary? |
| def : InstRW<[ORYONWrite_V2I_6Cyc_FP01], |
|     (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; |
| |
| // float to float move (register and immediate forms), row 7 in big chart |
| def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], |
|     (instregex "^FMOV[HSD]r")>; |
| def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], |
|     (instregex "^FMOV[HSD]i")>; |
| |
| // float to float conversion within VXU, precision conversion |
| def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], |
|     (instregex "^FJCVTZS")>; |
| def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], |
|     (instregex "^FCVT[HSD][HSD]r", |
|                "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>; |
| |
| // floating comparison write to NZCV (plain and conditional compares) |
| def : InstRW<[ORYONWrite_2Cyc_FP01], |
|     (instregex "^FCMP(E)?[HSD]r[ir]")>; |
| def : InstRW<[ORYONWrite_2Cyc_FP01], |
|     (instregex "^FCCMP(E)?[HSD]rr")>; |
| |
| // floating point conditional select |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^FCSEL")>; |
| |
| // floating multiply-add |
| def : InstRW<[ORYONWrite_4Cyc_FP0123], |
|     (instregex "^(F|FN)MADD", |
|                "^(F|FN)MSUB")>; |
| |
| // floating unary, cycle/throughput? xls row14 |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^F(ABS|NEG)[SD]r")>; |
| |
| // floating division/square root |
| // Scalar divide: H/S/D use the 7/8/10-cycle FP3 writes. |
| def : InstRW<[ORYONWrite_7Cyc_FP3], |
|     (instregex "^FDIVHrr")>; |
| def : InstRW<[ORYONWrite_8Cyc_FP3], |
|     (instregex "^FDIVSrr")>; |
| def : InstRW<[ORYONWrite_10Cyc_FP3], |
|     (instregex "^FDIVDrr")>; |
| |
| // Scalar square root: H/S/D use the 8/10/13-cycle FP3 "_RC" writes. |
| def : InstRW<[ORYONWrite_8Cyc_FP3_RC], |
|     (instregex "^FSQRTHr")>; |
| def : InstRW<[ORYONWrite_10Cyc_FP3_RC], |
|     (instregex "^FSQRTSr")>; |
| def : InstRW<[ORYONWrite_13Cyc_FP3_RC], |
|     (instregex "^FSQRTDr")>; |
| |
| //========== |
| // SIMD move instructions |
| //========== |
| |
| // ASIMD DUP element |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^DUPv.+lane")>; |
| |
| // ASIMD DUP general; throughput undecided, 3? FP0123 |
| // VXU doc, p42, 2 uOps |
| def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], |
|     (instregex "^DUPv.+gpr")>; |
| |
| // ASIMD insert, element to element |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^INSv.+lane")>; |
| |
| // ASIMD insert, gen reg 3? FP0123? |
| def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], |
|     (instregex "^INSv.+gpr")>; |
| |
| // ASIMD move, FP immed |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^FMOVv")>; |
| |
| // ASIMD transfer, element to gen reg |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^[SU]MOVv")>; |
| |
| //========== |
| // SIMD arithmetic instructions |
| //========== |
| |
| // Integer add/sub and bitwise logic: 2-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^ADDv", |
|                "^SUBv", |
|                "^BIFv", |
|                "^BITv", |
|                "^BSLv", |
|                "^ANDv", |
|                "^BICv", |
|                "^EORv", |
|                "^ORRv", |
|                "^ORNv")>; |
| |
| // FP add/sub/absolute-difference: 3-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^FABDv", |
|                "^FADDv", |
|                "^FSUBv")>; |
| |
| // floating division, selected by the 16/32/64 suffix of the opcode name |
| def : InstRW<[ORYONWrite_6Cyc_FP3], |
|     (instregex "^FDIVv.*16$")>; |
| def : InstRW<[ORYONWrite_7Cyc_FP3], |
|     (instregex "^FDIVv.*32$")>; |
| def : InstRW<[ORYONWrite_9Cyc_FP3], |
|     (instregex "^FDIVv.*64$")>; |
| |
| // FP multiply and reciprocal / reciprocal-sqrt step: 4-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_4Cyc_FP0123], |
|     (instregex "^FMUL(X)?v", |
|                "^FRECPSv", |
|                "^FRSQRTSv")>; |
| |
| // Integer multiply / multiply-accumulate: 3-cycle FP0123 write. |
| // NOTE(review): UABAv is listed here (3 cycles) while SABAv is in the |
| // 2-cycle group below - confirm the asymmetry is intended. |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^MLAv", |
|                "^MLSv", |
|                "^MULv", |
|                "^PMULv", |
|                "UABAv")>; |
| |
| // Remaining integer arithmetic: 2-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "SABAv", |
|                "SABDv", |
|                "^(SH|UH)(ADD|SUB)v", |
|                "^S(MAX|MIN)v", |
|                "^(SQ|UQ)(ADD|SUB)v", |
|                "^(SQ|SQR|UQ|UQR)SHLv", |
|                "^(SR|UR)HADDv", |
|                "^(SR|UR)SHLv", |
|                "^UABDv", |
|                "^U(MAX|MIN)v")>; |
| // IMAX or UMAX in the above line |
| //========== |
| // SIMD compare instructions |
| //========== |
| |
| // Integer (CM*), FP (FCM*) and FP absolute (FAC*) compares all share the |
| // 2-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^CMEQv", |
|                "^CMGEv", |
|                "^CMGTv", |
|                "^CMLEv", |
|                "^CMLTv", |
|                "^CMHIv", |
|                "^CMHSv", |
|                "^FCMEQv", |
|                "^FCMGEv", |
|                "^FCMGTv", |
|                "^FCMLEv", |
|                "^FCMLTv", |
|                "^FACGEv", |
|                "^FACGTv")>; |
| |
| //========== |
| // SIMD widening and narrowing arithmetic instructions |
| //========== |
| // NO need to list ADDHN2, RADDHN2, RSUBHN2 as they are not distinguished |
| // from ADDHN, RADDHN, RSUBHN in td file(v16i8, v8i16, v4i32). |
| |
| // Narrowing high-half add/sub, long absolute difference, and long/wide |
| // add/sub: 2-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^ADDHNv", |
|                "^SUBHNv", |
|                "^RADDHNv", |
|                "^RSUBHNv", |
|                "^SABD(L|L2)v", |
|                "^UABD(L|L2)v", |
|                "^(S|U)(ADD|SUB)(L|L2|W|W2)v")>; |
| |
| // Long polynomial multiply, long absolute-difference accumulate, and long |
| // multiply / multiply-accumulate / multiply-subtract: 3-cycle FP0123 write. |
| // The middle alternation is (MLA|MLS|MUL) so that the S/U MLAL, MLSL and |
| // MULL opcodes are all covered. |
| // NOTE(review): assumes no other InstRW in this model already lists the |
| // SMLSL/UMLSL opcodes - none is visible in this part of the file. |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^PMUL(L|L2)v", |
|                "^SABA(L|L2)v", |
|                "^(S|U|SQ)(MLA|MLS|MUL)(L|L2)v")>; |
| |
| //========== |
| // SIMD unary arithmetic instructions |
| //========== |
| // ^MVNv is an alias of ^NOTv |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^ABSv", |
|                "^CLSv", |
|                "^CLZv", |
|                "^CNTv", |
|                "^NEGv", |
|                "^NOTv", |
|                "^RBITv", |
|                "^REV(16|32|64)v", |
|                "^SQ(ABS|NEG)v", |
|                "^SQ(XT|XTU)(N|N2)v", |
|                "^(SU|US)QADDv", |
|                "^UQXT(N|N2)v", |
|                "^XTN2?v")>; |
| |
| // FP convert long/narrow, FP round, reciprocal-sqrt estimate and pairwise |
| // add-long (with and without accumulate): 3-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^FCVT(L|L2|N|N2|XN|XN2)v", |
|                "^FRINT[AIMNPXZ]v", |
|                "^FRSQRTEv", |
|                "^(S|U)ADALPv", |
|                "^(S|U)ADDLPv")>; |
| |
| // Estimate instructions mapped to the 3-cycle FP0 write. |
| def : InstRW<[ORYONWrite_3Cyc_FP0], |
|     (instregex "^URECPEv", |
|                "^URSQRTEv", |
|                "^FRECPEv", |
|                "^FRECPXv")>; |
| |
| // Vector square root, selected by the 16/32/64 suffix of the opcode name. |
| def : InstRW<[ORYONWrite_8Cyc_FP3_RC], |
|     (instregex "^FSQRTv.*16$")>; |
| def : InstRW<[ORYONWrite_10Cyc_FP3_RC], |
|     (instregex "^FSQRTv.*32$")>; |
| def : InstRW<[ORYONWrite_13Cyc_FP3_RC], |
|     (instregex "^FSQRTv.*64$")>; |
| |
| //========== |
| // SIMD binary element arithmetic instructions |
| //========== |
| |
| // FP multiply-accumulate / multiply-subtract: 4-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_4Cyc_FP0123], |
|     (instregex "^FMLAv", |
|                "^FMLSv")>; |
| |
| // Saturating doubling multiply-high family: 3-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^SQDMULHv", |
|                "^SQRD(MLA|MLS|MUL)Hv")>; |
| |
| //========== |
| // SIMD permute instructions |
| //========== |
| |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^EXTv", |
|                "^TRN(1|2)v", |
|                "^UZP(1|2)v", |
|                "^ZIP(1|2)v")>; |
| |
| //========== |
| // SIMD immediate instructions |
| //========== |
| |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^MOVIv", |
|                "^MVNIv")>; |
| |
| //========== |
| // SIMD shift(immediate) instructions |
| //========== |
| // Every pattern below shares the 2-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^RSHR(N|N2)v", |
|                "^SHLv", |
|                "^(SHL|SHR)(N|N2)v", |
|                "^SLIv", |
|                "^(SQ|SQR)SHR(U)?(N|N2)v", |
|                "^(UQ|UQR)SHR(N|N2)v", |
|                "^SQSHLUv", |
|                "^SRIv", |
|                "^(S|SR|U|UR)SHRv", |
|                "^(S|SR|U|UR)SRAv", |
|                "^(S|U)SHL(L|L2)v")>; |
| |
| //========== |
| // SIMD floating-point and integer conversion instructions |
| //========== |
| // same as above conversion |
| |
| //========== |
| // SIMD reduce (across vector lanes) instructions |
| //========== |
| |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^ADDVv", |
|                "^(FMAX|FMIN)(V|NMV)v", |
|                "^(S|U)ADDLVv", |
|                "^(S|U)(MAX|MIN)Vv")>; |
| //========== |
| // SIMD pairwise arithmetic instructions |
| //========== |
| |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^ADDPv", |
|                "^FADDPv", |
|                "^(FMAX|FMIN)(NMP|P)v", |
|                "^(S|U)(MIN|MAX)Pv")>; |
| |
| //========== |
| // SIMD dot product instructions |
| //========== |
| |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^(U|S)DOTv")>; |
| |
| //========== |
| // SIMD table lookup instructions |
| //========== |
| |
| // TBL 1-reg/2-reg; TBX 1-reg, 1uOp, throughput=4 latency=2 |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instrs TBLv8i8One, TBLv16i8One, |
|             TBXv8i8One, TBXv16i8One, |
|             TBLv8i8Two, TBLv16i8Two)>; |
| |
| // TBL 3-reg/4-reg, 3uops, throughput=4/3=1.33 latency=4 |
| def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC], |
|     (instrs TBLv8i8Three, TBLv16i8Three, |
|             TBLv8i8Four,  TBLv16i8Four)>; |
| |
| // TBX 2-reg 2 uOps, throughput=2 latency=4 |
| def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_RC], |
|     (instrs TBXv8i8Two, TBXv16i8Two)>; |
| |
| // TBX 3-reg/4-reg, 4uOps, throughput=1, latency=6 |
| def : InstRW<[ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC], |
|     (instrs TBXv8i8Three, TBXv16i8Three, |
|             TBXv8i8Four,  TBXv16i8Four)>; |
| |
| |
| //========== |
| // SIMD complex number arithmetic instructions |
| //========== |
| |
| // Complex add and complex multiply-accumulate: 4-cycle FP0123 write. |
| def : InstRW<[ORYONWrite_4Cyc_FP0123], |
|     (instregex "^FCADDv", |
|                "^FCMLAv")>; |
| |
| //========== |
| // SIMD cryptographic instructions |
| //========== |
| // The "a,b" figures in the comments below are presumably latency and |
| // throughput taken from the hardware documentation - TODO confirm. |
| |
| // 3,4 on IMLA, CRYP |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^AES[DE]", |
|                "^SM3(TT1|TT2)(A|B)")>; |
| |
| // 2,4 on CRYP |
| def : InstRW<[ORYONWrite_2Cyc_FP0123], |
|     (instregex "^AESI?MC", |
|                "^EOR3", |
|                "^RAX1", |
|                "^XAR", |
|                "^BCAX", |
|                "^SM3SS1", |
|                "^SM3PART(W1|W2)")>; |
| |
| // 5,1 on CRYP |
| def : InstRW<[ORYONWrite_5Cyc_FP1], |
|     (instregex "^SM4E", |
|                "^SM4EKEY")>; |
| |
| // 2,1 on CRYP |
| def : InstRW<[ORYONWrite_2Cyc_FP1], |
|     (instregex "^SHA1(H|SU0|SU1)", |
|                "^SHA256SU0", |
|                "^SHA512(SU0|SU1)")>; |
| |
| // 3,1 on CRYP |
| def : InstRW<[ORYONWrite_3Cyc_FP1], |
|     (instregex "^SHA256SU1", |
|                "^SHA512(H|H2)")>; |
| |
| // 4,0.25 on CRYP |
| def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], |
|     (instregex "^SHA1(C|P|M)", |
|                "^SHA256(H|H2)")>; |
| |
| //========== |
| // SIMD v8.6 instructions |
| //========== |
| |
| // 4,2 on IMLA |
| def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], |
|     (instregex "^(S|U|US)MMLA$")>; |
| |
| // 4,0.5 on IMLA |
| // NOTE(review): the comment says 4 but the write is named *_8Cyc_*; confirm. |
| def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], |
|     (instregex "^BFMMLA$")>; |
| |
| // 4,0.5 on IMLA |
| // NOTE(review): same 4-vs-8 mismatch between comment and write name. |
| def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], |
|     (instregex "^BFMLAL(B|T)")>; |
| |
| // 3,4 |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^(US|SU)DOTv")>; |
| |
| // 3,1 |
| // NOTE(review): the comment says 3 but the write is named *_4Cyc_*; confirm. |
| def : InstRW<[ORYONWrite_4Cyc_FP0123], |
|     (instregex "^BF(16)?DOTv")>; |
| |
| // 3,4 |
| def : InstRW<[ORYONWrite_3Cyc_FP0123], |
|     (instregex "^BFCVT(N|N2)?$")>; |
| |
| |
| } // SchedModel = OryonModel |