This patch has been committed to the master branch:
https://gcc.gnu.org/pipermail/gcc-patches/2021-April/569135.html
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=6efd040c301b06fae51657c8370ad940c5c3d513
Learn more about the built-in functions for performing arithmetic with overflow checking:
https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
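As a quick illustration (my own toy example, not from the patch; checked_add is an arbitrary name), the generic builtin computes the wrapped result and reports whether it overflowed:

/* Toy example (mine): __builtin_add_overflow stores the wrapped sum
   in *res and returns true iff the infinite-precision result does
   not fit in it.  */
int
checked_add (int a, int b, int *res)
{
  return __builtin_add_overflow (a, b, res);
}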
Since these functions are used pervasively in the kernel, drivers, etc., adding patterns for them to the RISC-V backend should give us some performance improvement.
An example:
https://lwn.net/Articles/623368/
Let’s start with how the internal arithmetic functions are handled in the middle-end GIMPLE:
gcc/internal-fn.c
static void
expand_arith_overflow (enum tree_code code, gimple *stmt)
{
  tree lhs = gimple_call_lhs (stmt);
  if (lhs == NULL_TREE)
    return;
  tree arg0 = gimple_call_arg (stmt, 0);
  tree arg1 = gimple_call_arg (stmt, 1);
  tree type = TREE_TYPE (TREE_TYPE (lhs));
  int uns0_p = TYPE_UNSIGNED (TREE_TYPE (arg0));
  int uns1_p = TYPE_UNSIGNED (TREE_TYPE (arg1));
  int unsr_p = TYPE_UNSIGNED (type);
  int prec0 = TYPE_PRECISION (TREE_TYPE (arg0));
  int prec1 = TYPE_PRECISION (TREE_TYPE (arg1));
  int precres = TYPE_PRECISION (type);
  location_t loc = gimple_location (stmt);
  if (!uns0_p && get_range_pos_neg (arg0) == 1)
    uns0_p = true;
  if (!uns1_p && get_range_pos_neg (arg1) == 1)
    uns1_p = true;
  int pr = get_min_precision (arg0, uns0_p ? UNSIGNED : SIGNED);
  prec0 = MIN (prec0, pr);
  pr = get_min_precision (arg1, uns1_p ? UNSIGNED : SIGNED);
  prec1 = MIN (prec1, pr);

  /* If uns0_p && uns1_p, precop is minimum needed precision
     of unsigned type to hold the exact result, otherwise
     precop is minimum needed precision of signed type to
     hold the exact result.  */
  int precop;
  if (code == MULT_EXPR)
    precop = prec0 + prec1 + (uns0_p != uns1_p);
  else
    {
      if (uns0_p == uns1_p)
        precop = MAX (prec0, prec1) + 1;
      else if (uns0_p)
        precop = MAX (prec0 + 1, prec1) + 1;
      else
        precop = MAX (prec0, prec1 + 1) + 1;
    }
  int orig_precres = precres;

  do
    {
      if ((uns0_p && uns1_p)
          ? ((precop + !unsr_p) <= precres
             /* u1 - u2 -> ur can overflow, no matter what precision
                the result has.  */
             && (code != MINUS_EXPR || !unsr_p))
          : (!unsr_p && precop <= precres))
        {
          /* The infinity precision result will always fit into result.  */
          rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
          write_complex_part (target, const0_rtx, true);
          scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
          struct separate_ops ops;
          ops.code = code;
          ops.type = type;
          ops.op0 = fold_convert_loc (loc, type, arg0);
          ops.op1 = fold_convert_loc (loc, type, arg1);
          ops.op2 = NULL_TREE;
          ops.location = loc;
          rtx tem = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
          expand_arith_overflow_result_store (lhs, target, mode, tem);
          return;
        }

      /* For operations with low precision, if target doesn't have them, start
         with precres widening right away, otherwise do it only if the most
         simple cases can't be used.  */
      const int min_precision = targetm.min_arithmetic_precision ();
      if (orig_precres == precres && precres < min_precision)
        ;
      else if ((uns0_p && uns1_p && unsr_p && prec0 <= precres
                && prec1 <= precres)
               || ((!uns0_p || !uns1_p) && !unsr_p
                   && prec0 + uns0_p <= precres
                   && prec1 + uns1_p <= precres))
        {
          arg0 = fold_convert_loc (loc, type, arg0);
          arg1 = fold_convert_loc (loc, type, arg1);
          switch (code)
            {
            case MINUS_EXPR:
              if (integer_zerop (arg0) && !unsr_p)
                {
                  expand_neg_overflow (loc, lhs, arg1, false, NULL);
                  return;
                }
              /* FALLTHRU */
            case PLUS_EXPR:
              expand_addsub_overflow (loc, code, lhs, arg0, arg1, unsr_p,
                                      unsr_p, unsr_p, false, NULL);
              return;
            case MULT_EXPR:
              expand_mul_overflow (loc, lhs, arg0, arg1, unsr_p,
                                   unsr_p, unsr_p, false, NULL);
              return;
            default:
              gcc_unreachable ();
            }
        }

      /* MORE BELOW BUT LET'S JUST IGNORE THEM FOR NOW */
      ......
}
RISC-V didn’t implement targetm.min_arithmetic_precision, so by cross-referencing the SPARC port, I added:
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index d489717b2a5..cf94f5c9658 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 172c7ca7c98..0521c8881ae 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -121,6 +121,10 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
This allows the *.w instructions to be used under RV64 when 32-bit operands are involved.
The overflow expansion is then handled in functions like the following:
expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
tree arg0, tree arg1, bool unsr_p, bool uns0_p,
bool uns1_p, bool is_ubsan, tree *datap)
- code: the operation’s tree code (PLUS_EXPR or MINUS_EXPR).
- arg0: op0
- arg1: op1
- unsr_p: whether the result is unsigned.
- uns0_p: whether op0 is unsigned.
- uns1_p: whether op1 is unsigned.
Using add/sub overflow as an example:
......
  /* u1 +- u2 -> ur  */
  if (uns0_p && uns1_p && unsr_p)
    {
      insn_code icode = optab_handler (code == PLUS_EXPR ? uaddv4_optab
                                       : usubv4_optab, mode);
      ......

  /* s1 +- s2 -> sr  */
 do_signed:
  {
    insn_code icode = optab_handler (code == PLUS_EXPR ? addv4_optab
                                     : subv4_optab, mode);
    ......
If arg0, arg1, and the result are all unsigned, the expansion uses uaddv4_optab or usubv4_optab, depending on whether the operation is an addition or a subtraction; otherwise it falls through to the signed addv4_optab/subv4_optab.
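To make the mapping concrete, here is a small illustration of my own (not from the patch); the comments show which flags expand_addsub_overflow sees for each call. (Note from the code earlier that uns0_p/uns1_p can also be promoted to true when range information proves an operand non-negative; for plain function parameters they stay as declared.)

unsigned int
u_add (unsigned int a, unsigned int b, unsigned int *r)
{
  /* uns0_p = uns1_p = unsr_p = 1: the "u1 + u2 -> ur" case,
     so uaddv4_optab is consulted.  */
  return __builtin_add_overflow (a, b, r);
}

int
s_add (int a, int b, int *r)
{
  /* uns0_p = uns1_p = unsr_p = 0: the "s1 + s2 -> sr" case,
     so addv4_optab is consulted (the do_signed path).  */
  return __builtin_add_overflow (a, b, r);
}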
These optabs are defined in:
gcc/optabs.def
...
OPTAB_D (addv4_optab, "addv$I$a4")
OPTAB_D (subv4_optab, "subv$I$a4")
OPTAB_D (mulv4_optab, "mulv$I$a4")
OPTAB_D (uaddv4_optab, "uaddv$I$a4")
OPTAB_D (usubv4_optab, "usubv$I$a4")
OPTAB_D (umulv4_optab, "umulv$I$a4")
...
Then you’re free to add those patterns (they expand to names like addvsi4/addvdi4, which a define_expand "addv<mode>4" with a mode iterator provides) in:
gcc/config/riscv/riscv.md
For addition:
signed addition (SImode in RV32 || DImode in RV64):
	add	t0, t1, t2
	slti	t3, t2, 0
	slt	t4, t0, t1
	bne	t3, t4, overflow

signed addition (SImode in RV64):
	add	t0, t1, t2
	addw	t3, t1, t2
	bne	t0, t3, overflow
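The check relies on a standard identity; here is a C model of the same condition (my own sketch, relying on GCC’s wrapping unsigned arithmetic):

#include <stdint.h>

/* C model (mine) of the branch condition above: for the wrapping sum
   t0 = t1 + t2, signed overflow occurred iff the sign of t2 (slti)
   disagrees with the signed comparison t0 < t1 (slt).  */
int
sadd_overflows (int32_t t1, int32_t t2)
{
  int32_t t0 = (int32_t) ((uint32_t) t1 + (uint32_t) t2);  /* add          */
  return (t2 < 0) != (t0 < t1);                            /* slti/slt/bne */
}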
(define_expand "addv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r,r") (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") (match_operand:GPR 2 "arith_operand" " r,I"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); emit_insn (gen_addsi3 (operands[0], operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else t4 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); else t5 = operands[2]; emit_insn (gen_adddi3 (t3, t4, t5)); emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } else { rtx t3 = gen_reg_rtx (<MODE>mode); rtx t4 = gen_reg_rtx (<MODE>mode); emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]); emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1])); riscv_expand_conditional_branch (operands[3], NE, t3, t4); } DONE; })
unsigned addition (SImode in RV32 || DImode in RV64):
	add	t0, t1, t2
	bltu	t0, t1, overflow

unsigned addition (SImode in RV64):
	sext.w	t3, t1
	addw	t0, t1, t2
	bltu	t0, t3, overflow
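A C model of the same check (my own sketch):

#include <stdint.h>

/* C model (mine): an unsigned add wraps iff the result is smaller
   than an operand, hence the single bltu.  Under RV64, addw yields a
   sign-extended 32-bit result, so the other side of the comparison is
   sign-extended too (sext.w) to keep the comparison consistent.  */
int
uadd_overflows (uint32_t t1, uint32_t t2)
{
  uint32_t t0 = t1 + t2;  /* add(w) */
  return t0 < t1;         /* bltu   */
}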
(define_expand "uaddv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r,r") (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") (match_operand:GPR 2 "arith_operand" " r,I"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; emit_insn (gen_addsi3 (operands[0], operands[1], operands[2])); emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], LTU, t4, t3); } else { emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]); } DONE; })
For subtraction:
signed subtraction (SImode in RV32 || DImode in RV64):
	sub	t0, t1, t2
	slti	t3, t2, 0
	slt	t4, t1, t0
	bne	t3, t4, overflow

signed subtraction (SImode in RV64):
	sub	t0, t1, t2
	subw	t3, t1, t2
	bne	t0, t3, overflow
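Again, a C model of the check (my own sketch):

#include <stdint.h>

/* C model (mine) of the check above: for the wrapping difference
   t0 = t1 - t2, signed overflow occurred iff the sign of t2 (slti)
   disagrees with the signed comparison t1 < t0 (slt).  */
int
ssub_overflows (int32_t t1, int32_t t2)
{
  int32_t t0 = (int32_t) ((uint32_t) t1 - (uint32_t) t2);  /* sub          */
  return (t2 < 0) != (t1 < t0);                            /* slti/slt/bne */
}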
(define_expand "subv<mode>4" [(set (match_operand:GPR 0 "register_operand" "= r") (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); emit_insn (gen_subsi3 (operands[0], operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else t4 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); else t5 = operands[2]; emit_insn (gen_subdi3 (t3, t4, t5)); emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } else { rtx t3 = gen_reg_rtx (<MODE>mode); rtx t4 = gen_reg_rtx (<MODE>mode); emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]); emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0])); riscv_expand_conditional_branch (operands[3], NE, t3, t4); } DONE; })
unsigned subtraction (SImode in RV32 || DImode in RV64):
	sub	t0, t1, t2
	bltu	t1, t0, overflow

unsigned subtraction (SImode in RV64):
	sext.w	t3, t1
	subw	t0, t1, t2
	bltu	t3, t0, overflow
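The C model of this one (my own sketch):

#include <stdint.h>

/* C model (mine): an unsigned subtraction wraps iff t1 < t2,
   equivalently iff the wrapped result ends up larger than the
   minuend, which is exactly what the bltu checks.  */
int
usub_overflows (uint32_t t1, uint32_t t2)
{
  uint32_t t0 = t1 - t2;  /* sub(w) */
  return t1 < t0;         /* bltu   */
}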
(define_expand "usubv<mode>4" [(set (match_operand:GPR 0 "register_operand" "= r") (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; emit_insn (gen_subsi3 (operands[0], operands[1], operands[2])); emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], LTU, t3, t4); } else { emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]); } DONE; })
For multiplication:
signed multiplication (SImode in RV32 || DImode in RV64):
	mulh	t3, t1, t2
	mul	t0, t1, t2
	srai	t4, t0, 31 (RV32) / 63 (RV64)
	bne	t3, t4, overflow

signed multiplication (SImode in RV64):
	mul	t0, t1, t2
	sext.w	t3, t0
	bne	t0, t3, overflow
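A C model of the mulh/mul/srai sequence (my own sketch; the arithmetic right shift of a negative value is implementation-defined in strict C but is what GCC guarantees):

#include <stdint.h>

/* C model (mine): the signed multiplication overflows iff the high
   word of the full product is not the sign-extension of the low word
   (srai by 31/63 replicates the low word's sign bit).  */
int
smul_overflows (int32_t t1, int32_t t2)
{
  int64_t p  = (int64_t) t1 * (int64_t) t2;
  int32_t lo = (int32_t) p;          /* mul        */
  int32_t hi = (int32_t) (p >> 32);  /* mulh       */
  return hi != (lo >> 31);           /* srai + bne */
}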
(define_expand "mulv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r") (mult:GPR (match_operand:GPR 1 "register_operand" " r") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "TARGET_MUL" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else t4 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); else t5 = operands[2]; emit_insn (gen_muldi3 (t3, t4, t5)); emit_move_insn (operands[0], gen_lowpart (SImode, t3)); emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } else { rtx hp = gen_reg_rtx (<MODE>mode); rtx lp = gen_reg_rtx (<MODE>mode); emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2])); emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1))); riscv_expand_conditional_branch (operands[3], NE, hp, lp); } DONE; })
unsigned multiplication (SImode in RV32 || DImode in RV64):
	mulhu	t3, t1, t2
	mul	t0, t1, t2
	bne	t3, zero, overflow

unsigned multiplication (SImode in RV64):
	slli	t0, t0, 32
	slli	t1, t1, 32
	mulhu	t2, t0, t1
	srli	t3, t2, 32
	bne	t3, zero, overflow
	sext.w	t2, t2
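And the C model (my own sketch):

#include <stdint.h>

/* C model (mine): an unsigned multiplication fits iff the high word
   of the full product is zero, so a single mulhu answers the
   question.  */
int
umul_overflows (uint32_t t1, uint32_t t2)
{
  uint64_t p = (uint64_t) t1 * (uint64_t) t2;
  return (uint32_t) (p >> 32) != 0;  /* mulhu + bne ..., zero */
}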
(define_expand "umulv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r") (mult:GPR (match_operand:GPR 1 "register_operand" " r") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "TARGET_MUL" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); rtx t7 = gen_reg_rtx (DImode); rtx t8 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0)); else t4 = operands[2]; emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32))); emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32))); emit_insn (gen_umuldi3_highpart (t7, t5, t6)); emit_move_insn (operands[0], gen_lowpart (SImode, t7)); emit_insn (gen_lshrdi3 (t8, t7, GEN_INT (32))); riscv_expand_conditional_branch (operands[3], NE, t8, const0_rtx); } else { rtx hp = gen_reg_rtx (<MODE>mode); emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2])); emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx); } DONE; })
Some test cases for reference (written by Jim Wilson):
#include <stdlib.h>

int sub_add_p (int i, int j) { int k; return __builtin_add_overflow_p (i, j, k); }
int sub_sub_p (int i, int j) { int k; return __builtin_sub_overflow_p (i, j, k); }
int sub_mul_p (int i, int j) { int k; return __builtin_mul_overflow_p (i, j, k); }

long sub_add_p_long (long i, long j) { long k; return __builtin_add_overflow_p (i, j, k); }
long sub_sub_p_long (long i, long j) { long k; return __builtin_sub_overflow_p (i, j, k); }
long sub_mul_p_long (long i, long j) { long k; return __builtin_mul_overflow_p (i, j, k); }

int sub_add (int i, int j) { int k; if (__builtin_sadd_overflow (i, j, &k)) abort (); return k; }
int sub_sub (int i, int j) { int k; if (__builtin_ssub_overflow (i, j, &k)) abort (); return k; }
int sub_mul (int i, int j) { int k; if (__builtin_smul_overflow (i, j, &k)) abort (); return k; }

int sub_uadd (int i, int j) { int k; if (__builtin_uadd_overflow (i, j, &k)) abort (); return k; }
int sub_usub (int i, int j) { int k; if (__builtin_usub_overflow (i, j, &k)) abort (); return k; }
int sub_umul (int i, int j) { int k; if (__builtin_umul_overflow (i, j, &k)) abort (); return k; }

long sub_add_long (long i, long j) { long k; if (__builtin_saddl_overflow (i, j, &k)) abort (); return k; }
long sub_sub_long (long i, long j) { long k; if (__builtin_ssubl_overflow (i, j, &k)) abort (); return k; }
long sub_mul_long (long i, long j) { long k; if (__builtin_smull_overflow (i, j, &k)) abort (); return k; }

long sub_uadd_long (long i, long j) { long k; if (__builtin_uaddl_overflow (i, j, &k)) abort (); return k; }
long sub_usub_long (long i, long j) { long k; if (__builtin_usubl_overflow (i, j, &k)) abort (); return k; }
long sub_umul_long (long i, long j) { long k; if (__builtin_umull_overflow (i, j, &k)) abort (); return k; }
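To eyeball the result, compile the file above with a RISC-V cross compiler, e.g. something along the lines of riscv64-unknown-elf-gcc -O2 -S test.c (toolchain name and flags will vary with your setup), and check that the functions expand to the short add/bltu, mulh/bne, etc. sequences shown earlier rather than the longer generic expansions.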
Whole patch:
From 6efd040c301b06fae51657c8370ad940c5c3d513 Mon Sep 17 00:00:00 2001
From: LevyHsu <[email protected]>
Date: Thu, 29 Apr 2021 13:42:04 +0800
Subject: [PATCH] RISC-V: Add patterns for builtin overflow.

gcc/
	* config/riscv/riscv.c (riscv_min_arithmetic_precision): New.
	* config/riscv/riscv.h (TARGET_MIN_ARITHMETIC_PRECISION): New.
	* config/riscv/riscv.md (addv<mode>4, uaddv<mode>4): New.
	(subv<mode>4, usubv<mode>4, mulv<mode>4, umulv<mode>4): New.
---
 gcc/config/riscv/riscv.c  |   8 ++
 gcc/config/riscv/riscv.h  |   4 +
 gcc/config/riscv/riscv.md | 245 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 257 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 17cdf705c32..e1064e374eb 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index d17096e1dfa..f3e85723c85 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -146,6 +146,10 @@ ASM_MISA_SPEC
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c3687d57047..0e35960fefa 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -467,6 +467,81 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "addv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+		  (match_operand:GPR 2 "arith_operand" " r,I")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+
+      emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+      else
+	t4 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+      else
+	t5 = operands[2];
+      emit_insn (gen_adddi3 (t3, t4, t5));
+      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+    }
+  else
+    {
+      rtx t3 = gen_reg_rtx (<MODE>mode);
+      rtx t4 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+      rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+      emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+      rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
+
+      emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
+      riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+    }
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+		  (match_operand:GPR 2 "arith_operand" " r,I")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+      else
+	t3 = operands[1];
+      emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
+    }
+  else
+    {
+      emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+      riscv_expand_conditional_branch (operands[3], LTU, operands[0],
+				       operands[1]);
+    }
+
+  DONE;
+})
+
 (define_insn "*addsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(sign_extend:DI
@@ -523,6 +598,85 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_expand "subv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "= r")
+	(minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+		   (match_operand:GPR 2 "register_operand" "  r")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+
+      emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+      else
+	t4 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+      else
+	t5 = operands[2];
+      emit_insn (gen_subdi3 (t3, t4, t5));
+      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+    }
+  else
+    {
+      rtx t3 = gen_reg_rtx (<MODE>mode);
+      rtx t4 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+
+      rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+      emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+      rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
+      emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
+
+      riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+    }
+
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "= r")
+	(minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+		   (match_operand:GPR 2 "register_operand" "  r")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+      else
+	t3 = operands[1];
+      emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
+    }
+  else
+    {
+      emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+      riscv_expand_conditional_branch (operands[3], LTU, operands[1],
+				       operands[0]);
+    }
+
+  DONE;
+})
+
+
 (define_insn "*subsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "= r")
 	(sign_extend:DI
@@ -614,6 +768,97 @@
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")])
 
+(define_expand "mulv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(mult:GPR (match_operand:GPR 1 "register_operand" " r")
+		  (match_operand:GPR 2 "register_operand" " r")))
+   (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+      else
+	t4 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+      else
+	t5 = operands[2];
+      emit_insn (gen_muldi3 (t3, t4, t5));
+
+      emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+    }
+  else
+    {
+      rtx hp = gen_reg_rtx (<MODE>mode);
+      rtx lp = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_ashr<mode>3 (lp, operands[0],
+				  GEN_INT (BITS_PER_WORD - 1)));
+
+      riscv_expand_conditional_branch (operands[3], NE, hp, lp);
+    }
+
+  DONE;
+})
+
+(define_expand "umulv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(mult:GPR (match_operand:GPR 1 "register_operand" " r")
+		  (match_operand:GPR 2 "register_operand" " r")))
+   (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+      rtx t7 = gen_reg_rtx (DImode);
+      rtx t8 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+      else
+	t3 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0));
+      else
+	t4 = operands[2];
+
+      emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32)));
+      emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32)));
+      emit_insn (gen_umuldi3_highpart (t7, t5, t6));
+      emit_move_insn (operands[0], gen_lowpart (SImode, t7));
+      emit_insn (gen_lshrdi3 (t8, t7, GEN_INT (32)));
+
+      riscv_expand_conditional_branch (operands[3], NE, t8, const0_rtx);
+    }
+  else
+    {
+      rtx hp = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+
+      riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
+    }
+
+  DONE;
+})
+
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(sign_extend:DI
-- 
2.27.0
Special Thanks to:
- Jim Wilson, for the thorough help and advice on GCC and GDB.
- Craig Topper, for pointing out that the SImode operand needs sext.w for unsigned add/sub in RV64.
- Andrew Waterman, for the better SImode signed add/sub and unsigned mul patterns in RV64.
- Kito Cheng, for patch submission.