This patch has been committed to the master branch:
https://gcc.gnu.org/pipermail/gcc-patches/2021-April/569135.html
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=6efd040c301b06fae51657c8370ad940c5c3d513
Learn more about the built-in functions for performing arithmetic with overflow checking:
https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
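As a quick illustration (my own toy example, not from the patch; checked_add is an arbitrary name), the generic builtin computes the wrapped result and reports whether it overflowed:

/* Toy example (mine): __builtin_add_overflow stores the wrapped sum
   in *res and returns true iff the infinite-precision result does
   not fit in it.  */
int
checked_add (int a, int b, int *res)
{
  return __builtin_add_overflow (a, b, res);
}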
Since these functions are used pervasively in the kernel, drivers, etc., adding patterns for them to the RISC-V backend should give us some performance improvement.
An example:
https://lwn.net/Articles/623368/
Let’s start with how the internal arithmetic functions are handled in the middle-end GIMPLE:
gcc/internal-fn.c
static void
expand_arith_overflow (enum tree_code code, gimple *stmt)
{
  tree lhs = gimple_call_lhs (stmt);
  if (lhs == NULL_TREE)
    return;
  tree arg0 = gimple_call_arg (stmt, 0);
  tree arg1 = gimple_call_arg (stmt, 1);
  tree type = TREE_TYPE (TREE_TYPE (lhs));
  int uns0_p = TYPE_UNSIGNED (TREE_TYPE (arg0));
  int uns1_p = TYPE_UNSIGNED (TREE_TYPE (arg1));
  int unsr_p = TYPE_UNSIGNED (type);
  int prec0 = TYPE_PRECISION (TREE_TYPE (arg0));
  int prec1 = TYPE_PRECISION (TREE_TYPE (arg1));
  int precres = TYPE_PRECISION (type);
  location_t loc = gimple_location (stmt);
  if (!uns0_p && get_range_pos_neg (arg0) == 1)
    uns0_p = true;
  if (!uns1_p && get_range_pos_neg (arg1) == 1)
    uns1_p = true;
  int pr = get_min_precision (arg0, uns0_p ? UNSIGNED : SIGNED);
  prec0 = MIN (prec0, pr);
  pr = get_min_precision (arg1, uns1_p ? UNSIGNED : SIGNED);
  prec1 = MIN (prec1, pr);

  /* If uns0_p && uns1_p, precop is minimum needed precision
     of unsigned type to hold the exact result, otherwise
     precop is minimum needed precision of signed type to
     hold the exact result.  */
  int precop;
  if (code == MULT_EXPR)
    precop = prec0 + prec1 + (uns0_p != uns1_p);
  else
    {
      if (uns0_p == uns1_p)
        precop = MAX (prec0, prec1) + 1;
      else if (uns0_p)
        precop = MAX (prec0 + 1, prec1) + 1;
      else
        precop = MAX (prec0, prec1 + 1) + 1;
    }
  int orig_precres = precres;

  do
    {
      if ((uns0_p && uns1_p)
          ? ((precop + !unsr_p) <= precres
             /* u1 - u2 -> ur can overflow, no matter what precision
                the result has.  */
             && (code != MINUS_EXPR || !unsr_p))
          : (!unsr_p && precop <= precres))
        {
          /* The infinity precision result will always fit into result.  */
          rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
          write_complex_part (target, const0_rtx, true);
          scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
          struct separate_ops ops;
          ops.code = code;
          ops.type = type;
          ops.op0 = fold_convert_loc (loc, type, arg0);
          ops.op1 = fold_convert_loc (loc, type, arg1);
          ops.op2 = NULL_TREE;
          ops.location = loc;
          rtx tem = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL);
          expand_arith_overflow_result_store (lhs, target, mode, tem);
          return;
        }

      /* For operations with low precision, if target doesn't have them, start
         with precres widening right away, otherwise do it only if the most
         simple cases can't be used.  */
      const int min_precision = targetm.min_arithmetic_precision ();
      if (orig_precres == precres && precres < min_precision)
        ;
      else if ((uns0_p && uns1_p && unsr_p && prec0 <= precres
                && prec1 <= precres)
               || ((!uns0_p || !uns1_p) && !unsr_p
                   && prec0 + uns0_p <= precres
                   && prec1 + uns1_p <= precres))
        {
          arg0 = fold_convert_loc (loc, type, arg0);
          arg1 = fold_convert_loc (loc, type, arg1);
          switch (code)
            {
            case MINUS_EXPR:
              if (integer_zerop (arg0) && !unsr_p)
                {
                  expand_neg_overflow (loc, lhs, arg1, false, NULL);
                  return;
                }
              /* FALLTHRU */
            case PLUS_EXPR:
              expand_addsub_overflow (loc, code, lhs, arg0, arg1, unsr_p,
                                      unsr_p, unsr_p, false, NULL);
              return;
            case MULT_EXPR:
              expand_mul_overflow (loc, lhs, arg0, arg1, unsr_p,
                                   unsr_p, unsr_p, false, NULL);
              return;
            default:
              gcc_unreachable ();
            }
        }

      /* MORE BELOW BUT LET'S JUST IGNORE THEM FOR NOW */
      ......
}
RISC-V didn’t implement targetm.min_arithmetic_precision, so by cross-referencing the SPARC port, I added:
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index d489717b2a5..cf94f5c9658 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 172c7ca7c98..0521c8881ae 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -121,6 +121,10 @@ extern const char *riscv_default_mtune (int argc, const char **argv);
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
This allows the *.w instructions to be used under RV64 when 32-bit operands are involved.
The overflow expansion is then handled in functions like the following:
expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
tree arg0, tree arg1, bool unsr_p, bool uns0_p,
bool uns1_p, bool is_ubsan, tree *datap)
- code: the operation’s tree code (PLUS_EXPR or MINUS_EXPR).
- arg0: op0
- arg1: op1
- unsr_p: whether the result is unsigned.
- uns0_p: whether op0 is unsigned.
- uns1_p: whether op1 is unsigned.
Using add/sub overflow as an example:
......
  /* u1 +- u2 -> ur  */
  if (uns0_p && uns1_p && unsr_p)
    {
      insn_code icode = optab_handler (code == PLUS_EXPR ? uaddv4_optab
                                       : usubv4_optab, mode);
      ......

  /* s1 +- s2 -> sr  */
 do_signed:
  {
    insn_code icode = optab_handler (code == PLUS_EXPR ? addv4_optab
                                     : subv4_optab, mode);
    ......
If arg0, arg1, and the result are all unsigned, the expansion uses uaddv4_optab or usubv4_optab, depending on whether the operation is an addition or a subtraction; otherwise it falls through to the signed addv4_optab/subv4_optab.
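To make the mapping concrete, here is a small illustration of my own (not from the patch); the comments show which flags expand_addsub_overflow sees for each call. (Note from the code earlier that uns0_p/uns1_p can also be promoted to true when range information proves an operand non-negative; for plain function parameters they stay as declared.)

unsigned int
u_add (unsigned int a, unsigned int b, unsigned int *r)
{
  /* uns0_p = uns1_p = unsr_p = 1: the "u1 + u2 -> ur" case,
     so uaddv4_optab is consulted.  */
  return __builtin_add_overflow (a, b, r);
}

int
s_add (int a, int b, int *r)
{
  /* uns0_p = uns1_p = unsr_p = 0: the "s1 + s2 -> sr" case,
     so addv4_optab is consulted (the do_signed path).  */
  return __builtin_add_overflow (a, b, r);
}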
These optabs are defined in:
gcc/optabs.def
...
OPTAB_D (addv4_optab, "addv$I$a4")
OPTAB_D (subv4_optab, "subv$I$a4")
OPTAB_D (mulv4_optab, "mulv$I$a4")
OPTAB_D (uaddv4_optab, "uaddv$I$a4")
OPTAB_D (usubv4_optab, "usubv$I$a4")
OPTAB_D (umulv4_optab, "umulv$I$a4")
...
Then you’re free to add those patterns (they expand to names like addvsi4/addvdi4, which a define_expand "addv<mode>4" with a mode iterator provides) in:
gcc/config/riscv/riscv.md
For addition:
signed addition (SImode in RV32 || DImode in RV64):
	add	t0, t1, t2
	slti	t3, t2, 0
	slt	t4, t0, t1
	bne	t3, t4, overflow

signed addition (SImode in RV64):
	add	t0, t1, t2
	addw	t3, t1, t2
	bne	t0, t3, overflow
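The check relies on a standard identity; here is a C model of the same condition (my own sketch, relying on GCC’s wrapping unsigned arithmetic):

#include <stdint.h>

/* C model (mine) of the branch condition above: for the wrapping sum
   t0 = t1 + t2, signed overflow occurred iff the sign of t2 (slti)
   disagrees with the signed comparison t0 < t1 (slt).  */
int
sadd_overflows (int32_t t1, int32_t t2)
{
  int32_t t0 = (int32_t) ((uint32_t) t1 + (uint32_t) t2);  /* add          */
  return (t2 < 0) != (t0 < t1);                            /* slti/slt/bne */
}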
(define_expand "addv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r,r") (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") (match_operand:GPR 2 "arith_operand" " r,I"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); emit_insn (gen_addsi3 (operands[0], operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else t4 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); else t5 = operands[2]; emit_insn (gen_adddi3 (t3, t4, t5)); emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } else { rtx t3 = gen_reg_rtx (<MODE>mode); rtx t4 = gen_reg_rtx (<MODE>mode); emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]); emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1])); riscv_expand_conditional_branch (operands[3], NE, t3, t4); } DONE; })
unsigned addition (SImode in RV32 || DImode in RV64):
	add	t0, t1, t2
	bltu	t0, t1, overflow

unsigned addition (SImode in RV64):
	sext.w	t3, t1
	addw	t0, t1, t2
	bltu	t0, t3, overflow
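A C model of the same check (my own sketch):

#include <stdint.h>

/* C model (mine): an unsigned add wraps iff the result is smaller
   than an operand, hence the single bltu.  Under RV64, addw yields a
   sign-extended 32-bit result, so the other side of the comparison is
   sign-extended too (sext.w) to keep the comparison consistent.  */
int
uadd_overflows (uint32_t t1, uint32_t t2)
{
  uint32_t t0 = t1 + t2;  /* add(w) */
  return t0 < t1;         /* bltu   */
}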
(define_expand "uaddv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r,r") (plus:GPR (match_operand:GPR 1 "register_operand" " r,r") (match_operand:GPR 2 "arith_operand" " r,I"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; emit_insn (gen_addsi3 (operands[0], operands[1], operands[2])); emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], LTU, t4, t3); } else { emit_insn (gen_add3_insn (operands[0], operands[1], operands[2])); riscv_expand_conditional_branch (operands[3], LTU, operands[0], operands[1]); } DONE; })
For subtraction:
signed subtraction (SImode in RV32 || DImode in RV64):
	sub	t0, t1, t2
	slti	t3, t2, 0
	slt	t4, t1, t0
	bne	t3, t4, overflow

signed subtraction (SImode in RV64):
	sub	t0, t1, t2
	subw	t3, t1, t2
	bne	t0, t3, overflow
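Again, a C model of the check (my own sketch):

#include <stdint.h>

/* C model (mine) of the check above: for the wrapping difference
   t0 = t1 - t2, signed overflow occurred iff the sign of t2 (slti)
   disagrees with the signed comparison t1 < t0 (slt).  */
int
ssub_overflows (int32_t t1, int32_t t2)
{
  int32_t t0 = (int32_t) ((uint32_t) t1 - (uint32_t) t2);  /* sub          */
  return (t2 < 0) != (t1 < t0);                            /* slti/slt/bne */
}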
(define_expand "subv<mode>4" [(set (match_operand:GPR 0 "register_operand" "= r") (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); emit_insn (gen_subsi3 (operands[0], operands[1], operands[2])); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else t4 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); else t5 = operands[2]; emit_insn (gen_subdi3 (t3, t4, t5)); emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } else { rtx t3 = gen_reg_rtx (<MODE>mode); rtx t4 = gen_reg_rtx (<MODE>mode); emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx); emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx)); rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]); emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0])); riscv_expand_conditional_branch (operands[3], NE, t3, t4); } DONE; })
unsigned subtraction (SImode in RV32 || DImode in RV64):
	sub	t0, t1, t2
	bltu	t1, t0, overflow

unsigned subtraction (SImode in RV64):
	sext.w	t3, t1
	subw	t0, t1, t2
	bltu	t3, t0, overflow
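The C model of this one (my own sketch):

#include <stdint.h>

/* C model (mine): an unsigned subtraction wraps iff t1 < t2,
   equivalently iff the wrapped result ends up larger than the
   minuend, which is exactly what the bltu checks.  */
int
usub_overflows (uint32_t t1, uint32_t t2)
{
  uint32_t t0 = t1 - t2;  /* sub(w) */
  return t1 < t0;         /* bltu   */
}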
(define_expand "usubv<mode>4" [(set (match_operand:GPR 0 "register_operand" "= r") (minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; emit_insn (gen_subsi3 (operands[0], operands[1], operands[2])); emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], LTU, t3, t4); } else { emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2])); riscv_expand_conditional_branch (operands[3], LTU, operands[1], operands[0]); } DONE; })
For multiplication:
signed multiplication (SImode in RV32 || DImode in RV64):
	mulh	t3, t1, t2
	mul	t0, t1, t2
	srai	t4, t0, 31 (RV32) / 63 (RV64)
	bne	t3, t4, overflow

signed multiplication (SImode in RV64):
	mul	t0, t1, t2
	sext.w	t3, t0
	bne	t0, t3, overflow
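A C model of the mulh/mul/srai sequence (my own sketch; the arithmetic right shift of a negative value is implementation-defined in strict C but is what GCC guarantees):

#include <stdint.h>

/* C model (mine): the signed multiplication overflows iff the high
   word of the full product is not the sign-extension of the low word
   (srai by 31/63 replicates the low word's sign bit).  */
int
smul_overflows (int32_t t1, int32_t t2)
{
  int64_t p  = (int64_t) t1 * (int64_t) t2;
  int32_t lo = (int32_t) p;          /* mul        */
  int32_t hi = (int32_t) (p >> 32);  /* mulh       */
  return hi != (lo >> 31);           /* srai + bne */
}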
(define_expand "mulv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r") (mult:GPR (match_operand:GPR 1 "register_operand" " r") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "TARGET_MUL" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); else t4 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); else t5 = operands[2]; emit_insn (gen_muldi3 (t3, t4, t5)); emit_move_insn (operands[0], gen_lowpart (SImode, t3)); emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } else { rtx hp = gen_reg_rtx (<MODE>mode); rtx lp = gen_reg_rtx (<MODE>mode); emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2])); emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); emit_insn (gen_ashr<mode>3 (lp, operands[0], GEN_INT (BITS_PER_WORD - 1))); riscv_expand_conditional_branch (operands[3], NE, hp, lp); } DONE; })
unsigned multiplication (SImode in RV32 || DImode in RV64):
	mulhu	t3, t1, t2
	mul	t0, t1, t2
	bne	t3, zero, overflow

unsigned multiplication (SImode in RV64):
	slli	t0, t0, 32
	slli	t1, t1, 32
	mulhu	t2, t0, t1
	srli	t3, t2, 32
	bne	t3, zero, overflow
	sext.w	t2, t2
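And the C model (my own sketch):

#include <stdint.h>

/* C model (mine): an unsigned multiplication fits iff the high word
   of the full product is zero, so a single mulhu answers the
   question.  */
int
umul_overflows (uint32_t t1, uint32_t t2)
{
  uint64_t p = (uint64_t) t1 * (uint64_t) t2;
  return (uint32_t) (p >> 32) != 0;  /* mulhu + bne ..., zero */
}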
(define_expand "umulv<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r") (mult:GPR (match_operand:GPR 1 "register_operand" " r") (match_operand:GPR 2 "register_operand" " r"))) (label_ref (match_operand 3 "" ""))] "TARGET_MUL" { if (TARGET_64BIT && <MODE>mode == SImode) { rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); rtx t7 = gen_reg_rtx (DImode); rtx t8 = gen_reg_rtx (DImode); if (GET_CODE (operands[1]) != CONST_INT) emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); else t3 = operands[1]; if (GET_CODE (operands[2]) != CONST_INT) emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0)); else t4 = operands[2]; emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32))); emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32))); emit_insn (gen_umuldi3_highpart (t7, t5, t6)); emit_move_insn (operands[0], gen_lowpart (SImode, t7)); emit_insn (gen_lshrdi3 (t8, t7, GEN_INT (32))); riscv_expand_conditional_branch (operands[3], NE, t8, const0_rtx); } else { rtx hp = gen_reg_rtx (<MODE>mode); emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2])); emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2])); riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx); } DONE; })
Some test cases for reference (written by Jim Wilson):
#include <stdlib.h>

int sub_add_p (int i, int j) { int k; return __builtin_add_overflow_p (i, j, k); }
int sub_sub_p (int i, int j) { int k; return __builtin_sub_overflow_p (i, j, k); }
int sub_mul_p (int i, int j) { int k; return __builtin_mul_overflow_p (i, j, k); }

long sub_add_p_long (long i, long j) { long k; return __builtin_add_overflow_p (i, j, k); }
long sub_sub_p_long (long i, long j) { long k; return __builtin_sub_overflow_p (i, j, k); }
long sub_mul_p_long (long i, long j) { long k; return __builtin_mul_overflow_p (i, j, k); }

int sub_add (int i, int j) { int k; if (__builtin_sadd_overflow (i, j, &k)) abort (); return k; }
int sub_sub (int i, int j) { int k; if (__builtin_ssub_overflow (i, j, &k)) abort (); return k; }
int sub_mul (int i, int j) { int k; if (__builtin_smul_overflow (i, j, &k)) abort (); return k; }

int sub_uadd (int i, int j) { int k; if (__builtin_uadd_overflow (i, j, &k)) abort (); return k; }
int sub_usub (int i, int j) { int k; if (__builtin_usub_overflow (i, j, &k)) abort (); return k; }
int sub_umul (int i, int j) { int k; if (__builtin_umul_overflow (i, j, &k)) abort (); return k; }

long sub_add_long (long i, long j) { long k; if (__builtin_saddl_overflow (i, j, &k)) abort (); return k; }
long sub_sub_long (long i, long j) { long k; if (__builtin_ssubl_overflow (i, j, &k)) abort (); return k; }
long sub_mul_long (long i, long j) { long k; if (__builtin_smull_overflow (i, j, &k)) abort (); return k; }

long sub_uadd_long (long i, long j) { long k; if (__builtin_uaddl_overflow (i, j, &k)) abort (); return k; }
long sub_usub_long (long i, long j) { long k; if (__builtin_usubl_overflow (i, j, &k)) abort (); return k; }
long sub_umul_long (long i, long j) { long k; if (__builtin_umull_overflow (i, j, &k)) abort (); return k; }
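To eyeball the result, compile the file above with a RISC-V cross compiler, e.g. something along the lines of riscv64-unknown-elf-gcc -O2 -S test.c (toolchain name and flags will vary with your setup), and check that the functions expand to the short add/bltu, mulh/bne, etc. sequences shown earlier rather than the longer generic expansions.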
Whole patch:
From 6efd040c301b06fae51657c8370ad940c5c3d513 Mon Sep 17 00:00:00 2001
From: LevyHsu <[email protected]>
Date: Thu, 29 Apr 2021 13:42:04 +0800
Subject: [PATCH] RISC-V: Add patterns for builtin overflow.

gcc/
	* config/riscv/riscv.c (riscv_min_arithmetic_precision): New.
	* config/riscv/riscv.h (TARGET_MIN_ARITHMETIC_PRECISION): New.
	* config/riscv/riscv.md (addv<mode>4, uaddv<mode>4): New.
	(subv<mode>4, usubv<mode>4, mulv<mode>4, umulv<mode>4): New.
---
 gcc/config/riscv/riscv.c  |   8 ++
 gcc/config/riscv/riscv.h  |   4 +
 gcc/config/riscv/riscv.md | 245 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 257 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 17cdf705c32..e1064e374eb 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -351,6 +351,14 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "size", generic, &optimize_size_tune_info },
 };
 
+/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */
+
+static unsigned int
+riscv_min_arithmetic_precision (void)
+{
+  return 32;
+}
+
 /* Return the riscv_tune_info entry for the given name string.  */
 
 static const struct riscv_tune_info *
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index d17096e1dfa..f3e85723c85 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -146,6 +146,10 @@ ASM_MISA_SPEC
 #define MIN_UNITS_PER_WORD 4
 #endif
 
+/* Allows SImode op in builtin overflow pattern, see internal-fn.c.  */
+#undef TARGET_MIN_ARITHMETIC_PRECISION
+#define TARGET_MIN_ARITHMETIC_PRECISION riscv_min_arithmetic_precision
+
 /* The `Q' extension is not yet supported.  */
 #define UNITS_PER_FP_REG (TARGET_DOUBLE_FLOAT ? 8 : 4)
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index c3687d57047..0e35960fefa 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -467,6 +467,81 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "DI")])
 
+(define_expand "addv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+		  (match_operand:GPR 2 "arith_operand" " r,I")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+
+      emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+      else
+	t4 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+      else
+	t5 = operands[2];
+      emit_insn (gen_adddi3 (t3, t4, t5));
+      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+    }
+  else
+    {
+      rtx t3 = gen_reg_rtx (<MODE>mode);
+      rtx t4 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+      rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+      emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+      rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[0], operands[1]);
+
+      emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[0], operands[1]));
+      riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+    }
+  DONE;
+})
+
+(define_expand "uaddv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" " r,r")
+		  (match_operand:GPR 2 "arith_operand" " r,I")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+      else
+	t3 = operands[1];
+      emit_insn (gen_addsi3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
+    }
+  else
+    {
+      emit_insn (gen_add3_insn (operands[0], operands[1], operands[2]));
+      riscv_expand_conditional_branch (operands[3], LTU, operands[0],
+				       operands[1]);
+    }
+
+  DONE;
+})
+
 (define_insn "*addsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(sign_extend:DI
@@ -523,6 +598,85 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "SI")])
 
+(define_expand "subv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "= r")
+	(minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+		   (match_operand:GPR 2 "register_operand" "  r")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+
+      emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+      else
+	t4 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+      else
+	t5 = operands[2];
+      emit_insn (gen_subdi3 (t3, t4, t5));
+      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+    }
+  else
+    {
+      rtx t3 = gen_reg_rtx (<MODE>mode);
+      rtx t4 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+
+      rtx cmp1 = gen_rtx_LT (<MODE>mode, operands[2], const0_rtx);
+      emit_insn (gen_cstore<mode>4 (t3, cmp1, operands[2], const0_rtx));
+
+      rtx cmp2 = gen_rtx_LT (<MODE>mode, operands[1], operands[0]);
+      emit_insn (gen_cstore<mode>4 (t4, cmp2, operands[1], operands[0]));
+
+      riscv_expand_conditional_branch (operands[3], NE, t3, t4);
+    }
+
+  DONE;
+})
+
+(define_expand "usubv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "= r")
+	(minus:GPR (match_operand:GPR 1 "reg_or_0_operand" " rJ")
+		   (match_operand:GPR 2 "register_operand" "  r")))
+   (label_ref (match_operand 3 "" ""))]
+  ""
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+      else
+	t3 = operands[1];
+      emit_insn (gen_subsi3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
+    }
+  else
+    {
+      emit_insn (gen_sub3_insn (operands[0], operands[1], operands[2]));
+      riscv_expand_conditional_branch (operands[3], LTU, operands[1],
+				       operands[0]);
+    }
+
+  DONE;
+})
+
+
 (define_insn "*subsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "= r")
 	(sign_extend:DI
@@ -614,6 +768,97 @@
   [(set_attr "type" "imul")
    (set_attr "mode" "DI")])
 
+(define_expand "mulv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(mult:GPR (match_operand:GPR 1 "register_operand" " r")
+		  (match_operand:GPR 2 "register_operand" " r")))
+   (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
+      else
+	t4 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0));
+      else
+	t5 = operands[2];
+      emit_insn (gen_muldi3 (t3, t4, t5));
+
+      emit_move_insn (operands[0], gen_lowpart (SImode, t3));
+      emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+
+      riscv_expand_conditional_branch (operands[3], NE, t6, t3);
+    }
+  else
+    {
+      rtx hp = gen_reg_rtx (<MODE>mode);
+      rtx lp = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_mul<mode>3_highpart (hp, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+      emit_insn (gen_ashr<mode>3 (lp, operands[0],
+				  GEN_INT (BITS_PER_WORD - 1)));
+
+      riscv_expand_conditional_branch (operands[3], NE, hp, lp);
+    }
+
+  DONE;
+})
+
+(define_expand "umulv<mode>4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(mult:GPR (match_operand:GPR 1 "register_operand" " r")
+		  (match_operand:GPR 2 "register_operand" " r")))
+   (label_ref (match_operand 3 "" ""))]
+  "TARGET_MUL"
+{
+  if (TARGET_64BIT && <MODE>mode == SImode)
+    {
+      rtx t3 = gen_reg_rtx (DImode);
+      rtx t4 = gen_reg_rtx (DImode);
+      rtx t5 = gen_reg_rtx (DImode);
+      rtx t6 = gen_reg_rtx (DImode);
+      rtx t7 = gen_reg_rtx (DImode);
+      rtx t8 = gen_reg_rtx (DImode);
+
+      if (GET_CODE (operands[1]) != CONST_INT)
+	emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
+      else
+	t3 = operands[1];
+      if (GET_CODE (operands[2]) != CONST_INT)
+	emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0));
+      else
+	t4 = operands[2];
+
+      emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32)));
+      emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32)));
+      emit_insn (gen_umuldi3_highpart (t7, t5, t6));
+      emit_move_insn (operands[0], gen_lowpart (SImode, t7));
+      emit_insn (gen_lshrdi3 (t8, t7, GEN_INT (32)));
+
+      riscv_expand_conditional_branch (operands[3], NE, t8, const0_rtx);
+    }
+  else
+    {
+      rtx hp = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_umul<mode>3_highpart (hp, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (operands[0], operands[1], operands[2]));
+
+      riscv_expand_conditional_branch (operands[3], NE, hp, const0_rtx);
+    }
+
+  DONE;
+})
+
 (define_insn "*mulsi3_extended"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(sign_extend:DI
-- 
2.27.0
Special Thanks to:
- Jim Wilson, for the thorough help and advice on GCC and GDB.
- Craig Topper, for pointing out that the SImode operand needs sext.w for unsigned add/sub in RV64.
- Andrew Waterman, for the better SImode signed add/sub and unsigned mul patterns in RV64.
- Kito Cheng, for patch submission.