summary refs log tree commit diff stats
diff options
context:
space:
mode:
author jonsykkel <jonrevold@gmail.com> 2021-09-21 19:37:13 +0200
committer jonsykkel <jonrevold@gmail.com> 2021-09-21 19:37:13 +0200
commit 14d52c8b9690c34636f96523b657753c07e3559c (patch)
tree 9d269bd2f86d695fbdddf53e9c3b0f05514775ec
parent 6150b05af3dc19cafe86bf05ae0eeb829faed401 (diff)
download okeffa-14d52c8b9690c34636f96523b657753c07e3559c.tar.gz
turbo mod
-rw-r--r-- calc/main.c 2
-rw-r--r-- inc/okeffa/fz_arith.h 2
-rw-r--r-- src/fz_div.c 23
-rw-r--r-- src/fz_mod.c 6
4 files changed, 23 insertions, 10 deletions
diff --git a/calc/main.c b/calc/main.c
index f7f904e..9ce8f60 100644
--- a/calc/main.c
+++ b/calc/main.c
@@ -220,7 +220,7 @@ static void op_normal(char c){
case '%': //divide, keep only remainder
want(2);
mustnotzero(STACK(sp));
- fz_mod(STACK(sp-1),STACK(sp),STACK(sp-1),fl);
+ fz_mod(STACK(sp-1),STACK(sp),STACK(sp-1),fl,fl);
drop();
break;
diff --git a/inc/okeffa/fz_arith.h b/inc/okeffa/fz_arith.h
index ef09b4c..9845a41 100644
--- a/inc/okeffa/fz_arith.h
+++ b/inc/okeffa/fz_arith.h
@@ -11,7 +11,7 @@ void fz_mul_egypt (word_t *a,word_t *b,word_t *o_lo,word_t *o_hi
void fz_idiv (word_t *a,word_t *b,word_t *q,word_t *r,size_t len);
void fz_div (word_t *a,word_t *b,word_t *q,size_t len);
-void fz_mod (word_t *a,word_t *b,word_t *r,size_t len);
+void fz_mod (word_t *a,word_t *b,word_t *r,size_t a_len,size_t br_len);
void fz_mod_mul (word_t *a,word_t *b,word_t *m,word_t *o,size_t len);
void fz_mod_exp (word_t *b,word_t *e,word_t *m,word_t *o,size_t len);
diff --git a/src/fz_div.c b/src/fz_div.c
index 7be0989..bfa66a6 100644
--- a/src/fz_div.c
+++ b/src/fz_div.c
@@ -3,6 +3,8 @@
#include <okeffa/fz_shift.h>
#include <okeffa/fz_bitop.h>
#include <okeffa/w_op.h>
+#include <okeffa/w_shift.h>
+#include <okeffa/w_pred.h>
void fz_idiv(word_t *a,word_t *b,word_t *q_out,word_t *r_out,size_t len){
size_t qr_len = len*2;
@@ -33,8 +35,23 @@ void fz_div(word_t *a,word_t *b,word_t *q,size_t len){
fz_idiv(a,b,q,r,len);
}
-void fz_mod(word_t *a,word_t *b,word_t *r,size_t len){
- word_t q[len];
- fz_idiv(a,b,q,r,len);
+static void slice(word_t w,word_t *b,word_t *r,size_t cut){ // folds the WORD_BITNESS bits of w into r, working on the first `cut` words of r and b
+ wbool_t c;
+ wbool_t lso;
+ for(size_t x = 0;x < WORD_BITNESS;x++){ // one iteration per bit of w
+ w = w_rol(w,1); // rotate by 1 so that w & 1 below is the next bit to consume; presumably most significant bit first - TODO confirm w_rol semantics
+ lso = fz_shl_i(r,r,cut,1,w & 1); // shift r by 1 in place with w & 1 as the incoming bit; lso presumably receives the bit shifted out - TODO confirm
+ c = fz_sub(r,b,r,cut); // trial subtraction r = r - b over cut words; c is presumably the borrow - TODO confirm
+ fz_add_gated(r,b,r,cut,c & wb_not(lso)); // gated add of b back into r, enabled only when c is set and lso is not
+ }
+}
+
+void fz_mod(word_t *a,word_t *b,word_t *r_out,size_t a_len,size_t br_len){ // remainder of a (a_len words) divided by b (br_len words), written to r_out (br_len words)
+ assert(a_len >= br_len); // a must be at least as long as b and r_out
+ word_t r[br_len]; // local accumulator: r_out is only written by the final copy, and calc/main.c passes the same buffer as a and r_out
+ fz_clear(r,br_len);
+ for(size_t x = 0 ;x < br_len-1;x++) slice(a[a_len-1-x],b,r,x+2 ); // first br_len-1 words of a, from index a_len-1 downward, with a growing cut of x+2 words. NOTE(review): only the first x+2 words of b reach slice here - confirm this is correct when the remaining words of b are nonzero; also br_len-1 wraps around if br_len is 0
+ for(size_t x = br_len-1;x < a_len ;x++) slice(a[a_len-1-x],b,r,br_len); // remaining words of a, down to a[0], at the full br_len width
+ fz_copy(r,r_out,br_len); // copy the accumulator into the caller's buffer
}
diff --git a/src/fz_mod.c b/src/fz_mod.c
index d70fc5b..9a55a05 100644
--- a/src/fz_mod.c
+++ b/src/fz_mod.c
@@ -6,15 +6,11 @@
void fz_mod_mul(word_t *a,word_t *b,word_t *m,word_t *o,size_t len){
size_t len2 = len*2;
word_t xy[len2];
- word_t m2[len2];
word_t *xy_lo = xy;
word_t *xy_hi = xy+len;
- fz_copy(m,m2,len);
- fz_clear(m2+len,len);
fz_mul_egypt(a,b,xy_lo,xy_hi,len);
- fz_mod(xy,m2,xy,len2);
- fz_copy(xy_lo,o,len);
+ fz_mod(xy,m,o,len2,len);
}
void fz_mod_exp(word_t *_b,word_t *_e,word_t *m,word_t *o,size_t len){