summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjonsykkel <jonrevold@gmail.com>2021-09-27 06:51:10 +0200
committerjonsykkel <jonrevold@gmail.com>2021-09-27 06:51:10 +0200
commita1412938d3f79679cb2065cae48c0b5f8e5b7eb1 (patch)
treeaef1b25e1dfcca2f6563b70b401b32d4d536fe7b
parenta4be952f2de6b4c261b696b9034647b149645836 (diff)
downloadokeffa-a1412938d3f79679cb2065cae48c0b5f8e5b7eb1.tar.gz
cocomba
-rw-r--r--calc/main.c2
-rw-r--r--inc/okeffa/fz_arith.h1
-rw-r--r--inc/okeffa/w_mul.h10
-rw-r--r--inc/okeffa/word.h13
-rw-r--r--makefile8
-rw-r--r--src/fz_mod.c2
-rw-r--r--src/fz_mul.c69
7 files changed, 83 insertions, 22 deletions
diff --git a/calc/main.c b/calc/main.c
index b38b09a..07c9b88 100644
--- a/calc/main.c
+++ b/calc/main.c
@@ -207,7 +207,7 @@ static void op_normal(char c){
case '*': //multiply
want(2);
- fz_mul_egypt(STACK(sp-1),STACK(sp),STACK(sp-1),STACK(sp),fl);
+ fz_mul_comba(STACK(sp-1),STACK(sp),STACK(sp-1),STACK(sp),fl);
break;
case '\\': //divide, keep quotient and remainder
diff --git a/inc/okeffa/fz_arith.h b/inc/okeffa/fz_arith.h
index 9845a41..2e75f7b 100644
--- a/inc/okeffa/fz_arith.h
+++ b/inc/okeffa/fz_arith.h
@@ -8,6 +8,7 @@ wbool_t fz_add_gated (word_t *a,word_t *b,word_t *o,size_t len,wboo
wbool_t fz_sub (word_t *a,word_t *b,word_t *o,size_t len); //returns borrow
void fz_mul_egypt (word_t *a,word_t *b,word_t *o_lo,word_t *o_hi,size_t len);
+void fz_mul_comba (word_t *a,word_t *b,word_t *o_lo,word_t *o_hi,size_t len);
void fz_idiv (word_t *a,word_t *b,word_t *q,word_t *r,size_t len);
void fz_div (word_t *a,word_t *b,word_t *q,size_t len);
diff --git a/inc/okeffa/w_mul.h b/inc/okeffa/w_mul.h
index b5dfe4f..5f1de4a 100644
--- a/inc/okeffa/w_mul.h
+++ b/inc/okeffa/w_mul.h
@@ -52,6 +52,11 @@ inline hword_t w_hi(word_t a){
}
inline void w_mul(word_t a,word_t b,word_t *o_lo,word_t *o_hi){
+#ifdef W_MUL_IRON
+ dword_t o = (dword_t)a*(dword_t)b;
+ *o_lo = o;
+ *o_hi = o>>WORD_BITNESS;
+#else
hword_t al = w_lo(a);
hword_t ah = w_hi(a);
hword_t bl = w_lo(b);
@@ -60,9 +65,10 @@ inline void w_mul(word_t a,word_t b,word_t *o_lo,word_t *o_hi){
word_t lh = hw_mul(al,bh);
word_t hl = hw_mul(ah,bl);
word_t hh = hw_mul(ah,bh);
- word_t cl = w_hi(w_hi(ll)+w_lo(lh)+w_lo(hl));
+ word_t cl = w_hi((word_t)w_hi(ll)+(word_t)w_lo(lh)+(word_t)w_lo(hl));
*o_lo = ll+((lh+hl)<<HWORD_BITNESS);
- *o_hi = hh+w_hi(hl)+w_hi(lh)+cl;
+ *o_hi = hh+(word_t)w_hi(hl)+(word_t)w_hi(lh)+cl;
+#endif
}
#endif
diff --git a/inc/okeffa/word.h b/inc/okeffa/word.h
index 1083061..18b4036 100644
--- a/inc/okeffa/word.h
+++ b/inc/okeffa/word.h
@@ -13,15 +13,22 @@
#define HWORD_BITNESSLOG2 (WORD_BITNESSLOG2/2)
#define HWORD_BYTENESS (WORD_BYTENESS/2)
+#define DWORD_BITNESS (WORD_BITNESS*2)
+#define DWORD_BITNESSLOG2 (WORD_BITNESSLOG2*2)
+#define DWORD_BYTENESS (WORD_BYTENESS*2)
+
#if WORD_BITNESS == 16
-typedef uint16_t word_t;
typedef uint8_t hword_t;
+typedef uint16_t word_t;
+typedef uint32_t dword_t;
#elif WORD_BITNESS == 32
-typedef uint32_t word_t;
typedef uint16_t hword_t;
+typedef uint32_t word_t;
+typedef uint64_t dword_t;
#elif WORD_BITNESS == 64
-typedef uint64_t word_t;
typedef uint32_t hword_t;
+typedef uint64_t word_t;
+typedef __uint128_t dword_t;
#else
#error cant into this bitness
#endif
diff --git a/makefile b/makefile
index 0a5fbbd..8db4579 100644
--- a/makefile
+++ b/makefile
@@ -1,7 +1,7 @@
#version static 5
out_name := ffa_calc
-build := static64
+build := release
inc_dir := inc
tmp_dir := tmp
bin_dir := bin
@@ -47,7 +47,7 @@ else ifeq ($(build),debug)
out := $(bin)
else ifeq ($(build),release)
src_dir += calc
- CFLAGS += -O3
+ CFLAGS += -O3 -DNDEBUG
link = $(dlink)
out := $(out_name)
else ifeq ($(build),analyze)
@@ -65,8 +65,8 @@ out := $(bin_dir)/$(out)
ifneq ($(run),)
run_cmd := @echo "run $(out)" 1>&2
- run_cmd += && ./$(out) 256 16
- #run_cmd += && time $(out) 2048 16 < prog/ch7
+ #run_cmd += && ./$(out) 256 16
+ run_cmd += && time $(out) 2048 16 < prog/ch7
#run_cmd += && prog/ch8.sh $(out)
endif
diff --git a/src/fz_mod.c b/src/fz_mod.c
index 683787f..b33e1ce 100644
--- a/src/fz_mod.c
+++ b/src/fz_mod.c
@@ -9,7 +9,7 @@ void fz_mod_mul(word_t *a,word_t *b,word_t *m,word_t *o,size_t len){
word_t *xy_lo = xy;
word_t *xy_hi = xy+len;
- fz_mul_egypt(a,b,xy_lo,xy_hi,len);
+ fz_mul_comba(a,b,xy_lo,xy_hi,len);
fz_mod(xy,m,o,len2,len);
}
diff --git a/src/fz_mul.c b/src/fz_mul.c
index 9c53308..84f9e71 100644
--- a/src/fz_mul.c
+++ b/src/fz_mul.c
@@ -4,27 +4,74 @@
#include <okeffa/fz_bitop.h>
#include <okeffa/fz_pred.h>
#include <okeffa/w_op.h>
+#include <okeffa/w_mul.h>
void fz_mul_egypt(word_t *a,word_t *b,word_t *o_lo,word_t *o_hi,size_t len){
size_t len2 = len*2;
- word_t xy[len2];
- word_t xs[len2];
+ word_t ab[len2];
+ word_t as[len2];
- fz_clear(xy,len2);
- fz_copy(a,xs,len);
- fz_clear(xs+len,len);
+ fz_clear(ab,len2);
+ fz_copy(a,as,len);
+ fz_clear(as+len,len);
for(size_t y = 0;y < len;y++){
word_t w = b[y];
size_t cut = len+1;
- word_t *xyc = xy+y;
- word_t *xsc = xs+y;
+ word_t *abc = ab+y;
+ word_t *asc = as+y;
for(size_t x = 0;x < WORD_BITNESS;x++){
- fz_add_gated(xyc,xsc,xyc,cut,w_odd(w));
+ fz_add_gated(abc,asc,abc,cut,w_odd(w));
w >>= 1;
- fz_shl(xsc,xsc,cut,1);
+ fz_shl(asc,asc,cut,1);
}
}
- fz_copy(xy, o_lo,len);
- fz_copy(xy+len,o_hi,len);
+ fz_copy(ab, o_lo,len);
+ fz_copy(ab+len,o_hi,len);
+}
+
+static word_t col(word_t *a,word_t *b,word_t *acc,size_t n,size_t u,size_t v){
+ word_t tmp;
+
+ for(size_t x = u;x <= v;x++){
+ word_t lo;
+ word_t hi;
+ word_t sum;
+ wbool_t c;
+
+ w_mul(a[x],b[n-x],&lo,&hi);
+
+ sum = acc[0]+lo;
+ c = w_carry(acc[0],lo,sum);
+ acc[0] = sum;
+
+ sum = acc[1]+hi+c;
+ c = w_carry(acc[1],hi,sum);
+ acc[1] = sum;
+
+ acc[2] += c;
+ }
+
+ tmp = acc[0];
+ acc[0] = acc[1];
+ acc[1] = acc[2];
+ acc[2] = 0;
+ return tmp;
+}
+
+void fz_mul_comba(word_t *a,word_t *b,word_t *o_lo,word_t *o_hi,size_t len){
+ size_t len2 = len*2;
+ word_t acc[3] = {0};
+ word_t ab[len2];
+
+ for(size_t x = 0;x < len;x++){
+ ab[x] = col(a,b,acc,x,0,x);
+ }
+ for(size_t x = len;x < len2-1;x++){
+ ab[x] = col(a,b,acc,x,x-len+1,len-1);
+ }
+ ab[len2-1] = acc[0];
+
+ fz_copy(ab, o_lo,len);
+ fz_copy(ab+len,o_hi,len);
}