/* jiw - 13 Aug 2006 Re: Counting the number of solutions of a^n + b^n + c^n + d^n + e^n = f^n with n=5, for example: 19^5+ 43^5+ 46^5+ 47^5+ 67^5 = 72^5 0^5+ 27^5+ 84^5+ 110^5+ 133^5 = 144^5 145^5+ 565^5+ 1105^5+ 1462^5+ 1990^5 = 2087^5 519^5+ 642^5+ 1026^5+ 1480^5+ 1990^5 = 2087^5 Compile via: gcc 515count.c -O6 -o 515count Copyright 2006 James Waldby. Offered without warranty under GPL terms as at http://www.gnu.org/licenses/gpl.html v6 - Uses heapsum techniques w/ 90-bit arithmetic. Also locates solutions with a=0, which v5 and earlier don't. The "equalities" number in v6c output counts equivalent and non-primitive solutions as well as valid solutions. For example, 19^5+43^5+46^5+47^5+67^5 = 72^5 may also be found as 43^5+19^5+46^5+47^5+67^5 = 72^5. "Non-primitive" means that gcd(a,b,c,d,e) > 1. v6c runs for a long time after finding its last solution, so termination checking may be flawed. Also, it makes over 50% more sumUp calls than sumDown calls, so hup3 setup probably can be improved. v5 - uses 90-bit arithmetic, ie, 3 32-bit words each with two bits spare for overflow, with O(n^5) nested loops. v5a with 90-bit arithmetic and unrolled inner loops runs about 2.3 times as long as v4. v5b (inner loop not unrolled) runs about 40% slower than that. v4 - Unrolled the inner loop to various depths, such as 3, 5, 8, 10; and settled on 10. ==================================================================== Selected timings with FMAX = 103, 223, 337, 1013, and 3019: At FMAX = 103: v2- There are 2 actual solutions in 101340876 tries, a 0.0% ratio. v3- There are 2 actual solutions in 78181680 tries, a 0.0% ratio. v5- There are 2 actual solutions in 94496709 tries, a 0.0% ratio. v6c-There are 2 actual solutions in 35 equalities, and 1 non-prim sols v2- user 0m16.354s v3- user 0m12.594s v4- user 0m7.971s for UNROLL 10 v5- user 0m18.346s for UNROLL 10 with 90-bit arithmetic v5b-user 0m23.937s for no UNROLL & with gcd check v6c-user 0m0.181s At FMAX = 223: v2- There are 7 actual solutions in 4699109520 tries, a 0.0% ratio. v3- There are 7 actual solutions in 3647119930 tries, a 0.0% ratio. v5- There are 7 actual solutions in 4408856597 tries, a 0.0% ratio. v5b-There are 3 actual solutions in 4408856597 tries, and 4 non-prim sols v6c-There are 4 actual solutions in 144 equalities, and 5 non-prim sols v2- user 12m52.556s v3- user 9m47.793s v4- user 5m45.290s for UNROLL 10. v5- user 13m43.134s v5b-user 18m7.575s v6c-user 0m2.315s At FMAX = 337: v2- There are 10 actual solutions in 36760655568 tries, a 0.0% ratio. v3- There are 10 actual solutions in 28298201848 tries, a 0.0% ratio. v5- There are 10 actual solutions in 34549290325 tries, a 0.0% ratio. v5b-There are 3 actual solutions in 34549290325 tries, and 7 non-prim sols v6c-There are 4 actual solutions in 217 equalities, and 9 non-prim sols v2- user 122m34.495s v3- user 93m10.782s v5- user 106m12.095s v5b-user 140m32.684s v6c-user 0m9.017s At FMAX = 1013: v6c-There are 30 actual solutions in 1303 equalities, and 43 non-prim sols v6c-user 5m12.742s At FMAX = 3019: v6c-There are 108 actual solutions in 5859 equalities, and 217 non-prim sols v6c-user 193m41.380s (At this rate of increase, FMAX=10007 would take about 5.4 days.) ==================================================================== Copyright 2006 James Waldby. Offered without warranty under GPL terms as at http://www.gnu.org/licenses/gpl.html FMAX is the upper bound for numbers we look at. */ #define FMAX 337 #define z fprintf(stderr,"%s@%3d \n",__FILE__,__LINE__); #include #include #include #include #define HITE 5 #define BASE 0x40000000 #define BASEMASK 0x3FFFFFFF #define BASESHIFT 30 typedef unsigned short int USI; typedef unsigned long long int ULLI; typedef ULLI* ULLIP; typedef unsigned int UI; typedef UI* UIP; typedef int VALU[3]; typedef VALU* VALUP; typedef VALUP* VALUPP; typedef int* IP; typedef int** IPP; typedef struct BENTRY { VALU bval; // Numeric value of b item USI key1, key2; // Reconstruction keys for b entry } BENTRY; typedef BENTRY* BEP; typedef BEP* BEPP; typedef struct HITEM { VALU aval; // Numeric value of a item VALU sum; // Numeric value of an a+b sum int key; // Addend key int mark; // Addend marker } HITEM; typedef HITEM* HIP; typedef HIP* HIPP; //================================================ ULLI nactsol, nonprim, nup, ndown, nequal; int keySum, keyDif, markSum, markDif; VALU minDif, minSum, BigNum={ BASEMASK, BASEMASK, BASEMASK }; VALU at,bt,ct,dt,et; VALUP powval; int a, b, c, d, e; //================================================ inline void addVal(VALU x, VALU y, VALU s) { int t = x[0] + y[0]; s[0] = t & BASEMASK; t = x[1] + y[1] + (t >> BASESHIFT); s[1] = t & BASEMASK; s[2] = x[2] + y[2] + (t >> BASESHIFT); } //================================================ inline void subVal(VALU s, VALU v, VALU t) { t[0] = s[0] - v[0]; // Let t = s - v t[1] = s[1] - v[1]; if (t[0] < 0) { --t[1]; // Borrow from s[1]? t[0] &= BASEMASK; } t[2] = s[2] - v[2]; if (t[1] < 0) { --t[2]; // Borrow from s[2]? t[1] &= BASEMASK; } } //================================================ inline int equalVal(VALU x, VALU y) { if (x[0] == y[0] && x[1] == y[1] && x[2] == y[2]) return 1; return 0; } //================================================ inline int greaterVal(VALU x, VALU y) { if (x[2] > y[2]) return 1; if (x[2] == y[2]) { if (x[1] > y[1]) return 1; if (x[1] == y[1]) { if (x[0] > y[0]) return 1; } } return 0; } //================================================ inline void copyVal(VALU x, VALU y) { x[0]=y[0]; x[1]=y[1]; x[2]=y[2]; } //================================================ inline void swapI(IP x, IP y) { int t=*x; *x=*y; *y=t; } //================================================ inline void swapV(VALU x, VALU y) { swapI(x+0, y+0); swapI(x+1, y+1); swapI(x+2, y+2); } //================================================ inline void swapH(HIP x, HIP y) { swapI (&(x->key), &(y->key)); swapI (&(x->mark), &(y->mark)); swapV (x->aval, y->aval); swapV (x->sum, y->sum); } //================================================ // Compute power of x. We compute values using arithmetic // with long long int cells, then repack into 3 chunks. inline void powerVal(int x, VALU v) { ULLI phi, plo = (ULLI)x*x; int i; phi = plo >> BASESHIFT; plo = plo & BASEMASK; for (i=2; i> BASESHIFT); plo = plo & BASEMASK; } v[0] = (int)plo; v[1] = phi & BASEMASK; v[2] = phi >> BASESHIFT; } //================================================ // gcd(a,b) by Euclid's algorithm inline int gcd(int a, int b) { int r; if (!a || !b) return 0; r = a%b; while (r) { a=b; b=r; r=a%b; } return b; } //================================================ // The sumUp and sumDown routines are variations on the heapsum theme. // During main processing, calls to sumUp and sumDown are interleaved, // so they use separate sets of return variables. Note: mark values // in v6b point to b entries which contain key pairs as well as b // values. // -- sumUp first is used O(n^2) times to fill array twopow. Its // arithmetic is straightforward sums. Then it's used O(n^3) times to // compute a^h+b^h+c^h in ascending order. These calls to sumUp are // interleaved with calls to sumDown to maintain a^h+b^h+c^h // +d^h+e^h-f^h non-negative but close to zero. // -- sumDown is used to compute f^h-d^h-e^h in ascending order. The // marks for sumDown start out as follows: mark[i] points at entry k // in twopow with powval[i] > twopow[k] but not > twopow[k+1]. In // popArray3, we heapify hdown3 after setting all marks and // differences in it. This takes O(n) time so is faster as well as // simpler than sorting it. Heapify was not required in 313count // because h2 started out in ascending order, whereas hup3 starts out // in no particular order. Note: 515count's sumDown is different from // 313count's; we move markers backward in the b array, rather than // forward, as we compute a-b rather than b-a. //================================================ // Re-heapify: Exchange with smaller son if we exceed a son inline void heapify (HIP a, int na, int i) { int s; VALU Ls, Rs; do { s = 2*i+1; if (s >= na) return; // Does h[i] have a son? copyVal(Ls, a[s].sum); copyVal(Rs, a[s+1].sum); if (s > na-2) copyVal(Rs, BigNum); if (greaterVal(Ls, Rs)) ++s; // To swap with smaller son if (greaterVal(a[i].sum, Ls) || greaterVal(a[i].sum, Rs)) { swapH(a+i, a+s); i = s; } else i = 0; } while (i); } //================================================ // Deliver next-larger sum for heapsum of a and b. // Return minSum function value and keySum global. inline void sumUp (HIP a, int na, BEP b, int nb) { copyVal(minSum, a[0].sum); // set minSum keySum = a[0].key; // set keySum markSum = a[0].mark; // set markSum ++a[0].mark; // Move 0's marker if (a[0].mark < nb) { // Compute new sum addVal(a[0].aval, b[a[0].mark].bval, a[0].sum); } else copyVal(a[0].sum, BigNum); heapify (a, na, 0); // Re-heapify } //================================================ // Deliver next-larger sum for heapsum of a and -b. // Return minDif function value and keyDif global. inline void sumDown (HIP a, int na, BEP b, int nb) { copyVal(minDif, a[0].sum); // set minDif keyDif = a[0].key; // set keyDif markDif = a[0].mark; // set markDif --a[0].mark; // Move 0's marker if (a[0].mark > 0) { // Compute new dif subVal(a[0].aval, b[a[0].mark].bval, a[0].sum); } else copyVal(a[0].sum, BigNum); heapify (a, na, 0); // Re-heapify } //================================================ void allocArr1 (int n) { int b=n*sizeof(VALU); powval = malloc (b); if (!powval) { printf ("Error 1: a memory allocation failed\n"); exit(1); } } //================================================ void popArray1(int n) { int i; // Populate power array for (i=0; i 0 powerVal(i, powval[i]); // Save i^5 at offset i in powval } } //================================================ void allocArr2(HIPP hup2, BEPP twopow, int n, int ntp) { *hup2 = malloc (n*sizeof(HITEM)); *twopow = malloc (ntp*sizeof(BENTRY)); if (!*hup2 || !*twopow) { printf ("Error 2: a memory allocation failed\n"); exit(2); } } //================================================ // See comments above re sumUp routine, for background // info about initial values in hup2 and twopow. int popArray2(HIP hup2, BEP twopow, int n, int ntp) { int i; BENTRY bpow[FMAX]; memset (bpow, 0, sizeof(bpow)); // zero the unused bpow keys // Populate bpow for use with sumup for (i=0; i -1; --i) { heapify (hdown3, na, i); } } //================================================ inline void checkSol (BEP tp) { int a=keySum, b=tp[markSum].key1, c=tp[markSum].key2; int f=keyDif, d=tp[markDif].key1, e=tp[markDif].key2; int gab, gde, gov; ++nequal; if (a>b || b>c || c>d || d>e) return; gab = gcd(a,b); gde = gcd(d,e); gov = gab? gcd(gab, gcd(c, gde)) : gcd(b, gcd(c, gde)); if (gov != 1) { ++nonprim; return; } ++nactsol; printf ("%6d^5+ %6d^5+ %6d^5+ %6d^5+ %6d^5 - %6d^5\n", a,b,c,d,e,f); fflush(stdout); } //================================================ int main(int argc, char *argv[]) { int n=FMAX, ntwopow=(n+1)*(n+2)/2; HIP hup2, hup3, hdown3; BEP twopow; // Array of sums of two powers nactsol = nonprim = nup = ndown = nequal = 0; // Allocate & Populate heap, power, and root arrays allocArr1(n); // Create power and root arrays popArray1(n); allocArr2(&hup2, &twopow, n, ntwopow); // Create hup2 and twopow arrays ntwopow = popArray2( hup2, twopow, n, ntwopow); if(0)printf ("ntwopow revised from %d to %d\n", (n+1)*(n+2)/2, ntwopow); allocArr3(&hup3, &hdown3, n); // Create hup3 and hdown3 arrays popArray3(twopow, ntwopow, hup3, hdown3, n); // Loop exhaustively sumDown(hdown3, n, twopow, ntwopow); ++ndown; do { sumUp(hup3, n, twopow, ntwopow); ++nup; while (greaterVal(minSum, minDif)) { sumDown(hdown3, n, twopow, ntwopow); ++ndown; } if (equalVal(minSum, minDif)) checkSol(twopow); } while (greaterVal(BigNum, minSum) && greaterVal(BigNum, minDif)); printf ("There are %Ld actual solutions in %Ld equalities, and %Ld non-prim sols\n", nactsol, nequal, nonprim); printf ("%Ld sumUp calls and %Ld sumDown calls\n", nup, ndown); }