// Optimization of initial probability distribution for squarefree Erdos-Selfridge problem. // See https://arxiv.org/abs/1901.11465 for more details. // Uses the Gurobi linear programming optimization package. // // We consider a box [Z/3]x[Z/5]x[Z/7]x[Z/11] of size 3x5x7x11, partially covered by // non-trivial hyperplanes. Each hyperplane is of the form // [a_0,a_1,a_2,a_3]={(x_0,x_1,x_2,x_3):x_i=a_i mod p_i or x_i unrestricted if a_i=*}. // Codimension 1 hyperplanes are assumed to exist and have x_i=0 as their only non-trivial // constraint. Here p_0=3, p_1=5, p_2=7, p_3=11, although the program does not assume these to be prime. // // The program loops through possibilities for the other hyperplanes, then optimizes the probability // distribution on the uncovered set so that the quantity c_a(3)-S*c_a(1) is minimized, where S=0.75 // and c_a(k) = \sum_s c(s)k^{|s|}, where c(s)= maximum weight (probability) of a hyperplane with // s as its set of fixed coordinates. This sum is over all 16 subsets of the primes, in particular // c(\emptyset)=1. // // Points in box are represented by integers mod M=3.5.7.11, but *not* via the CRT: // in order to make the code applicable to non-prime dimensions, the point (x_0,x_1,x_2,x_3) // is represented by x_0+x_1p_0+x2p_0p_1+x_3p_0p_1p_2=\sum x_iq_i, where q_i=\prod_{j<i}p_j. // In the program, points of the box are denoted by variables x,y which are integers 0,...,M-1. // Also p_i=p[i] and q_i=q[i], and coordinates are represented by variables i,j. // // A subset s of primes is represented by the integer \sum_{p_i\in s} 2^i. // Variables s,t are used to denote subsets of primes and are integers 0,...,15 // // As codimension 1 hyperplanes are always taken to be x_i=0, the other hyperplanes are represented // by points (x_0,x_1,x_2,x_3) where x_i=0 indicates *, i.e., no restriction in that coordinate. // The hyperplane corresponding to the fixed set of directions s is a[s]. // Another variable used for an arbitrary hyperplane is h. // // The program loops through choices of each hyperplane corresponding to a choice of set s of fixed // coordinates with |s|>=2. The following optimizations are implemented. // 1. No hyperplane is a subset of a previous hyperplane. // 2. A hyperplane [a_0,a_1,a_2,a_3] should not have an a_i more than 1 larger than the a_i's of all // previous hyperplanes. Permuting x_i=a_i and x_i=a_i-1 in all points of the box shows that this // reduces to an earlier example. // 3. The probability assigned to [x_0,x_1,x_2,x_3] can be assumed by symmetry to be the same as // that with x_i replaced by x'_i, if both x_i and x'_i > a_i for all hyperplanes [a_0,a_1,a_2,a_3]. // This reduces the number of variables in the LP problem. // // The function c_a(3)-S*c_a(1) is calculated from the distribution on the uncovered set returned by Gurobi, // not from the corresponding hyperplane variables. The distribution is normalized to a probability // distribution and checked for non-negativity. Thus any errors/inaccuracies in Gurobi will not affect the // mathematical rigor of the output, although it might make the results suboptimal. #include <stdlib.h> #include <stdio.h> #include <time.h> #include <gurobi_c.h> #define N 4 // number of small primes used for initial optimization of measure #define M (3*5*7*11) // product of small primes #define S 0.75 // constant in function c_a(3)-S*c_a(1) to be minimized #define LM 9.018 // threshold on c_a(3)-S*c_a(1) for reporting configurations #define OPT 4 // omit up to the last OPT hyperplanes to reduce number of configurations #define DISP 4 // display details for configs exceeding threshold with <=DISP missing hyperplanes int p[N]={3,5,7,11}; // list of small primes/dimensions of box int q[N+1]; // products of first i small primes (q[0]=1, q[1]=3, q[2]=15, ..., q[N]=M) int a[1<<N]; // hyperplanes a[s]=[a_0,a_1,a_2,a_3] for fixed coordinates s (see above for encoding) int r[M]; // uncovered points x have r[x]=1 int fst[1<<N]; // fst[s] = first hyperplane with fixed set s (start of linked list) int nxt[M]; // nxt[a] = next hyperplane with same fixed set as a, =M when no more hyperplanes exist time_t start,end; // timing variables to record execution time int var[M+(1<<N)]; // variable list for linear constraints double val[M+(1<<N)]; // corresponding coefficients values / corresponding values of solution int vmap[M]; // variable map, if x is in point in box, vmap[x] is the corresponding variable int vcnt; // count of the number of variables used in LP int lpc[OPT+1]; // lpc[i]=count of lp instances with i missing hyperplanes int lpp[OPT+1]; // lpp[i]=count of lp instances with i missing hyperplanes exceeding threshold bound GRBenv *env=NULL; // Gurobi environment pointer GRBmodel *model=NULL; // Gurobi model pointer // Gurobi error reporting / exit program gracefully // e=1:report error, e=0:exit without error // void err(int e){ if(e)printf("Error: %s\n",GRBgeterrormsg(env)); GRBfreemodel(model); GRBfreeenv(env); //system("pause"); // stop command window just disappearing before one can see the results exit(0); } // Calculate 3^{|s|} // Recall that s is considered as a set = \sum_{i\in s} 2^i // int p3(int s){ int c; for(c=1;s!=0;s&=s-1)c*=3; // s&=s-1 removes smallest element (power of 2) from s return c; } // Print diagnostics // a[s] are the removed hyperplanes with fixed coordinates s, s=0,...,t-1 // Each a[s]=[a_0,a_2,a_2,a_3]=a_0+a_1p_0+a_2p_0p_1+a_3p_0p_1p_2 // val[s] is the maximum weight of any hyperplane with fixed coordinates s // lp is the bound given by solvelp(), should be the same as calculated here // void display(int t,double lp){ double u,v,pr; int i,s,c,x; printf("Hyperplanes\n"); for(s=1;s<t;s++){ for(i=0;i<N;i++){ if((s&(1<<i))==0)printf("*"); else printf("%d",(a[s]/q[i])%p[i]); } printf(" "); } c=0; for(x=0;x<M;x++)c+=r[x]; printf("\n%d uncovered points\n",c); printf("Max hyperplane weights by set of fixed coordinates\n"); u=v=0; for(s=0;s<(1<<N);s++){ for(i=0;i<N;i++)printf((s&(1<<i))!=0?"F":"*"); printf(" : %f\n",val[s]); u+=p3(s)*val[s]; v+=val[s]; } printf("c_5(3)-%fc_5(1)=%f-%f*%f=%f\n",S,u,S,v,u-S*v); pr=0; for(s=t;s<(1<<N);s++)pr+=val[s]; printf("Max weight of the %d omitted hyperplanes=%f\n",(1<<N)-t,pr); v=(u-S*v-(1-S)*pr)/(1-pr); printf("Adjusted bound=%f\n",v); // The following should not happen! if(v-lp>1e-10||lp-v>1e-10){printf("Inconsistency detected in program\n");err(0);} printf("\n"); } // Setup various arrays // void setup(){ int i,s,h; // Setup q[i]=\prod_{j<i} p_i and check q[N]=M q[0]=1; for(i=1;i<=N;i++)q[i]=p[i-1]*q[i-1]; if(M!=q[N]){printf("Error: M should be %d\n",q[N]);err(0);} // Clear reporting variables for(i=0;i<=OPT;i++)lpc[i]=lpp[i]=0; // Insert hyperplanes in linked lists according to their fixed sets. // Hyperplanes h are processed in reverse order so that the lists occur in regular increasing order. // (Not essential, but makes the output of program occur in sensible order.) for(s=0;s<(1<<N);s++)fst[s]=M; // initialize linked lists to empty (M denotes end of list) for(h=M-1;h>=0;h--){ s=0; // s will contain fixed coordinates for(i=0;i<N;i++){ if((h/q[i])%p[i]!=0)s+=1<<i; // if h_i>0 then i is a fixed coordinate } // Add the hyperplane h to the begining of the list of hyperplanes with set of fixed coordinates = s nxt[h]=fst[s]; fst[s]=h; } } // Setup and solve linear programming model // mx is the point whose i-coordinate is the maximum of all a_i's occuring as a fix coordinate in a hyperplane. // d is the number of hyperplanes we omit. // double solvelp(int d,int mx){ int i,k,s,c,x,y,h; double u,v; // We first assign variables v_0,...,v_{2^N-1} , where v_s is an upper bound on the total weight // on a hyperplane with fixed set of coordinates s. vcnt=1<<N; // Now we assign variables to the uncovered points of the box. These will be equal to the weight/probability of this point. // Due to symmetry consitions, two points that differ only in i-coordinate have the same weight // if the i-coordinates are both larger than any constraint of the form x_i=a_i definining any of the hyperplanes. // The variable assigned to point i will be vmap[i] if this is non-zero. for(x=0;x<M;x++){ if(r[x]==0)vmap[x]=0; // covered points do not need variables as probability is fixed to be 0 else{ y=x; // symmetry reduction, find earlier point equivalent to this one to save on variables for(i=0;i<N;i++){ k=(x/q[i])%p[i]-(mx/q[i])%p[i]-1; // if k>0 then coordinate x_i at least 2 more than any fixed value occuring in any hyperplane if(k>0)y-=k*q[i]; // point is therefore equivalent to one with smaller x_i } if(y<x)vmap[x]=vmap[y]; // if equivalent to previous point, use that variable else vmap[x]=vcnt++; // otherwise allocate a new variable } } // Now constuct the objective function to minimize: c_5(3)-S*c_5(1), c_5(k)=\sum_s k^{|s|} v_s, v_s=c(s) for(s=0;s<(1<<N);s++)val[s]=p3(s)-S; for(;s<vcnt;s++)val[s]=0; if(GRBnewmodel(env,&model,"cover",vcnt,val,NULL,NULL,NULL,NULL))err(1); // setup Gurobi model called "cover" // Now add constraints. For each hyperplane with fixed coordinates s, total weight is at most v_s for(s=0;s<(1<<N);s++){ for(h=fst[s];h<M;h=nxt[h]){ // loop through all hyperplanes with fixed coordinates = s for(k=0;k<vcnt;k++)val[k]=0; // initialize all coefficients to 0 val[s]=-1; // max hyperplane weight variable has coeff = -1 // The following needs some explanation. To quickly list the points in hyperplane h // (with set of zero-coordinates = s) we want to loop through values whose set of zero-coordinates // is the complement of s and add them to h (any other point in h would have a zero coordinate and so be // covered by a codimension 1 plane). Thus we can use the fst[]-nxt[] list starting with the complement // of s, which is just (1<<N)-1-s. for(c=0,k=fst[(1<<N)-1-s];k<M;k=nxt[k])if(r[h+k]){val[vmap[h+k]]+=1;c=1;} // uncovered points in hyperplane, coeff = 1 if(c>0){ // if c=0 then the constraint is vacuous since there are no uncovered points in the hyperplane // The next line "compresses" the constraint by omitting all variables with coefficient 0 for(c=k=0;k<vcnt;k++)if(val[k]!=0){val[c]=val[k];var[c]=k;c++;} if(GRBaddconstr(model,c,var,val,s?GRB_LESS_EQUAL:GRB_EQUAL,0.,NULL))err(1); // add constraint <=0 (s>0) or =0 (s=0) } } } // Add constraint that v_0=1. The s=0 case above then forces a probability distribution. var[0]=0;val[0]=1; if(GRBaddconstr(model,1,var,val,GRB_EQUAL,1.,NULL))err(1); // Now call Gurobi to perform optimization if(GRBoptimize(model))err(1); if(GRBgetintattr(model,"Status",&s))err(1); if(s!=GRB_OPTIMAL){printf("Not optimal\n");err(0);} // something went wrong // Now read solution into val[] if(GRBgetdblattrarray(model,"X",0,vcnt,val))err(1); if(GRBfreemodel(model))err(1); // don't need model anymore // We will now check that the solution is valid by calculating max weight of hyperplanes for(k=1<<N;k<vcnt;k++){ if(val[k]<0){printf("Error: negative weight found\n");err(0);} // this really should not happen } for(s=0;s<(1<<N);s++){ val[s]=0; // set max weight to 0 for(h=fst[s];h<M;h=nxt[h]){ // loop through hyperplanes with fixed set s v=0; // See above explanation on how to quickly loop through points in a hyperplane for(k=fst[(1<<N)-1-s];k<M;k=nxt[k])if(r[h+k])v+=val[vmap[h+k]]; // uncovered points in hyperplace if(v>val[s])val[s]=v; // update max weight if weight of current hyperplane larger } } // Check if total weight differs significantly from 1 if(val[0]-1>1e-8||1-val[0]>1e-8){printf("Error: Large error in lp\n");err(0);} // Normalize anyway - we now have a probability distribution accurate to almost full double precision accuracy for(i=1;i<(1<<N);i++)val[i]/=val[0]; val[0]=1; // Calculate objective function v=0; for(s=0;s<(1<<N);s++)v+=(p3(s)-S)*val[s]; // Now bound how much weight could lie in last d hyperplanes u=0; for(;d;d--)u+=val[(1<<N)-d]; // // The following bounds the objective function if we zero out the remaining d hyperplanes and renormalize // The objective function v will reduce by at least (1-S)*p as (1-S) is the coefficient of v_0=total mass // Normalizing then increases by factor at most 1/(1-p). // Now note that (v-(1-S)*p)/(1-p) <= (v-(1-S)*u)/(1-u) when p<=u. return (v-(1-S)*u)/(1-u); } // Check if point x is in hyperplane a[s] // Can also check if hyperplane h=x is a subset of a[s] // int inplane(int x,int s){ int i; for(i=0;i<N;i++){ if((s&(1<<i))!=0 && (x/q[i])%p[i]!=(a[s]/q[i])%p[i])return 0; } return 1; } // Set up array r[] of uncovered points // r[x]=1 if x is not covered by any of the hyperplanes a[1],...,a[t-1]. // void remain(int t){ int x,s; for(x=0;x<M;x++){ r[x]=1; for(s=1;s<t;s++)if(inplane(x,s))r[x]=0; } } // Recursively generate hyperplanes and then run LP to find // optimal probability distribution // s=current fixed set of coordinates, mx=maximum coordinates used. // void recurse(int s,int mx){ double lc; int nmx,i,t; if(s>=(1<<N)-4){ remain(s); lc=solvelp((1<<N)-s,mx); lpc[(1<<N)-s]++; if(lc<LM)return; lpp[(1<<N)-s]++; if(s>=(1<<N)-DISP)display(s,lc); if(s==(1<<N))return; } while((s&(s-1))==0){a[s]=0;s++;} // skip codim 1 planes for(a[s]=fst[s];a[s]<M;a[s]=nxt[a[s]]){ for(t=s&(s-1);t;t=(t-1)&s)if(inplane(a[s],t))break; if(t)continue; // hyperplane subset of a previous one nmx=mx; for(i=0;i<N;i++){ if((s&(1<<i))!=0){ if((a[s]/q[i])%p[i]==0)break; // 0 fixed coordinate - this should not happen if((a[s]/q[i])%p[i]>1+(mx/q[i])%p[i])break; // coord > 1 more than max previous if((a[s]/q[i])%p[i]>(mx/q[i])%p[i])nmx+=q[i]; // increase max } } if(i==N)recurse(s+1,nmx); // a[s] is valid, now recursively generate next one } } main(){ int i; time(&start); // start timer if(GRBloadenv(&env,NULL))err(1); // load gurobi environment if(GRBsetintparam(env,"OutputFlag",0))err(1); // do not print Gurobi diagnostics setup(); printf("Finding configurations with c_a(3)-%fc_a(1)>%f\n\n",S,LM); recurse(1,0); for(i=4;i>=0;i--){ printf("%d / %d configurations with %d missing hyperplanes exceeded threshold\n",lpp[i],lpc[i],i); } time(&end); printf("Time taken %d seconds\n",(int)(end-start)); // print elapsed time err(0); // exit gracefully }