// Optimization of initial probability distribution for squarefree Erdos-Selfridge problem.
// See https://arxiv.org/abs/1901.11465 for more details.
// Uses the Gurobi linear programming optimization package.
//
// We consider a box [Z/3]x[Z/5]x[Z/7]x[Z/11] of size 3x5x7x11, partially covered by
// non-trivial hyperplanes. Each hyperplane is of the form
// [a_0,a_1,a_2,a_3]={(x_0,x_1,x_2,x_3):x_i=a_i mod p_i or x_i unrestricted if a_i=*}.
// Codimension 1 hyperplanes are assumed to exist and have x_i=0 as their only non-trivial
// constraint. Here p_0=3, p_1=5, p_2=7, p_3=11, although the program does not assume these to be prime.
//
// The program loops through possibilities for the other hyperplanes, then optimizes the probability
// distribution on the uncovered set so that the quantity c_a(3)-S*c_a(1) is minimized, where S=0.75
// and c_a(k) = \sum_s c(s)k^{|s|}, where c(s)= maximum weight (probability) of a hyperplane with
// s as its set of fixed coordinates. This sum is over all 16 subsets of the primes, in particular
// c(\emptyset)=1.
//
// Points in box are represented by integers mod M=3.5.7.11, but *not* via the CRT:
// in order to make the code applicable to non-prime dimensions, the point (x_0,x_1,x_2,x_3)
// is represented by x_0+x_1p_0+x2p_0p_1+x_3p_0p_1p_2=\sum x_iq_i, where q_i=\prod_{j<i}p_j.
// In the program, points of the box are denoted by variables x,y which are integers 0,...,M-1.
// Also p_i=p[i] and q_i=q[i], and coordinates are represented by variables i,j.
//
// A subset s of primes is represented by the integer \sum_{p_i\in s} 2^i.
// Variables s,t are used to denote subsets of primes and are integers 0,...,15
//
// As codimension 1 hyperplanes are always taken to be x_i=0, the other hyperplanes are represented
// by points (x_0,x_1,x_2,x_3) where x_i=0 indicates *, i.e., no restriction in that coordinate.
// The hyperplane corresponding to the fixed set of directions s is a[s].
// Another variable used for an arbitrary hyperplane is h.
//
// The program loops through choices of each hyperplane corresponding to a choice of set s of fixed
// coordinates with |s|>=2. The following optimizations are implemented.
// 1. No hyperplane is a subset of a previous hyperplane. 
// 2. A hyperplane [a_0,a_1,a_2,a_3] should not have an a_i more than 1 larger than the a_i's of all
//    previous hyperplanes. Permuting x_i=a_i and x_i=a_i-1 in all points of the box shows that this
//    reduces to an earlier example.
// 3. The probability assigned to [x_0,x_1,x_2,x_3] can be assumed by symmetry to be the same as
//    that with x_i replaced by x'_i, if both x_i and x'_i > a_i for all hyperplanes [a_0,a_1,a_2,a_3].
//    This reduces the number of variables in the LP problem.
//
// The function c_a(3)-S*c_a(1) is calculated from the distribution on the uncovered set returned by Gurobi,
// not from the corresponding hyperplane variables. The distribution is normalized to a probability
// distribution and checked for non-negativity. Thus any errors/inaccuracies in Gurobi will not affect the 
// mathematical rigor of the output, although it might make the results suboptimal.

#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <gurobi_c.h>

#define N 4				// number of small primes used for initial optimization of measure
#define M (3*5*7*11)	// product of small primes
#define S 0.75			// constant in function c_a(3)-S*c_a(1) to be minimized
#define LM 9.018		// threshold on c_a(3)-S*c_a(1) for reporting configurations
#define OPT 4			// omit up to the last OPT hyperplanes to reduce number of configurations
#define DISP 4			// display details for configs exceeding threshold with <=DISP missing hyperplanes

int p[N]={3,5,7,11};	// list of small primes/dimensions of box
int q[N+1];				// products of first i small primes (q[0]=1, q[1]=3, q[2]=15, ..., q[N]=M)
int a[1<<N];			// hyperplanes a[s]=[a_0,a_1,a_2,a_3] for fixed coordinates s (see above for encoding)
int r[M];				// uncovered points x have r[x]=1
int fst[1<<N];			// fst[s] = first hyperplane with fixed set s (start of linked list)
int nxt[M];				// nxt[a] = next hyperplane with same fixed set as a, =M when no more hyperplanes exist
time_t start,end;		// timing variables to record execution time
int var[M+(1<<N)];		// variable list for linear constraints
double val[M+(1<<N)];	// corresponding coefficients values / corresponding values of solution
int vmap[M];			// variable map, if x is in point in box, vmap[x] is the corresponding variable
int vcnt;				// count of the number of variables used in LP
int lpc[OPT+1];			// lpc[i]=count of lp instances with i missing hyperplanes 
int lpp[OPT+1];			// lpp[i]=count of lp instances with i missing hyperplanes exceeding threshold bound
GRBenv *env=NULL;		// Gurobi environment pointer
GRBmodel *model=NULL;	// Gurobi model pointer

// Gurobi error reporting / exit program gracefully
// e=1:report error, e=0:exit without error
//
void err(int e){
 if(e)printf("Error: %s\n",GRBgeterrormsg(env));
 GRBfreemodel(model);
 GRBfreeenv(env);
 //system("pause");		// stop command window just disappearing before one can see the results
 exit(0);
}

// Calculate 3^{|s|}
// Recall that s is considered as a set = \sum_{i\in s} 2^i
//
int p3(int s){
 int c;
 for(c=1;s!=0;s&=s-1)c*=3;	// s&=s-1 removes smallest element (power of 2) from s
 return c;
}

// Print diagnostics
// a[s] are the removed hyperplanes with fixed coordinates s, s=0,...,t-1 
// Each a[s]=[a_0,a_2,a_2,a_3]=a_0+a_1p_0+a_2p_0p_1+a_3p_0p_1p_2
// val[s] is the maximum weight of any hyperplane with fixed coordinates s
// lp is the bound given by solvelp(), should be the same as calculated here
//
void display(int t,double lp){
 double u,v,pr;
 int i,s,c,x;
 printf("Hyperplanes\n");
 for(s=1;s<t;s++){
  for(i=0;i<N;i++){
   if((s&(1<<i))==0)printf("*");
   else printf("%d",(a[s]/q[i])%p[i]);
  }
  printf(" ");
 }
 c=0;
 for(x=0;x<M;x++)c+=r[x];
 printf("\n%d uncovered points\n",c);
 printf("Max hyperplane weights by set of fixed coordinates\n");
 u=v=0;
 for(s=0;s<(1<<N);s++){
  for(i=0;i<N;i++)printf((s&(1<<i))!=0?"F":"*");
  printf(" : %f\n",val[s]);
  u+=p3(s)*val[s];
  v+=val[s];
 }
 printf("c_5(3)-%fc_5(1)=%f-%f*%f=%f\n",S,u,S,v,u-S*v);
 pr=0;
 for(s=t;s<(1<<N);s++)pr+=val[s]; 
 printf("Max weight of the %d omitted hyperplanes=%f\n",(1<<N)-t,pr);
 v=(u-S*v-(1-S)*pr)/(1-pr);
 printf("Adjusted bound=%f\n",v); 
// The following should not happen!
 if(v-lp>1e-10||lp-v>1e-10){printf("Inconsistency detected in program\n");err(0);}
 printf("\n");
}

// Setup various arrays
//
void setup(){
 int i,s,h;
// Setup q[i]=\prod_{j<i} p_i and check q[N]=M
 q[0]=1;
 for(i=1;i<=N;i++)q[i]=p[i-1]*q[i-1];
 if(M!=q[N]){printf("Error: M should be %d\n",q[N]);err(0);}
// Clear reporting variables
 for(i=0;i<=OPT;i++)lpc[i]=lpp[i]=0;
// Insert hyperplanes in linked lists according to their fixed sets.
// Hyperplanes h are processed in reverse order so that the lists occur in regular increasing order.
// (Not essential, but makes the output of program occur in sensible order.)
 for(s=0;s<(1<<N);s++)fst[s]=M;				// initialize linked lists to empty (M denotes end of list)
 for(h=M-1;h>=0;h--){
  s=0;										// s will contain fixed coordinates
  for(i=0;i<N;i++){
   if((h/q[i])%p[i]!=0)s+=1<<i;				// if h_i>0 then i is a fixed coordinate
  }
// Add the hyperplane h to the begining of the list of hyperplanes with set of fixed coordinates = s
  nxt[h]=fst[s];
  fst[s]=h;
 }
}

// Setup and solve linear programming model
// mx is the point whose i-coordinate is the maximum of all a_i's occuring as a fix coordinate in a hyperplane.
// d is the number of hyperplanes we omit.
//
double solvelp(int d,int mx){
 int i,k,s,c,x,y,h;
 double u,v;
// We first assign variables v_0,...,v_{2^N-1} , where v_s is an upper bound on the total weight
// on a hyperplane with fixed set of coordinates s.
 vcnt=1<<N;
// Now we assign variables to the uncovered points of the box. These will be equal to the weight/probability of this point.
// Due to symmetry consitions, two points that differ only in i-coordinate have the same weight
// if the i-coordinates are both larger than any constraint of the form x_i=a_i definining any of the hyperplanes.
// The variable assigned to point i will be vmap[i] if this is non-zero.
 for(x=0;x<M;x++){
  if(r[x]==0)vmap[x]=0;			// covered points do not need variables as probability is fixed to be 0
  else{
   y=x;							// symmetry reduction, find earlier point equivalent to this one to save on variables
   for(i=0;i<N;i++){
    k=(x/q[i])%p[i]-(mx/q[i])%p[i]-1; // if k>0 then coordinate x_i at least 2 more than any fixed value occuring in any hyperplane
    if(k>0)y-=k*q[i];			// point is therefore equivalent to one with smaller x_i 
   }   
   if(y<x)vmap[x]=vmap[y];		// if equivalent to previous point, use that variable
   else vmap[x]=vcnt++;			// otherwise allocate a new variable
  }
 }
// Now constuct the objective function to minimize: c_5(3)-S*c_5(1), c_5(k)=\sum_s k^{|s|} v_s, v_s=c(s)
 for(s=0;s<(1<<N);s++)val[s]=p3(s)-S;
 for(;s<vcnt;s++)val[s]=0;
 if(GRBnewmodel(env,&model,"cover",vcnt,val,NULL,NULL,NULL,NULL))err(1);	// setup Gurobi model called "cover"
// Now add constraints. For each hyperplane with fixed coordinates s, total weight is at most v_s
 for(s=0;s<(1<<N);s++){
  for(h=fst[s];h<M;h=nxt[h]){	// loop through all hyperplanes with fixed coordinates = s
   for(k=0;k<vcnt;k++)val[k]=0;	// initialize all coefficients to 0
   val[s]=-1;					// max hyperplane weight variable has coeff = -1
// The following needs some explanation. To quickly list the points in hyperplane h
// (with set of zero-coordinates = s) we want to loop through values whose set of zero-coordinates
// is the complement of s and add them to h (any other point in h would have a zero coordinate and so be
// covered by a codimension 1 plane). Thus we can use the fst[]-nxt[] list starting with the complement
// of s, which is just (1<<N)-1-s.
   for(c=0,k=fst[(1<<N)-1-s];k<M;k=nxt[k])if(r[h+k]){val[vmap[h+k]]+=1;c=1;} // uncovered points in hyperplane, coeff = 1
   if(c>0){	// if c=0 then the constraint is vacuous since there are no uncovered points in the hyperplane 
// The next line "compresses" the constraint by omitting all variables with coefficient 0 
	for(c=k=0;k<vcnt;k++)if(val[k]!=0){val[c]=val[k];var[c]=k;c++;}
    if(GRBaddconstr(model,c,var,val,s?GRB_LESS_EQUAL:GRB_EQUAL,0.,NULL))err(1); // add constraint <=0 (s>0) or =0 (s=0)
   }
  }
 }
// Add constraint that v_0=1. The s=0 case above then forces a probability distribution.
 var[0]=0;val[0]=1;
 if(GRBaddconstr(model,1,var,val,GRB_EQUAL,1.,NULL))err(1);
// Now call Gurobi to perform optimization
 if(GRBoptimize(model))err(1);
 if(GRBgetintattr(model,"Status",&s))err(1);
 if(s!=GRB_OPTIMAL){printf("Not optimal\n");err(0);} // something went wrong
// Now read solution into val[]
 if(GRBgetdblattrarray(model,"X",0,vcnt,val))err(1);
 if(GRBfreemodel(model))err(1); // don't need model anymore
 // We will now check that the solution is valid by calculating max weight of hyperplanes
 for(k=1<<N;k<vcnt;k++){
  if(val[k]<0){printf("Error: negative weight found\n");err(0);} // this really should not happen
 }
 for(s=0;s<(1<<N);s++){
  val[s]=0;					// set max weight to 0
  for(h=fst[s];h<M;h=nxt[h]){ // loop through hyperplanes with fixed set s
   v=0;
// See above explanation on how to quickly loop through points in a hyperplane
   for(k=fst[(1<<N)-1-s];k<M;k=nxt[k])if(r[h+k])v+=val[vmap[h+k]]; // uncovered points in hyperplace
   if(v>val[s])val[s]=v;	// update max weight if weight of current hyperplane larger
  }
 }
// Check if total weight differs significantly from 1
 if(val[0]-1>1e-8||1-val[0]>1e-8){printf("Error: Large error in lp\n");err(0);} 
// Normalize anyway - we now have a probability distribution accurate to almost full double precision accuracy
 for(i=1;i<(1<<N);i++)val[i]/=val[0];
 val[0]=1;
// Calculate objective function
 v=0;
 for(s=0;s<(1<<N);s++)v+=(p3(s)-S)*val[s];
// Now bound how much weight could lie in last d hyperplanes
 u=0;
 for(;d;d--)u+=val[(1<<N)-d];	//
// The following bounds the objective function if we zero out the remaining d hyperplanes and renormalize
// The objective function v will reduce by at least (1-S)*p as (1-S) is the coefficient of v_0=total mass
// Normalizing then increases by factor at most 1/(1-p).
// Now note that (v-(1-S)*p)/(1-p) <= (v-(1-S)*u)/(1-u) when p<=u.
 return (v-(1-S)*u)/(1-u);
}

// Check if point x is in hyperplane a[s]
// Can also check if hyperplane h=x is a subset of a[s]
//
int inplane(int x,int s){
 int i;
 for(i=0;i<N;i++){
  if((s&(1<<i))!=0 && (x/q[i])%p[i]!=(a[s]/q[i])%p[i])return 0;
 }
 return 1;
}

// Set up array r[] of uncovered points
// r[x]=1 if x is not covered by any of the hyperplanes a[1],...,a[t-1].
//
void remain(int t){
 int x,s;
 for(x=0;x<M;x++){
  r[x]=1;
  for(s=1;s<t;s++)if(inplane(x,s))r[x]=0;
 }
}

// Recursively generate hyperplanes and then run LP to find
// optimal probability distribution
// s=current fixed set of coordinates, mx=maximum coordinates used.
//
void recurse(int s,int mx){
 double lc;
 int nmx,i,t;
 if(s>=(1<<N)-4){
  remain(s);
  lc=solvelp((1<<N)-s,mx);
  lpc[(1<<N)-s]++;
  if(lc<LM)return;
  lpp[(1<<N)-s]++;
  if(s>=(1<<N)-DISP)display(s,lc);
  if(s==(1<<N))return;
 }
 while((s&(s-1))==0){a[s]=0;s++;}	// skip codim 1 planes
 for(a[s]=fst[s];a[s]<M;a[s]=nxt[a[s]]){
  for(t=s&(s-1);t;t=(t-1)&s)if(inplane(a[s],t))break;
  if(t)continue;			// hyperplane subset of a previous one
  nmx=mx;
  for(i=0;i<N;i++){
   if((s&(1<<i))!=0){
    if((a[s]/q[i])%p[i]==0)break; // 0 fixed coordinate - this should not happen
    if((a[s]/q[i])%p[i]>1+(mx/q[i])%p[i])break; // coord > 1 more than max previous
    if((a[s]/q[i])%p[i]>(mx/q[i])%p[i])nmx+=q[i]; // increase max
   }
  }
  if(i==N)recurse(s+1,nmx); // a[s] is valid, now recursively generate next one
 }
}

main(){
 int i;
 time(&start);									// start timer
 if(GRBloadenv(&env,NULL))err(1);				// load gurobi environment
 if(GRBsetintparam(env,"OutputFlag",0))err(1);	// do not print Gurobi diagnostics
 setup();
 printf("Finding configurations with c_a(3)-%fc_a(1)>%f\n\n",S,LM);
 recurse(1,0);
 for(i=4;i>=0;i--){
  printf("%d / %d configurations with %d missing hyperplanes exceeded threshold\n",lpp[i],lpc[i],i);
 }
 time(&end);
 printf("Time taken %d seconds\n",(int)(end-start)); // print elapsed time 
 err(0);										// exit gracefully
}