#include "defns.i" #include "extern.i" /* Compute the gain from literals of the form R(A) or ~R(A). If better than the best so far, save. NB: The various counts have components with the following meanings: Pos plus tuples Tot all tuples T obtained from R(A) F obtained using ~R(A) Now number of current tuples Orig number of tuples in original training set New referring to training set if use R(A) or ~R(A) */ /* This version uses the same kind of pruning cutoffs as args.c */ float ComputeGain(R, A, LitBits, CanUse) /* ----------- */ Relation R; Vars A; float LitBits; Boolean CanUse; { int Instances = 1, /* ground tuples when all vars bound */ TInstances, FInstances, /* no of them in/out wrt relation */ TOInstances, /* all non-F instances */ NowTPos = 0, TPos = 0, TTot = 0, /* params if use R(A) as literal */ NowFPos = 0, NowFNeg = 0, NowFTot, /* ditto if use ~R(A) */ OrigTPos = 0, OrigFPos = 0, /* no original pos tuples that would pass */ OrigTTot = 0, OrigFTot = 0, /* no original tuples that would pass */ Best, N, i, j, UnboundVars = 0, Number(); Var V, W, Unbound[MAXARITY+1], New[MAXARITY*MAXARITY], Previous[MAXARITY*MAXARITY]; int Col, PossibleDuplicateVars = 0; Const PreviousVal, FirstBinding[MAXARITY+1]; Tuple *TSP, Case; float Gain, PosGain, NegGain, MinUsefulGain, NowTPosThresh, NowFNegThresh, Worth(), Info(); Boolean NewVar[MAXARITY+1], WeakPos, WeakNeg, RuleOutT, RuleOutF, Determinate, IdenticalBindings, OK; /* Table of constraints: TInstances + FInstances + OptInstances = Instances NewFTot/Pos = NowFTot/Pos NowTPos + NowFPos = Pos */ N = R->Arity; /* Compute number of ground tuples corresponding to args A */ memset(NewVar, true, MAXARITY+1); ForEach(i, 1, N) { if ( (V = A[i]) > MaxVar && NewVar[V] ) { Unbound[++UnboundVars] = i; Instances *= Type[R->Type[i]]->NValues; NewVar[V] = false; ForEach(W, 1, MaxVar) { New[PossibleDuplicateVars] = i; Previous[PossibleDuplicateVars++] = W; } } } ClearBits; IdenticalBindings = Determinate = DETERMINATE && UnboundVars > 0; /* The minimum gain that would be of interest is just enough to give a literal a chance to be saved by the backup procedure or, if there are determinate literals, to reach the required fraction of the maximum possible gain */ RuleOutT = false; RuleOutF = ! NEGLITERALS; MinUsefulGain = NPossible < MAXPOSSLIT ? MINALTFRAC * BestLitGain : Max(Possible[MAXPOSSLIT]->Gain, MINALTFRAC * BestLitGain); if ( NDeterminate && MinUsefulGain < DETERMINATE * MaxPossibleGain ) { MinUsefulGain = DETERMINATE * MaxPossibleGain; } NowTPosThresh = MinUsefulGain / BaseInfo - 0.001; NowFNegThresh = (Pos + 1) * (pow(2.0, BaseInfo - MinUsefulGain / Pos) - 1) + 0.001; TSP = TrainingSet; while ( Case = *TSP++ ) { if ( Join(R->Pos, R->PosIndex, A, Case, N, false) ) { TInstances = NFound; if ( ! TTot ) { memcpy(FirstBinding, Found[0], (N+1) * sizeof(Const)); } else for ( i = 1 ; IdenticalBindings && i <= UnboundVars ; i++ ) { V = Unbound[i]; IdenticalBindings = FirstBinding[V] == Found[0][V]; } for ( i = 0 ; Positive(Case) && i < PossibleDuplicateVars ; ) { Col = New[i]; PreviousVal = Case[Previous[i]]; OK = true; for ( j = 0 ; OK && j < NFound ; j++ ) { OK = Found[j][Col] == PreviousVal; } if ( OK ) { i++; } else { PossibleDuplicateVars--; for ( j = i ; j < PossibleDuplicateVars ; j++ ) { New[j] = New[j+1]; Previous[j] = Previous[j+1]; } } } } else { TInstances = 0; } /*if ( R->Neg && ( USEOPT || ! TInstances ) )*/ if ( USEOPT && R->Neg && ! TInstances ) { Join(R->Neg, R->NegIndex, A, Case, N, false); FInstances = NFound; } else { FInstances = Instances - TInstances; } TOInstances = ( USEOPT ? Instances - FInstances : TInstances ); Determinate &= ( Positive(Case) ? TInstances == 1 : TInstances <= 1 ); if ( TOInstances ) { TTot += TOInstances; if ( Positive(Case) ) { NowTPos++; TPos += TOInstances; } if ( ! TestBit(Case[0]&Mask, TrueBit) ) { SetBit(Case[0]&Mask, TrueBit); OrigTTot++; if ( Positive(Case) ) OrigTPos++; } } if ( ! TInstances ) { if ( Positive(Case) ) { NowFPos++; if ( Pos - NowFPos < NowTPosThresh && ! RuleOutT ) { RuleOutT = true; if ( RuleOutF ) { VERBOSE(2) { printf("\t"); PrintLiteral(R, true, A); printf("\tTrue pos <= %d -- abandoned\n", Pos - NowFPos); } return 0.0; } } } else { NowFNeg++; if ( NowFNeg > NowFNegThresh && ! RuleOutF ) { RuleOutF = true; if ( RuleOutT ) { VERBOSE(2) { printf("\t"); PrintLiteral(R, true, A); printf("\tFalse neg >= %d -- abandoned\n", NowFNeg); } return 0.0; } } } if ( ! TestBit(Case[0]&Mask, FalseBit) ) { SetBit(Case[0]&Mask, FalseBit); OrigFTot++; if ( Positive(Case) ) OrigFPos++; } } } NowFTot = NowFPos + NowFNeg; PosGain = Worth(NowTPos, TPos, TTot, UnboundVars); WeakPos = PosGain < 0.001 || NowFTot <= 0; NegGain = NEGLITERALS ? Worth(NowFPos, NowFPos, NowFTot, 0) : 0.0; WeakNeg = NegGain < 0.001 || NowFTot >= Tot; /* Weak literal sequence check */ if ( ! Determinate && CanUse && WeakPos && WeakNeg && WeakLiterals >= PATIENCE ) { VERBOSE(2) { printf("\t"); PrintLiteral(R, true, A); printf("\ttoo many weak literals\n"); } return 0.0; } /* Encoding length check */ if ( CanUse && ( PosGain > NegGain && (Best = OrigTPos) || NegGain > 0 && (Best = OrigFPos) ) && Except(CycleTot, Best) < UsedSoFar + LitBits ) { VERBOSE(1) { printf("\t"); PrintLiteral(R, true, A); printf("\tTrue %d, False %d, Covers %d: coding voilation\n", NowTPos, NowFPos, Best); } return 0.0; } VERBOSE(1) if ( PosGain > 0 || NegGain > 0 || Determinate || Verbosity >= 2 ) { if ( Determinate ) putchar('#'); if ( IdenticalBindings) putchar('X'); if ( PossibleDuplicateVars ) printf(" %c=%c", Variable[A[New[0]]], Variable[Previous[0]]); printf("\t"); PrintLiteral(R, true, A); printf("\tTrue %d[%d,%d]: gain %.2f", NowTPos, TPos, TTot, PosGain); if ( NEGLITERALS ) { printf("; False %d,%d: gain %.2f", NowFPos, NowFTot, NegGain); } newline; } if ( PossibleDuplicateVars /*|| Determinate && IdenticalBindings && TPos < Tot*/ ) return 0.0; Gain = Max(PosGain, NegGain); if ( CanUse ) { if ( Determinate && ! IdenticalBindings && Gain < DETERMINATE * MaxPossibleGain && ProposeDeterminateLiteral(R, A, LitBits, UnboundVars) ) { return 0.001; } if ( PosGain > 1E-6 ) { ProposeLiteral(R, true, A, TTot, LitBits, OrigTPos, OrigTTot, PosGain); } if ( NegGain > 1E-6 ) { ProposeLiteral(R, false, A, NowFTot, LitBits, OrigFPos, OrigFTot, NegGain); } } return Gain; } /* Compute aggregate gain from a test on relation R, tuple T. The Basic gain is the number of positive tuples * information gained regarding each; but there is a minor adjustment: - a literal that has some positive tuples and no gain, but introduces one or more unbound variables, is given a slight gain */ float Worth(N, P, T, UV) /* ----- */ int N, P, T, UV; { float G, TG, Info(); if ( T < MAXTUPLES ) { TG = N * (G = BaseInfo - Info(P, T)); if ( /*! DETERMINATE &&*/ G < 1E-6 && N && UV ) { return 0.0009 + UV * 0.0001; /* very small notional gain */ } else { return TG; } } else { return -0.001; } } float Info(N, T) /* ---- */ int N, T; { return Log2(T+1) - Log2(N+1); }