/*
 * Decompiled with CFR 0.152.
 */
package weka.clusterers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;
import weka.clusterers.RandomizableClusterer;
import weka.core.AlgVector;
import weka.core.Capabilities;
import weka.core.DistanceFunction;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.neighboursearch.KDTree;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/**
 * X-means clusterer: K-means extended by an "Improve-Structure" step that tries to split each
 * center in two and keeps the split when the BIC score favours it (see {@link #globalInfo()}).
 */
public class XMeans
extends RandomizableClusterer
implements TechnicalInformationHandler {
    /** Serialization version id. */
    private static final long serialVersionUID = -7941793078404132616L;
    /** Training data after missing values have been replaced (set in buildClusterer). */
    protected Instances m_Instances = null;
    /** Empty data set carrying only the header of m_Instances (set in buildClusterer). */
    protected Instances m_Model = null;
    /** Filter used to replace missing values in training and test instances. */
    protected ReplaceMissingValues m_ReplaceMissingFilter;
    /** Value exposed through get/setBinValue; not read by the code visible in this part of the file. */
    protected double m_BinValue = 1.0;
    /**
     * BIC score of the current model; assigned in buildClusterer before it is compared.
     * NOTE(review): Double.MIN_VALUE is the smallest positive double, not the most negative one.
     */
    protected double m_Bic = Double.MIN_VALUE;
    /** Per-center distortion of the current model (result of distortion()). */
    protected double[] m_Mle = null;
    /** Maximum number of overall (outer loop) iterations; negative disables the limit. */
    protected int m_MaxIterations = 1;
    /** Maximum number of K-means iterations in the Improve-Params part; negative disables the limit. */
    protected int m_MaxKMeans = 1000;
    /** Maximum number of K-means iterations run on the two children of a split center. */
    protected int m_MaxKMeansForChildren = 1000;
    /** Current number of clusters. */
    protected int m_NumClusters = 2;
    /** Minimum number of clusters; also the number of starting centers when none are read from file. */
    protected int m_MinNumClusters = 2;
    /** Maximum number of clusters. */
    protected int m_MaxNumClusters = 4;
    /** Distance function used for assignment and distortion. */
    protected DistanceFunction m_DistanceF = new EuclideanDistance();
    /** Current cluster centers. */
    protected Instances m_ClusterCenters;
    /** File to read starting centers from; defaults to the working directory (i.e. "not set"). */
    protected File m_InputCenterFile = new File(System.getProperty("user.dir"));
    /** Reader for the debug vectors file. */
    protected Reader m_DebugVectorsInput = null;
    /** Index into m_DebugVectors; presumably the next vector to hand out - usage is outside this view. */
    protected int m_DebugVectorsIndex = 0;
    /** Debug vectors; populated outside the code visible here. */
    protected Instances m_DebugVectors = null;
    /** When this is an existing regular file, split vectors are taken from it instead of being random. */
    protected File m_DebugVectorsFile = new File(System.getProperty("user.dir"));
    /** When non-null, buildClusterer reads the starting centers from this reader. */
    protected Reader m_CenterInput = null;
    /** File to write centers to; defaults to the working directory (i.e. "not set"). */
    protected File m_OutputCenterFile = new File(System.getProperty("user.dir"));
    /** Writer for the center output file; not used by the code visible here. */
    protected PrintWriter m_CenterOutput = null;
    /** Cluster index per training instance, -1 meaning "not assigned yet". */
    protected int[] m_ClusterAssignments;
    /** Fraction of centers that is split anyway when no child pair beats its parent's BIC. */
    protected double m_CutOffFactor = 0.5;
    /** Index constants (low / high / width); not referenced by the code visible here. */
    public static int R_LOW = 0;
    public static int R_HIGH = 1;
    public static int R_WIDTH = 2;
    /** KDTree handed to the assignment step when m_UseKDTree is true. */
    protected KDTree m_KDTree = new KDTree();
    /** Whether the KDTree is used for assigning instances to centers. */
    protected boolean m_UseKDTree = false;
    /** Number of outer-loop iterations performed by the last buildClusterer call. */
    protected int m_IterationCount = 0;
    /** Counts how often stopKMeansIteration reported that the iteration limit was reached. */
    protected int m_KMeansStopped = 0;
    /** Number of calls to splitCenter. */
    protected int m_NumSplits = 0;
    /** Number of splits that were accepted into a new center set. */
    protected int m_NumSplitsDone = 0;
    /** Accumulates the split budget used by the cut-off-factor fallback. */
    protected int m_NumSplitsStillDone = 0;
    /** Debug level; presumably consulted by the PFD helpers, which are outside this view. */
    protected int m_DebugLevel = 0;
    /** Debug categories passed to the PFD / PrCentersFD helpers. */
    public static int D_PRINTCENTERS = 1;
    public static int D_FOLLOWSPLIT = 2;
    public static int D_CONVCHCLOSER = 3;
    public static int D_RANDOMVECTOR = 4;
    public static int D_KDTREE = 5;
    public static int D_ITERCOUNT = 6;
    public static int D_METH_MISUSE = 80;
    public static int D_CURR = 88;
    public static int D_GENERAL = 99;
    /** Debug flag; not referenced by the code visible here. */
    public boolean m_CurrDebugFlag = true;

    /**
     * Creates an X-means clusterer, sets the default seed to 10 and applies that seed.
     */
    public XMeans() {
        m_SeedDefault = 10;
        setSeed(m_SeedDefault);
    }

    /**
     * Returns a description of this clusterer followed by its technical reference.
     *
     * @return the description text with the string form of {@link #getTechnicalInformation()} appended
     */
    public String globalInfo() {
        final String description = "Cluster data using the X-means algorithm.\n\nX-Means is K-Means extended by an Improve-Structure part In this part of the algorithm the centers are attempted to be split in its region. The decision between the children of each center and itself is done comparing the BIC-values of the two structures.\n\nFor more information see:\n\n";
        final String reference = getTechnicalInformation().toString();
        return description + reference;
    }

    /**
     * Builds the bibliographic record of the X-means paper this implementation refers to.
     *
     * @return an INPROCEEDINGS entry with author, title, book title, year, pages and publisher set
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation paper = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);
        paper.setValue(TechnicalInformation.Field.AUTHOR, "Dan Pelleg and Andrew W. Moore");
        paper.setValue(TechnicalInformation.Field.TITLE, "X-means: Extending K-means with Efficient Estimation of the Number of Clusters");
        paper.setValue(TechnicalInformation.Field.BOOKTITLE, "Seventeenth International Conference on Machine Learning");
        paper.setValue(TechnicalInformation.Field.YEAR, "2000");
        paper.setValue(TechnicalInformation.Field.PAGES, "727-734");
        paper.setValue(TechnicalInformation.Field.PUBLISHER, "Morgan Kaufmann");
        return paper;
    }

    /**
     * Describes the data this clusterer accepts: no class attribute, numeric and date attributes,
     * and missing values. Everything inherited from the superclass is disabled first.
     *
     * @return the capabilities of this clusterer
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities caps = super.getCapabilities();
        // Start from a clean slate, then switch on only what is supported.
        caps.disableAll();
        caps.enable(Capabilities.Capability.NO_CLASS);
        caps.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        caps.enable(Capabilities.Capability.DATE_ATTRIBUTES);
        caps.enable(Capabilities.Capability.MISSING_VALUES);
        return caps;
    }

    /**
     * Builds the X-means model for the given data.
     *
     * <p>Each outer iteration (1) runs conventional K-means on the current centers
     * ("Improve-Params"), (2) splits every center into two children, runs a local K-means on the
     * instances of that center and compares the parent's BIC with the children's BIC
     * ("Improve-Structure"), and (3) accepts the resulting center set only if its overall BIC is
     * higher than the BIC of the current model. The loop ends when the iteration limit is
     * reached, the maximum number of clusters is reached, or the number of clusters stops changing.
     *
     * <p>PFD and PrCentersFD are debug-output helpers defined outside the visible part of this file.
     *
     * @param data the training data; it is checked against {@link #getCapabilities()}
     * @throws Exception if the capability test fails, if the minimum number of clusters exceeds
     *         the maximum, or if any helper called from here throws
     */
    @Override
    public void buildClusterer(Instances data) throws Exception {
        this.getCapabilities().testWithFail(data);
        if (this.m_MinNumClusters > this.m_MaxNumClusters) {
            throw new Exception("XMeans: min number of clusters can't be greater than max number of clusters!");
        }
        // Reset the split statistics of a previous run.
        this.m_NumSplits = 0;
        this.m_NumSplitsDone = 0;
        this.m_NumSplitsStillDone = 0;
        // Work on a copy of the data in which missing values have been replaced.
        this.m_ReplaceMissingFilter = new ReplaceMissingValues();
        this.m_ReplaceMissingFilter.setInputFormat(data);
        this.m_Instances = Filter.useFilter(data, this.m_ReplaceMissingFilter);
        // Single random source for the whole run: starting centers and split vectors.
        Random random0 = new Random(this.m_Seed);
        this.m_NumClusters = this.m_MinNumClusters;
        if (this.m_DistanceF == null) {
            this.m_DistanceF = new EuclideanDistance();
        }
        this.m_DistanceF.setInstances(this.m_Instances);
        this.checkInstances();
        // An existing regular file switches splitCenter to reading its vectors from that file.
        if (this.m_DebugVectorsFile.exists() && this.m_DebugVectorsFile.isFile()) {
            this.initDebugVectorsInput();
        }
        // Identity index list: position i refers to training instance i.
        int[] allInstList = new int[this.m_Instances.numInstances()];
        for (int i = 0; i < this.m_Instances.numInstances(); ++i) {
            allInstList[i] = i;
        }
        // Header-only copy used as template for new center instances.
        this.m_Model = new Instances(this.m_Instances, 0);
        // Starting centers: read from the center reader if one is set, otherwise drawn randomly.
        if (this.m_CenterInput != null) {
            this.m_ClusterCenters = new Instances(this.m_CenterInput);
            this.m_NumClusters = this.m_ClusterCenters.numInstances();
        } else {
            this.m_ClusterCenters = this.makeCentersRandomly(random0, this.m_Instances, this.m_NumClusters);
        }
        this.PFD(D_FOLLOWSPLIT, "\n*** Starting centers ");
        for (int k = 0; k < this.m_ClusterCenters.numInstances(); ++k) {
            this.PFD(D_FOLLOWSPLIT, "Center " + k + ": " + this.m_ClusterCenters.instance(k));
        }
        this.PrCentersFD(D_PRINTCENTERS);
        boolean finished = false;
        if (this.m_UseKDTree) {
            this.m_KDTree.setInstances(this.m_Instances);
        }
        this.m_IterationCount = 0;
        // ---- outer loop: one Improve-Params plus one Improve-Structure step per pass ----
        while (!finished && !this.stopIteration(this.m_IterationCount, this.m_MaxIterations)) {
            this.PFD(D_FOLLOWSPLIT, "\nBeginning of main loop - centers:");
            this.PrCentersFD(D_FOLLOWSPLIT);
            this.PFD(D_ITERCOUNT, "\n*** 1. Improve-Params " + this.m_IterationCount + ". time");
            ++this.m_IterationCount;
            boolean converged = false;
            // All instances start unassigned (-1), so the first assignment pass never reports convergence.
            this.m_ClusterAssignments = this.initAssignments(this.m_Instances.numInstances());
            // instOfCent[c] will hold the indices of the instances assigned to center c.
            int[][] instOfCent = new int[this.m_ClusterCenters.numInstances()][];
            int kMeansIteration = 0;
            this.PFD(D_FOLLOWSPLIT, "\nConverge in K-Means:");
            // ---- 1. Improve-Params: conventional K-means on the current centers ----
            while (!converged && !this.stopKMeansIteration(kMeansIteration, this.m_MaxKMeans)) {
                converged = true;
                // The assignment result is overwritten below: the loop condition only sees
                // the value returned by recomputeCenters.
                converged = this.assignToCenters(this.m_UseKDTree ? this.m_KDTree : null, this.m_ClusterCenters, instOfCent, allInstList, this.m_ClusterAssignments, ++kMeansIteration);
                this.PFD(D_FOLLOWSPLIT, "\nMain loop - Assign - centers:");
                this.PrCentersFD(D_FOLLOWSPLIT);
                converged = this.recomputeCenters(this.m_ClusterCenters, instOfCent, this.m_Model);
                this.PFD(D_FOLLOWSPLIT, "\nMain loop - Recompute - centers:");
                this.PrCentersFD(D_FOLLOWSPLIT);
            }
            this.PFD(D_FOLLOWSPLIT, "");
            this.PFD(D_FOLLOWSPLIT, "End of Part: 1. Improve-Params - conventional K-means");
            // Score the current model.
            this.m_Mle = this.distortion(instOfCent, this.m_ClusterCenters);
            this.m_Bic = this.calculateBIC(instOfCent, this.m_ClusterCenters, this.m_Mle);
            this.PFD(D_FOLLOWSPLIT, "m_Bic " + this.m_Bic);
            // ---- 2. Improve-Structure: try to split every center into two children ----
            int currNumCent = this.m_ClusterCenters.numInstances();
            // splitCenters holds two entries per parent: children of center i sit at 2*i and 2*i+1.
            Instances splitCenters = new Instances(this.m_ClusterCenters, currNumCent * 2);
            // pbic[i]: BIC of parent i on its own region; cbic[i]: BIC of its two children.
            double[] pbic = new double[currNumCent];
            double[] cbic = new double[currNumCent];
            for (int i = 0; i < currNumCent; ++i) {
                this.PFD(D_FOLLOWSPLIT, "\nsplit center " + i + " " + this.m_ClusterCenters.instance(i));
                Instance currCenter = this.m_ClusterCenters.instance(i);
                int[] currInstList = instOfCent[i];
                int currNumInst = instOfCent[i].length;
                // Too few instances to split: make the parent win and store it twice as placeholder.
                if (currNumInst <= 2) {
                    pbic[i] = Double.MAX_VALUE;
                    cbic[i] = 0.0;
                    splitCenters.add(currCenter);
                    splitCenters.add(currCenter);
                    continue;
                }
                // Average distortion of the region determines how far the children are moved apart.
                double variance = this.m_Mle[i] / (double)currNumInst;
                Instances children = this.splitCenter(random0, currCenter, variance, this.m_Model);
                int[] oneCentAssignments = this.initAssignments(currNumInst);
                int[][] instOfChCent = new int[2][];
                converged = false;
                int kMeansForChildrenIteration = 0;
                this.PFD(D_FOLLOWSPLIT, "\nConverge, K-Means for children: " + i);
                // Local 2-means restricted to the instances of the parent center.
                while (!converged && !this.stopKMeansIteration(kMeansForChildrenIteration, this.m_MaxKMeansForChildren)) {
                    ++kMeansForChildrenIteration;
                    converged = this.assignToCenters(children, instOfChCent, currInstList, oneCentAssignments);
                    if (converged) continue;
                    this.recomputeCentersFast(children, instOfChCent, this.m_Model);
                }
                splitCenters.add(children.instance(0));
                splitCenters.add(children.instance(1));
                this.PFD(D_FOLLOWSPLIT, "\nconverged cildren ");
                this.PFD(D_FOLLOWSPLIT, " " + children.instance(0));
                this.PFD(D_FOLLOWSPLIT, " " + children.instance(1));
                pbic[i] = this.calculateBIC(currInstList, currCenter, this.m_Mle[i], this.m_Model);
                double[] chMLE = this.distortion(instOfChCent, children);
                cbic[i] = this.calculateBIC(instOfChCent, children, chMLE);
            }
            // Decide per center whether parent or children survive.
            Instances newClusterCenters = null;
            newClusterCenters = this.newCentersAfterSplit(pbic, cbic, this.m_CutOffFactor, splitCenters);
            int newNumClusters = newClusterCenters.numInstances();
            // ---- 3. Accept the new structure only if its overall BIC improves ----
            if (newNumClusters != this.m_NumClusters) {
                this.PFD(D_FOLLOWSPLIT, "Compare with non-split");
                int[] newClusterAssignments = this.initAssignments(this.m_Instances.numInstances());
                int[][] newInstOfCent = new int[newClusterCenters.numInstances()][];
                converged = this.assignToCenters(this.m_UseKDTree ? this.m_KDTree : null, newClusterCenters, newInstOfCent, allInstList, newClusterAssignments, this.m_IterationCount);
                double[] newMle = this.distortion(newInstOfCent, newClusterCenters);
                double newBic = this.calculateBIC(newInstOfCent, newClusterCenters, newMle);
                this.PFD(D_FOLLOWSPLIT, "newBic " + newBic);
                if (newBic > this.m_Bic) {
                    this.PFD(D_FOLLOWSPLIT, "*** decide for new clusters");
                    this.m_Bic = newBic;
                    this.m_ClusterCenters = newClusterCenters;
                    this.m_ClusterAssignments = newClusterAssignments;
                } else {
                    this.PFD(D_FOLLOWSPLIT, "*** keep old clusters");
                }
            }
            // Stop when the cluster limit is reached or the structure did not change in this pass.
            if ((newNumClusters = this.m_ClusterCenters.numInstances()) >= this.m_MaxNumClusters || newNumClusters == this.m_NumClusters) {
                finished = true;
            }
            this.m_NumClusters = newNumClusters;
        }
    }

    /**
     * Checks whether the data contains a nominal attribute other than the class attribute.
     *
     * <p>The previous loop only advanced its index as a side effect of the nominal test, so it
     * never moved past the class attribute and looped forever whenever a class index was set.
     *
     * @param data the data set to inspect
     * @return {@code true} if at least one non-class attribute is nominal, {@code false} otherwise
     */
    public boolean checkForNominalAttributes(Instances data) {
        final int classIndex = data.classIndex();
        for (int i = 0; i < data.numAttributes(); i++) {
            // The class attribute is skipped; it does not take part in the check.
            if (i != classIndex && data.attribute(i).isNominal()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Marks every entry of the given assignment array as "not assigned" (-1), in place.
     *
     * @param ass the assignment array to reset
     * @return the same array instance, now filled with -1
     */
    protected int[] initAssignments(int[] ass) {
        java.util.Arrays.fill(ass, -1);
        return ass;
    }

    /**
     * Creates a new assignment array in which every instance is "not assigned" (-1).
     *
     * @param numInstances the length of the array to create
     * @return a new array of that length filled with -1
     */
    protected int[] initAssignments(int numInstances) {
        int[] unassigned = new int[numInstances];
        java.util.Arrays.fill(unassigned, -1);
        return unassigned;
    }

    /**
     * Creates a boolean array of the given length with every element {@code false}.
     *
     * @param len the length of the array
     * @return a new all-false array
     */
    boolean[] initBoolArray(int len) {
        // A freshly allocated boolean array is already all false.
        return new boolean[len];
    }

    /**
     * Decides, per current center, whether the parent or its two children survive, and builds the
     * resulting center set.
     *
     * <p>A center is marked for splitting when its children's BIC exceeds the parent's BIC. If no
     * center qualifies and {@code cutoffFactor} is positive, that fraction of the centers is split
     * anyway, taking candidates in the order given by sorting {@code pbic - cbic} and stopping at
     * the first candidate whose child BIC is not positive. The number of splits is capped so that
     * {@code m_MaxNumClusters} is not exceeded; when the cap bites, only the first candidates in
     * that order keep their split.
     *
     * <p>The cut-off amount is computed from the {@code cutoffFactor} argument. The earlier code
     * tested the argument but multiplied by the field {@code m_CutOffFactor}; the two are identical
     * for the call made from buildClusterer.
     *
     * @param pbic BIC of each parent center on its own region
     * @param cbic BIC of each center's two children
     * @param cutoffFactor fraction of centers to split when no child pair wins; values &lt;= 0
     *        disable the fallback
     * @param splitCenters the child centers, two consecutive entries per parent
     * @return the new center set, or {@code m_ClusterCenters} itself when no split is possible
     */
    protected Instances newCentersAfterSplit(double[] pbic, double[] cbic, double cutoffFactor, Instances splitCenters) {
        boolean splitPerCutoff = false;
        boolean takeSomeAway = false;
        boolean[] splitWon = this.initBoolArray(this.m_ClusterCenters.numInstances());
        int numToSplit = 0;
        // Count the centers whose children have the better BIC.
        for (int i = 0; i < cbic.length; ++i) {
            if (cbic[i] > pbic[i]) {
                splitWon[i] = true;
                ++numToSplit;
                this.PFD(D_FOLLOWSPLIT, "Center " + i + " decide for children");
                continue;
            }
            this.PFD(D_FOLLOWSPLIT, "Center " + i + " decide for parent");
        }
        // Nothing won: fall back to splitting a fixed fraction of the centers.
        if (numToSplit == 0 && cutoffFactor > 0.0) {
            splitPerCutoff = true;
            numToSplit = (int)((double)this.m_ClusterCenters.numInstances() * cutoffFactor);
        }
        // Order the centers by how much the parent beats the children (Utils.sort yields the
        // index permutation; smallest difference, i.e. most promising split, is expected first).
        double[] diff = new double[this.m_NumClusters];
        for (int j = 0; j < diff.length; ++j) {
            diff[j] = pbic[j] - cbic[j];
        }
        int[] sortOrder = Utils.sort(diff);
        // Each split adds one cluster, so at most this many splits fit under the maximum.
        int possibleToSplit = this.m_MaxNumClusters - this.m_NumClusters;
        if (possibleToSplit > numToSplit) {
            possibleToSplit = numToSplit;
        } else {
            takeSomeAway = true;
        }
        if (splitPerCutoff) {
            for (int j = 0; j < possibleToSplit && cbic[sortOrder[j]] > 0.0; ++j) {
                splitWon[sortOrder[j]] = true;
            }
            this.m_NumSplitsStillDone += possibleToSplit;
        } else if (takeSomeAway) {
            // Keep the first possibleToSplit winners in sort order and revoke all later ones.
            int j;
            int count = 0;
            for (j = 0; j < splitWon.length && count < possibleToSplit; ++j) {
                if (!splitWon[sortOrder[j]]) continue;
                ++count;
            }
            while (j < splitWon.length) {
                splitWon[sortOrder[j]] = false;
                ++j;
            }
        }
        return possibleToSplit > 0 ? this.newCentersAfterSplit(splitWon, splitCenters) : this.m_ClusterCenters;
    }

    /**
     * Assembles the new center set from the per-center split decisions.
     *
     * <p>For a center whose split won, its two children (stored at positions {@code 2*i} and
     * {@code 2*i+1} of {@code splitCenters}) are taken and {@code m_NumSplitsDone} is incremented;
     * otherwise the current center {@code i} of {@code m_ClusterCenters} is kept.
     *
     * @param splitWon one flag per current center, {@code true} if the children replace the parent
     * @param splitCenters the child centers, two consecutive entries per parent
     * @return a new data set holding the resulting centers
     */
    protected Instances newCentersAfterSplit(boolean[] splitWon, Instances splitCenters) {
        Instances merged = new Instances(splitCenters, 0);
        for (int parent = 0; parent < splitWon.length; parent++) {
            if (!splitWon[parent]) {
                merged.add(m_ClusterCenters.instance(parent));
                continue;
            }
            m_NumSplitsDone++;
            int firstChild = 2 * parent;
            merged.add(splitCenters.instance(firstChild));
            merged.add(splitCenters.instance(firstChild + 1));
        }
        return merged;
    }

    /**
     * Tells whether a K-means loop has used up its iteration budget, and counts every time the
     * answer is yes in {@code m_KMeansStopped}.
     *
     * @param iterationCount the number of iterations done so far
     * @param max the iteration limit; a negative value means "no limit"
     * @return {@code true} if a limit is set and has been reached
     */
    protected boolean stopKMeansIteration(int iterationCount, int max) {
        final boolean limitReached = max >= 0 && iterationCount >= max;
        if (limitReached) {
            m_KMeansStopped++;
        }
        return limitReached;
    }

    /**
     * Tells whether the outer loop has used up its iteration budget.
     *
     * @param iterationCount the number of iterations done so far
     * @param max the iteration limit; a negative value means "no limit"
     * @return {@code true} if a limit is set and has been reached
     */
    protected boolean stopIteration(int iterationCount, int max) {
        return max >= 0 && iterationCount >= max;
    }

    /**
     * Moves every center to the mean (or mode) of the instances assigned to it and reports
     * whether anything had to move.
     *
     * <p>Values are compared one by one. As long as no difference has been found, centers are left
     * untouched; from the first difference on, every remaining value is overwritten with the
     * freshly computed one. A center without instances never triggers a difference on its own,
     * but its values are overwritten once a difference has been found elsewhere.
     *
     * <p>The centers are read from and written to the {@code centers} argument. The earlier code
     * iterated over {@code centers} but accessed {@code m_ClusterCenters}; the two are the same
     * object for the call made from buildClusterer. It also repeated the identical comparison
     * once per assigned instance, which is replaced by a single "has members" test.
     *
     * @param centers the centers to update in place
     * @param instOfCent for each center, the indices (into {@code m_Instances}) of its instances
     * @param model header information; only its number of attributes is used
     * @return {@code true} if no center value changed, {@code false} otherwise
     */
    protected boolean recomputeCenters(Instances centers, int[][] instOfCent, Instances model) {
        boolean converged = true;
        for (int i = 0; i < centers.numInstances(); ++i) {
            Instance center = centers.instance(i);
            for (int j = 0; j < model.numAttributes(); ++j) {
                double val = this.meanOrMode(this.m_Instances, instOfCent[i], j);
                // Only a center that owns at least one instance can break convergence.
                if (converged && instOfCent[i].length > 0 && center.value(j) != val) {
                    converged = false;
                }
                if (!converged) {
                    center.setValue(j, val);
                }
            }
        }
        return converged;
    }

    /**
     * Unconditionally sets every attribute of every center to the mean (or mode) of the
     * instances assigned to that center. Unlike {@code recomputeCenters} no convergence
     * information is produced.
     *
     * @param centers the centers to update in place
     * @param instOfCentIndexes for each center, the indices (into {@code m_Instances}) of its instances
     * @param model header information; only its number of attributes is used
     */
    protected void recomputeCentersFast(Instances centers, int[][] instOfCentIndexes, Instances model) {
        for (int c = 0; c < centers.numInstances(); c++) {
            for (int att = 0; att < model.numAttributes(); att++) {
                double centroidValue = meanOrMode(m_Instances, instOfCentIndexes[c], att);
                centers.instance(c).setValue(att, centroidValue);
            }
        }
    }

    /**
     * Computes the weighted mean (numeric attribute) or the weighted mode (nominal attribute) of
     * one attribute over a subset of instances. Instances with a missing value are skipped.
     *
     * <p>Nominal weights are accumulated as doubles. The earlier code summed them into an
     * {@code int[]}, truncating the running total after every addition, so fractional instance
     * weights were lost.
     *
     * @param instances the data set the indices refer to
     * @param instList indices of the instances to consider
     * @param attIndex index of the attribute
     * @return the weighted mean for a numeric attribute, the index of the heaviest value for a
     *         nominal attribute, and 0.0 if the total weight is zero or the attribute is neither
     */
    protected double meanOrMode(Instances instances, int[] instList, int attIndex) {
        int numInst = instList.length;
        if (instances.attribute(attIndex).isNumeric()) {
            double found = 0.0;
            double result = 0.0;
            for (int j = 0; j < numInst; ++j) {
                Instance currInst = instances.instance(instList[j]);
                if (currInst.isMissing(attIndex)) continue;
                found += currInst.weight();
                result += currInst.weight() * currInst.value(attIndex);
            }
            // No usable weight: fall back to 0.0 rather than dividing by zero.
            if (Utils.eq(found, 0.0)) {
                return 0.0;
            }
            return result / found;
        }
        if (instances.attribute(attIndex).isNominal()) {
            double[] counts = new double[instances.attribute(attIndex).numValues()];
            for (int j = 0; j < numInst; ++j) {
                Instance currInst = instances.instance(instList[j]);
                if (currInst.isMissing(attIndex)) continue;
                counts[(int)currInst.value(attIndex)] += currInst.weight();
            }
            return Utils.maxIndex(counts);
        }
        return 0.0;
    }

    /**
     * Assigns instances to centers, using the KDTree variant when a tree is given and the
     * brute-force variant otherwise.
     *
     * @param tree the KDTree to use, or {@code null} for the brute-force assignment
     * @param centers the current centers
     * @param instOfCent receives, per center, the indices of its instances
     * @param allInstList indices of the instances to assign (only used without a tree)
     * @param assignments current assignment per instance; updated in place
     * @param iterationCount iteration number (only used with a tree)
     * @return {@code true} if no assignment changed
     * @throws Exception if the selected variant throws
     */
    protected boolean assignToCenters(KDTree tree, Instances centers, int[][] instOfCent, int[] allInstList, int[] assignments, int iterationCount) throws Exception {
        if (tree == null) {
            return assignToCenters(centers, instOfCent, allInstList, assignments);
        }
        return assignToCenters(tree, centers, instOfCent, assignments, iterationCount);
    }

    /**
     * Assigns all training instances to their centers with the help of a KDTree.
     *
     * <p>The previous assignment is remembered, the tree recomputes {@code assignments}, and the
     * two are compared. The comparison stops at the first difference; an instance found
     * unassigned (-1) before that point raises an exception. Only when something changed is
     * {@code instOfCent} rebuilt.
     *
     * <p>A missing assignment array is initialised to -1 in every slot. The earlier code wrote
     * the sentinel to slot 0 on each pass, leaving all other instances at 0, which is
     * indistinguishable from "assigned to cluster 0".
     *
     * <p>Note: when {@code assignments} or {@code instOfCent} is {@code null}, the replacement
     * array is local to this method and not visible to the caller.
     *
     * @param kdtree the tree used to compute the assignments
     * @param centers the current centers
     * @param instOfCent receives, per center, the indices of its instances
     * @param assignments current assignment per instance; updated in place
     * @param iterationCount iteration number; {@code 0.8^iterationCount} is passed on to the tree
     * @return {@code true} if no assignment changed
     * @throws Exception if an instance is found unassigned, or if the tree throws
     */
    protected boolean assignToCenters(KDTree kdtree, Instances centers, int[][] instOfCent, int[] assignments, int iterationCount) throws Exception {
        int i;
        int numCent = centers.numInstances();
        int numInst = this.m_Instances.numInstances();
        int[] oldAssignments = new int[numInst];
        if (assignments == null) {
            assignments = new int[numInst];
            for (i = 0; i < numInst; ++i) {
                assignments[i] = -1;
            }
        }
        if (instOfCent == null) {
            instOfCent = new int[numCent][];
        }
        // Snapshot of the previous state for the convergence check below.
        for (i = 0; i < assignments.length; ++i) {
            oldAssignments[i] = assignments[i];
        }
        kdtree.centerInstances(centers, assignments, Math.pow(0.8, iterationCount));
        boolean converged = true;
        for (i = 0; converged && i < assignments.length; ++i) {
            converged = oldAssignments[i] == assignments[i];
            if (assignments[i] == -1) {
                throw new Exception("Instance " + i + " has not been assigned to cluster.");
            }
        }
        if (!converged) {
            // Count the members of each center, then collect their indices in ascending order.
            int[] numInstOfCent = new int[numCent];
            for (i = 0; i < numInst; ++i) {
                numInstOfCent[assignments[i]]++;
            }
            for (i = 0; i < numCent; ++i) {
                instOfCent[i] = new int[numInstOfCent[i]];
            }
            for (i = 0; i < numCent; ++i) {
                int index = -1;
                for (int j = 0; j < numInstOfCent[i]; ++j) {
                    index = this.nextAssignedOne(i, index, assignments);
                    instOfCent[i][j] = index;
                }
            }
        }
        return converged;
    }

    /**
     * Assigns each listed instance to its nearest center by brute force.
     *
     * <p>Position {@code i} of {@code assignments} belongs to instance {@code allInstList[i]}.
     * Assignments are written only from the first position whose nearest center differs from the
     * stored one. When something changed, {@code instOfCent} is rebuilt with the instance indices
     * (taken from {@code allInstList}) of every center; otherwise it is left as it was.
     *
     * <p>Note: when {@code assignments} or {@code instOfCent} is {@code null}, the replacement
     * array is local to this method and not visible to the caller.
     *
     * @param centers the current centers
     * @param instOfCent receives, per center, the indices of its instances
     * @param allInstList indices (into {@code m_Instances}) of the instances to assign
     * @param assignments current assignment per list position; updated in place
     * @return {@code true} if no assignment changed
     * @throws Exception declared for compatibility with the other assignment variants
     */
    protected boolean assignToCenters(Instances centers, int[][] instOfCent, int[] allInstList, int[] assignments) throws Exception {
        final int listLength = allInstList.length;
        final int centerCount = centers.numInstances();
        // Freshly allocated, hence already zeroed.
        int[] membersPerCenter = new int[centerCount];
        if (assignments == null) {
            assignments = new int[listLength];
            java.util.Arrays.fill(assignments, -1);
        }
        if (instOfCent == null) {
            instOfCent = new int[centerCount][];
        }

        boolean unchanged = true;
        for (int pos = 0; pos < listLength; pos++) {
            Instance current = m_Instances.instance(allInstList[pos]);
            int nearest = clusterProcessedInstance(current, centers);
            if (unchanged && nearest != assignments[pos]) {
                unchanged = false;
            }
            membersPerCenter[nearest]++;
            if (!unchanged) {
                assignments[pos] = nearest;
            }
        }

        if (unchanged) {
            PFD(D_FOLLOWSPLIT, "assignToCenters -> it has converged");
            return true;
        }

        PFD(D_FOLLOWSPLIT, "assignToCenters -> it has NOT converged");
        for (int c = 0; c < centerCount; c++) {
            instOfCent[c] = new int[membersPerCenter[c]];
        }
        for (int c = 0; c < centerCount; c++) {
            int cursor = -1;
            for (int slot = 0; slot < membersPerCenter[c]; slot++) {
                cursor = nextAssignedOne(c, cursor, assignments);
                instOfCent[c][slot] = allInstList[cursor];
            }
        }
        return false;
    }

    /**
     * Finds the next position after {@code lastIndex} that is assigned to the given center.
     *
     * @param cent the center index to look for
     * @param lastIndex the position found last; the search starts right after it (use -1 to
     *        start at the beginning)
     * @param assignments center index per position
     * @return the next matching position, or -1 if there is none
     */
    protected int nextAssignedOne(int cent, int lastIndex, int[] assignments) {
        int pos = lastIndex + 1;
        while (pos < assignments.length) {
            if (assignments[pos] == cent) {
                return pos;
            }
            pos++;
        }
        return -1;
    }

    /**
     * Splits a center into two children placed on opposite sides of it.
     *
     * <p>A direction vector is obtained (from the debug vectors file when that file exists,
     * otherwise randomly), scaled to length {@code variance^0.5}, and once added to and once
     * subtracted from the center. Every call increments {@code m_NumSplits}.
     *
     * @param random random source for the direction vector and for converting vectors to instances
     * @param center the center to split
     * @param variance determines the length of the offset vector
     * @param model header information for the new instances
     * @return a data set with exactly two instances: center + offset, then center - offset
     * @throws Exception if reading the debug vector or any vector operation throws
     */
    protected Instances splitCenter(Random random, Instance center, double variance, Instances model) throws Exception {
        m_NumSplits++;
        Instances pair = new Instances(model, 2);

        AlgVector offset;
        boolean useDebugVectors = m_DebugVectorsFile.exists() && m_DebugVectorsFile.isFile();
        if (!useDebugVectors) {
            offset = new AlgVector(model, random);
        } else {
            Instance fromFile = getNextDebugVectorsInstance(model);
            PFD(D_RANDOMVECTOR, "Random Vector from File " + fromFile);
            offset = new AlgVector(fromFile);
        }
        offset.changeLength(Math.pow(variance, 0.5));
        PFD(D_RANDOMVECTOR, "random vector *variance " + offset);

        // Take the copy for the second child before the first one is derived.
        AlgVector plusSide = new AlgVector(center);
        AlgVector minusSide = (AlgVector) plusSide.clone();

        plusSide = plusSide.add(offset);
        Instance firstChild = plusSide.getAsInstance(model, random);
        pair.add(firstChild);
        PFD(D_FOLLOWSPLIT, "first child " + firstChild);

        minusSide = minusSide.substract(offset);
        Instance secondChild = minusSide.getAsInstance(model, random);
        pair.add(secondChild);
        PFD(D_FOLLOWSPLIT, "second child " + secondChild);

        return pair;
    }

    /**
     * Picks two instances at random as child centers. Up to ten draws are made to find a second
     * instance different from the first; after that a duplicate is accepted.
     *
     * <p>The index is computed as {@code |r % n|}. For every {@code r} except
     * {@code Integer.MIN_VALUE} this equals the earlier {@code |r| % n}, so seeded runs pick the
     * same instances. For {@code Integer.MIN_VALUE}, {@code Math.abs} returns a negative number
     * and the earlier expression could produce a negative index.
     *
     * @param random the random source
     * @param instances the instances to choose from; must not be empty
     * @param model header information for the returned data set
     * @return a data set holding the two chosen instances
     */
    protected Instances splitCenters(Random random, Instances instances, Instances model) {
        Instances children = new Instances(model, 2);
        int instIndex = Math.abs(random.nextInt() % instances.numInstances());
        children.add(instances.instance(instIndex));
        int instIndex2 = instIndex;
        for (int count = 0; instIndex2 == instIndex && count < 10; ++count) {
            instIndex2 = Math.abs(random.nextInt() % instances.numInstances());
        }
        children.add(instances.instance(instIndex2));
        return children;
    }

    /**
     * Draws the starting centers at random from the training instances ({@code m_Instances});
     * the same instance may be drawn more than once. Also sets {@code m_NumClusters}.
     *
     * <p>The index is computed as {@code |r % n|}. For every {@code r} except
     * {@code Integer.MIN_VALUE} this equals the earlier {@code |r| % n}, so seeded runs pick the
     * same centers. For {@code Integer.MIN_VALUE}, {@code Math.abs} returns a negative number
     * and the earlier expression could produce a negative index.
     *
     * @param random0 the random source
     * @param model supplies the header of the returned data set
     * @param numClusters the number of centers to draw
     * @return a data set holding {@code numClusters} randomly chosen training instances
     */
    protected Instances makeCentersRandomly(Random random0, Instances model, int numClusters) {
        Instances clusterCenters = new Instances(model, numClusters);
        this.m_NumClusters = numClusters;
        for (int i = 0; i < numClusters; ++i) {
            int instIndex = Math.abs(random0.nextInt() % this.m_Instances.numInstances());
            clusterCenters.add(this.m_Instances.instance(instIndex));
        }
        return clusterCenters;
    }

    /**
     * Computes the BIC of a single center by wrapping its data into one-element structures and
     * delegating to the multi-center variant.
     *
     * @param instList indices of the instances belonging to the center
     * @param center the center
     * @param mle the distortion of the center
     * @param model header information for the temporary one-center data set
     * @return the BIC value
     */
    protected double calculateBIC(int[] instList, Instance center, double mle, Instances model) {
        int[][] singleRegion = new int[][]{instList.clone()};
        double[] singleDistortion = new double[]{mle};
        Instances singleCenter = new Instances(model, 1);
        singleCenter.add(center);
        return calculateBIC(singleRegion, singleCenter, singleDistortion);
    }

    /**
     * Computes the BIC of a set of centers: the summed per-center log-likelihood estimates, minus
     * {@code N * log(N)}, minus {@code (p / 2) * log(N)}, where {@code N} is the total number of
     * assigned instances and {@code p = (k - 1) + k * d + k} for {@code k} centers and {@code d}
     * attributes.
     *
     * @param instOfCent for each center, the indices of its instances
     * @param centers the centers
     * @param mle the distortion of each center
     * @return the BIC value
     */
    protected double calculateBIC(int[][] instOfCent, Instances centers, double[] mle) {
        final int numCenters = centers.numInstances();
        final int numDimensions = centers.numAttributes();
        final int numParameters = numCenters - 1 + numCenters * numDimensions + numCenters;

        double bic = 0.0;
        int totalInstances = 0;
        for (int c = 0; c < centers.numInstances(); c++) {
            int members = instOfCent[c].length;
            bic += logLikelihoodEstimate(members, centers.instance(c), mle[c], centers.numInstances() * 2);
            totalInstances += members;
        }

        bic -= (double) totalInstances * Math.log(totalInstances);
        bic -= (double) numParameters / 2.0 * Math.log(totalInstances);
        return bic;
    }

    /**
     * Estimates the log-likelihood contribution of one center:
     * <pre>
     *   -(n / 2) * log(2 * pi)  -  (n * m / 2) * log(variance)  -  (n - 1) / 2  +  n * log(n)
     * </pre>
     * with {@code n} the number of instances of the center, {@code m} the number of attributes
     * and {@code variance = distortion / (n - 1)}. Centers with fewer than two instances
     * contribute 0.
     *
     * <p>The factor {@code n * m / 2} is evaluated in floating point. The earlier integer
     * division dropped the fractional half whenever {@code n * m} was odd and could overflow for
     * large products.
     *
     * @param numInst number of instances belonging to the center
     * @param center the center; only its number of attributes is used
     * @param distortion the distortion of the center
     * @param numCent not used by this computation
     * @return the log-likelihood estimate, or 0.0 if {@code numInst <= 1}
     */
    protected double logLikelihoodEstimate(int numInst, Instance center, double distortion, int numCent) {
        double loglike = 0.0;
        if (numInst > 1) {
            double variance = distortion / ((double)numInst - 1.0);
            double p1 = -((double)numInst / 2.0) * Math.log(Math.PI * 2);
            double p2 = -((double)numInst * (double)center.numAttributes()) / 2.0 * Math.log(variance);
            double p3 = -((double)numInst - 1.0) / 2.0;
            double p4 = (double)numInst * Math.log(numInst);
            loglike = p1 + p2 + p3 + p4;
        }
        return loglike;
    }

    /**
     * Computes, for every center, the sum of the distances between the center and the instances
     * assigned to it, using the configured distance function.
     *
     * @param instOfCent for each center, the indices (into {@code m_Instances}) of its instances
     * @param centers the centers
     * @return one distortion value per center
     */
    protected double[] distortion(int[][] instOfCent, Instances centers) {
        double[] perCenter = new double[centers.numInstances()];
        for (int c = 0; c < centers.numInstances(); c++) {
            double sum = 0.0;
            for (int member : instOfCent[c]) {
                sum = sum + m_DistanceF.distance(m_Instances.instance(member), centers.instance(c));
            }
            perCenter[c] = sum;
        }
        return perCenter;
    }

    /**
     * Finds the nearest of the given centers for an instance that has already been preprocessed.
     * Ties go to the center with the lower index; if no distance is below
     * {@code Integer.MAX_VALUE}, center 0 is returned.
     *
     * @param instance the instance to assign
     * @param centers the candidate centers
     * @return the index of the nearest center
     */
    protected int clusterProcessedInstance(Instance instance, Instances centers) {
        int nearest = 0;
        double nearestDist = Integer.MAX_VALUE;
        for (int c = 0; c < centers.numInstances(); c++) {
            double candidate = m_DistanceF.distance(instance, centers.instance(c));
            if (candidate < nearestDist) {
                nearestDist = candidate;
                nearest = c;
            }
        }
        return nearest;
    }

    /**
     * Finds the nearest of the first {@code m_NumClusters} model centers for an instance that has
     * already been preprocessed. Ties go to the center with the lower index; if no distance is
     * below {@code Integer.MAX_VALUE}, center 0 is returned.
     *
     * @param instance the instance to assign
     * @return the index of the nearest center
     */
    protected int clusterProcessedInstance(Instance instance) {
        int nearest = 0;
        double nearestDist = Integer.MAX_VALUE;
        for (int c = 0; c < m_NumClusters; c++) {
            double candidate = m_DistanceF.distance(instance, m_ClusterCenters.instance(c));
            if (candidate < nearestDist) {
                nearestDist = candidate;
                nearest = c;
            }
        }
        return nearest;
    }

    /**
     * Assigns an instance to a cluster. The instance is first passed through the
     * missing-values filter that was set up when the clusterer was built.
     *
     * @param instance the instance to classify
     * @return the index of the nearest cluster center
     * @throws Exception if filtering the instance fails
     */
    @Override
    public int clusterInstance(Instance instance) throws Exception {
        m_ReplaceMissingFilter.input(instance);
        Instance filtered = m_ReplaceMissingFilter.output();
        return clusterProcessedInstance(filtered);
    }

    /**
     * Returns the current number of clusters.
     *
     * @return the value of {@code m_NumClusters}
     */
    @Override
    public int numberOfClusters() {
        return m_NumClusters;
    }

    /**
     * Lists the command-line options of this clusterer followed by those of the superclass.
     *
     * <p>NOTE(review): the help text of -C states a default of 0.0 while the field
     * {@code m_CutOffFactor} is initialised to 0.5 - confirm which one option parsing applies.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>();

        // Iteration limits.
        options.add(new Option("\tmaximum number of overall iterations\n\t(default 1).", "I", 1, "-I <num>"));
        options.add(new Option("\tmaximum number of iterations in the kMeans loop in\n\tthe Improve-Parameter part \n\t(default 1000).", "M", 1, "-M <num>"));
        options.add(new Option("\tmaximum number of iterations in the kMeans loop\n\tfor the splitted centroids in the Improve-Structure part \n\t(default 1000).", "J", 1, "-J <num>"));

        // Cluster-count bounds.
        options.add(new Option("\tminimum number of clusters\n\t(default 2).", "L", 1, "-L <num>"));
        options.add(new Option("\tmaximum number of clusters\n\t(default 4).", "H", 1, "-H <num>"));

        // Distance, tree and split settings.
        options.add(new Option("\tdistance value for binary attributes\n\t(default 1.0).", "B", 1, "-B <value>"));
        options.add(new Option("\tUses the KDTree internally\n\t(default no).", "use-kdtree", 0, "-use-kdtree"));
        options.add(new Option("\tFull class name of KDTree class to use, followed\n\tby scheme options.\n\teg: \"weka.core.neighboursearch.kdtrees.KDTree -P\"\n\t(default no KDTree class used).", "K", 1, "-K <KDTree class specification>"));
        options.add(new Option("\tcutoff factor, takes the given percentage of the splitted \n\tcentroids if none of the children win\n\t(default 0.0).", "C", 1, "-C <value>"));
        options.add(new Option("\tFull class name of Distance function class to use, followed\n\tby scheme options.\n\t(default weka.core.EuclideanDistance).", "D", 1, "-D <distance function class specification>"));

        // Files and debugging.
        options.add(new Option("\tfile to read starting centers from (ARFF format).", "N", 1, "-N <file name>"));
        options.add(new Option("\tfile to write centers to (ARFF format).", "O", 1, "-O <file name>"));
        options.add(new Option("\tThe debug level.\n\t(default 0)", "U", 1, "-U <int>"));
        options.add(new Option("\tThe debug vectors file.", "Y", 1, "-Y <file name>"));

        // Options inherited from the superclass come last.
        for (Enumeration inherited = super.listOptions(); inherited.hasMoreElements(); ) {
            options.add((Option) inherited.nextElement());
        }
        return options.elements();
    }

    /**
     * Returns the tip text for the minimum-number-of-clusters property.
     *
     * @return the tip text
     */
    public String minNumClustersTipText() {
        return "set minimum number of clusters";
    }

    /**
     * Sets the minimum number of clusters. The value is stored without validation; a minimum
     * above the maximum is rejected later, when the clusterer is built.
     *
     * @param n the minimum number of clusters
     */
    public void setMinNumClusters(int n) {
        m_MinNumClusters = n;
    }

    /**
     * Returns the minimum number of clusters.
     *
     * @return the minimum number of clusters
     */
    public int getMinNumClusters() {
        return m_MinNumClusters;
    }

    /**
     * Returns the tip text for the maximum-number-of-clusters property.
     *
     * @return the tip text
     */
    public String maxNumClustersTipText() {
        return "set maximum number of clusters";
    }

    /**
     * Sets the maximum number of clusters. A value below the current minimum is silently
     * ignored and the previous maximum is kept.
     *
     * @param n the maximum number of clusters
     */
    public void setMaxNumClusters(int n) {
        if (n < m_MinNumClusters) {
            return;
        }
        m_MaxNumClusters = n;
    }

    /**
     * Returns the maximum number of clusters.
     *
     * @return the maximum number of clusters
     */
    public int getMaxNumClusters() {
        return m_MaxNumClusters;
    }

    /**
     * Returns the tip text for the maximum-iterations property.
     *
     * @return the tip text
     */
    public String maxIterationsTipText() {
        return "the maximum number of iterations to perform";
    }

    /**
     * Sets the maximum number of iterations.
     *
     * <p>Only negative values are rejected; zero is accepted.
     *
     * @param i the maximum number of iterations
     * @throws Exception if {@code i} is negative
     */
    public void setMaxIterations(int i) throws Exception {
        if (i >= 0) {
            m_MaxIterations = i;
            return;
        }
        throw new Exception("Only positive values for iteration number allowed (Option I).");
    }

    /**
     * Returns the maximum number of iterations.
     *
     * @return the currently stored iteration limit
     */
    public int getMaxIterations() {
        return m_MaxIterations;
    }

    /**
     * Returns the tip text for the KMeans iteration-limit property.
     *
     * @return a short description of the property
     */
    public String maxKMeansTipText() {
        return "the maximum number of iterations to perform in KMeans";
    }

    /**
     * Sets the maximum number of KMeans iterations.
     *
     * <p>The same value is also written to the iteration limit used for the
     * child centers, overwriting whatever was stored there before.
     *
     * @param i the maximum number of KMeans iterations
     */
    public void setMaxKMeans(int i) {
        m_MaxKMeans = i;
        // Keep the children limit in step with the main limit.
        m_MaxKMeansForChildren = i;
    }

    /**
     * Returns the maximum number of KMeans iterations.
     *
     * @return the currently stored KMeans iteration limit
     */
    public int getMaxKMeans() {
        return m_MaxKMeans;
    }

    /**
     * Returns the tip text for the child-center KMeans iteration-limit property.
     *
     * @return a short description of the property
     */
    public String maxKMeansForChildrenTipText() {
        return "the maximum number of iterations KMeans that is performed on the child centers";
    }

    /**
     * Sets the maximum number of KMeans iterations for the child centers.
     *
     * @param i the iteration limit for the child centers
     */
    public void setMaxKMeansForChildren(int i) {
        m_MaxKMeansForChildren = i;
    }

    /**
     * Returns the maximum number of KMeans iterations for the child centers.
     *
     * @return the currently stored iteration limit for the child centers
     */
    public int getMaxKMeansForChildren() {
        return m_MaxKMeansForChildren;
    }

    /**
     * Returns the tip text for the cut-off factor property.
     *
     * @return a short description of the property
     */
    public String cutOffFactorTipText() {
        return "the cut-off factor to use";
    }

    /**
     * Sets the cut-off factor. The value is stored as given.
     *
     * @param i the new cut-off factor
     */
    public void setCutOffFactor(double i) {
        m_CutOffFactor = i;
    }

    /**
     * Returns the cut-off factor.
     *
     * @return the currently stored cut-off factor
     */
    public double getCutOffFactor() {
        return m_CutOffFactor;
    }

    /**
     * Returns the tip text for the bin value property.
     *
     * @return a short description of the property
     */
    public String binValueTipText() {
        return "Set the value that represents true in the new attributes.";
    }

    /**
     * Returns the bin value.
     *
     * @return the currently stored bin value
     */
    public double getBinValue() {
        return m_BinValue;
    }

    /**
     * Sets the bin value. The value is stored as given.
     *
     * @param value the new bin value
     */
    public void setBinValue(double value) {
        m_BinValue = value;
    }

    /**
     * Returns the tip text for the distance function property.
     *
     * @return a short description of the property
     */
    public String distanceFTipText() {
        return "The distance function to use.";
    }

    /**
     * Sets the distance function. The reference is stored as given,
     * without a null check or copy.
     *
     * @param distanceF the distance function to use
     */
    public void setDistanceF(DistanceFunction distanceF) {
        m_DistanceF = distanceF;
    }

    /**
     * Returns the distance function.
     *
     * @return the currently stored distance function
     */
    public DistanceFunction getDistanceF() {
        return m_DistanceF;
    }

    /**
     * Builds the specification string of the current distance function:
     * its class name, followed by a space and its joined options when the
     * function is an {@link OptionHandler}.
     *
     * @return the class name, optionally followed by the function's options
     */
    protected String getDistanceFSpec() {
        DistanceFunction function = this.getDistanceF();
        String spec = function.getClass().getName();
        if (function instanceof OptionHandler) {
            spec = spec + " " + Utils.joinOptions(function.getOptions());
        }
        return spec;
    }

    /**
     * Returns the tip text for the debug vectors file property.
     *
     * @return a short description of the property
     */
    public String debugVectorsFileTipText() {
        return "The file containing the debug vectors (only for debugging!).";
    }

    /**
     * Sets the file holding the debug vectors. Only the reference is
     * stored; the file is not opened here.
     *
     * @param value the debug vectors file
     */
    public void setDebugVectorsFile(File value) {
        m_DebugVectorsFile = value;
    }

    /**
     * Returns the file holding the debug vectors.
     *
     * @return the currently stored debug vectors file
     */
    public File getDebugVectorsFile() {
        return m_DebugVectorsFile;
    }

    /**
     * Opens the debug vectors file, loads its contents as a set of
     * instances and resets the read position to the first vector.
     *
     * <p>NOTE(review): the reader is kept in a field and is not closed by
     * this method.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    public void initDebugVectorsInput() throws Exception {
        Reader input = new BufferedReader(new FileReader(m_DebugVectorsFile));
        m_DebugVectorsInput = input;
        m_DebugVectors = new Instances(input);
        m_DebugVectorsIndex = 0;
    }

    /**
     * Returns the next prefabricated debug vector and advances the read
     * position by one.
     *
     * <p>The returned instance is the one stored in the debug vector set
     * (not a copy); its dataset reference is changed to {@code model}.
     *
     * @param model the dataset the returned instance is attached to
     * @return the next debug vector
     * @throws Exception if all debug vectors have already been handed out
     */
    public Instance getNextDebugVectorsInstance(Instances model) throws Exception {
        final int index = m_DebugVectorsIndex;
        if (index >= m_DebugVectors.numInstances()) {
            throw new Exception("no more prefabricated Vectors");
        }
        Instance next = m_DebugVectors.instance(index);
        next.setDataset(model);
        m_DebugVectorsIndex = index + 1;
        return next;
    }

    /**
     * Returns the tip text for the input center file property.
     *
     * @return a short description of the property
     */
    public String inputCenterFileTipText() {
        return "The file to read the list of centers from.";
    }

    /**
     * Sets the file to read the centers from. Only the reference is
     * stored; the file is not opened here.
     *
     * @param value the input center file
     */
    public void setInputCenterFile(File value) {
        m_InputCenterFile = value;
    }

    /**
     * Returns the file to read the centers from.
     *
     * @return the currently stored input center file
     */
    public File getInputCenterFile() {
        return m_InputCenterFile;
    }

    /**
     * Returns the tip text for the output center file property.
     *
     * @return a short description of the property
     */
    public String outputCenterFileTipText() {
        return "The file to write the list of centers to.";
    }

    /**
     * Sets the file to write the centers to. Only the reference is
     * stored; the file is not opened here.
     *
     * @param value the output center file
     */
    public void setOutputCenterFile(File value) {
        m_OutputCenterFile = value;
    }

    /**
     * Returns the file to write the centers to.
     *
     * @return the currently stored output center file
     */
    public File getOutputCenterFile() {
        return m_OutputCenterFile;
    }

    /**
     * Returns the tip text for the KDTree property.
     *
     * @return a short description of the property
     */
    public String KDTreeTipText() {
        return "The KDTree to use.";
    }

    /**
     * Sets the KDTree. The reference is stored as given, without a null
     * check or copy.
     *
     * @param k the KDTree to use
     */
    public void setKDTree(KDTree k) {
        m_KDTree = k;
    }

    /**
     * Returns the KDTree.
     *
     * @return the currently stored KDTree
     */
    public KDTree getKDTree() {
        return m_KDTree;
    }

    /**
     * Returns the tip text for the use-KDTree property.
     *
     * @return a short description of the property
     */
    public String useKDTreeTipText() {
        return "Whether to use the KDTree.";
    }

    /**
     * Sets the flag that states whether the KDTree is used.
     *
     * @param value {@code true} to use the KDTree
     */
    public void setUseKDTree(boolean value) {
        m_UseKDTree = value;
    }

    /**
     * Returns the flag that states whether the KDTree is used.
     *
     * @return the currently stored use-KDTree flag
     */
    public boolean getUseKDTree() {
        return m_UseKDTree;
    }

    /**
     * Builds the specification string of the current KDTree: its class
     * name, followed by a space and its joined options when the tree is an
     * {@link OptionHandler}.
     *
     * @return the class name, optionally followed by the tree's options
     */
    protected String getKDTreeSpec() {
        KDTree tree = this.getKDTree();
        String spec = tree.getClass().getName();
        if (tree instanceof OptionHandler) {
            spec = spec + " " + Utils.joinOptions(tree.getOptions());
        }
        return spec;
    }

    /**
     * Returns the tip text for the debug level property.
     *
     * @return a short description of the property
     */
    public String debugLevelTipText() {
        return "The debug level to use.";
    }

    /**
     * Sets the debug level. The value is stored as given.
     *
     * @param d the new debug level
     */
    public void setDebugLevel(int d) {
        m_DebugLevel = d;
    }

    /**
     * Returns the debug level.
     *
     * @return the currently stored debug level
     */
    public int getDebugLevel() {
        return m_DebugLevel;
    }

    /**
     * Hook for checking the instances. The body is empty, so calling this
     * method currently does nothing.
     */
    protected void checkInstances() {
    }

    /**
     * Parses the given option array and configures this clusterer.
     *
     * <p>Options handled here, with the value applied when the option is absent:
     * <ul>
     *   <li>{@code -I <int>} maximum iterations (1)</li>
     *   <li>{@code -M <int>} maximum KMeans iterations (1000); also sets the
     *       children limit, which {@code -J} then overrides</li>
     *   <li>{@code -J <int>} maximum KMeans iterations for children (1000)</li>
     *   <li>{@code -L <int>} minimum number of clusters (2)</li>
     *   <li>{@code -H <int>} maximum number of clusters (4)</li>
     *   <li>{@code -B <double>} bin value (1.0)</li>
     *   <li>{@code -use-kdtree} flag; only when it is set is {@code -K <spec>}
     *       read (a default {@code KDTree} is used otherwise)</li>
     *   <li>{@code -C <double>} cut-off factor (0.0)</li>
     *   <li>{@code -D <spec>} distance function ({@code EuclideanDistance})</li>
     *   <li>{@code -N <file>} input center file; a reader on it is opened
     *       immediately</li>
     *   <li>{@code -O <file>} output center file; a writer on it is opened
     *       immediately</li>
     *   <li>{@code -U <int>} debug level (0)</li>
     *   <li>{@code -Y <file>} debug vectors file</li>
     * </ul>
     * Remaining options are passed on to the superclass.
     *
     * @param options the option array to parse
     * @throws Exception if an option value is invalid, a class specification
     *         cannot be instantiated, or a given file cannot be opened
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String funcName;
        String[] funcSpec;
        String funcString;

        String optionString = Utils.getOption('I', options);
        if (optionString.length() != 0) {
            this.setMaxIterations(Integer.parseInt(optionString));
        } else {
            this.setMaxIterations(1);
        }

        optionString = Utils.getOption('M', options);
        if (optionString.length() != 0) {
            this.setMaxKMeans(Integer.parseInt(optionString));
        } else {
            this.setMaxKMeans(1000);
        }

        // Parsed after -M because setMaxKMeans also overwrites the children limit.
        optionString = Utils.getOption('J', options);
        if (optionString.length() != 0) {
            this.setMaxKMeansForChildren(Integer.parseInt(optionString));
        } else {
            this.setMaxKMeansForChildren(1000);
        }

        // The minimum is set before the maximum because setMaxNumClusters
        // ignores values below the current minimum.
        optionString = Utils.getOption('L', options);
        if (optionString.length() != 0) {
            this.setMinNumClusters(Integer.parseInt(optionString));
        } else {
            this.setMinNumClusters(2);
        }

        optionString = Utils.getOption('H', options);
        if (optionString.length() != 0) {
            this.setMaxNumClusters(Integer.parseInt(optionString));
        } else {
            this.setMaxNumClusters(4);
        }

        optionString = Utils.getOption('B', options);
        if (optionString.length() != 0) {
            this.setBinValue(Double.parseDouble(optionString));
        } else {
            this.setBinValue(1.0);
        }

        // The -K specification is only consumed when -use-kdtree is present.
        this.setUseKDTree(Utils.getFlag("use-kdtree", options));
        if (this.getUseKDTree()) {
            funcString = Utils.getOption('K', options);
            if (funcString.length() != 0) {
                funcSpec = Utils.splitOptions(funcString);
                if (funcSpec.length == 0) {
                    throw new Exception("Invalid function specification string");
                }
                // First token is the class name; the rest are its options.
                funcName = funcSpec[0];
                funcSpec[0] = "";
                this.setKDTree((KDTree)Utils.forName(KDTree.class, funcName, funcSpec));
            } else {
                this.setKDTree(new KDTree());
            }
        } else {
            this.setKDTree(new KDTree());
        }

        optionString = Utils.getOption('C', options);
        if (optionString.length() != 0) {
            this.setCutOffFactor(Double.parseDouble(optionString));
        } else {
            this.setCutOffFactor(0.0);
        }

        funcString = Utils.getOption('D', options);
        if (funcString.length() != 0) {
            funcSpec = Utils.splitOptions(funcString);
            if (funcSpec.length == 0) {
                throw new Exception("Invalid function specification string");
            }
            // First token is the class name; the rest are its options.
            funcName = funcSpec[0];
            funcSpec[0] = "";
            this.setDistanceF((DistanceFunction)Utils.forName(DistanceFunction.class, funcName, funcSpec));
        } else {
            this.setDistanceF(new EuclideanDistance());
        }

        optionString = Utils.getOption('N', options);
        if (optionString.length() != 0) {
            this.setInputCenterFile(new File(optionString));
            this.m_CenterInput = new BufferedReader(new FileReader(optionString));
        } else {
            this.setInputCenterFile(new File(System.getProperty("user.dir")));
            this.m_CenterInput = null;
        }

        optionString = Utils.getOption('O', options);
        if (optionString.length() != 0) {
            this.setOutputCenterFile(new File(optionString));
            this.m_CenterOutput = new PrintWriter(new FileOutputStream(optionString));
        } else {
            this.setOutputCenterFile(new File(System.getProperty("user.dir")));
            this.m_CenterOutput = null;
        }

        optionString = Utils.getOption('U', options);
        int debugLevel = 0;
        if (optionString.length() != 0) {
            try {
                debugLevel = Integer.parseInt(optionString);
            }
            catch (NumberFormatException e) {
                // Separate the offending value from the message text and keep the cause.
                throw new Exception(optionString + " is an illegal value for option -U", e);
            }
        }
        this.setDebugLevel(debugLevel);

        optionString = Utils.getOption('Y', options);
        if (optionString.length() != 0) {
            this.setDebugVectorsFile(new File(optionString));
        } else {
            this.setDebugVectorsFile(new File(System.getProperty("user.dir")));
            this.m_DebugVectorsInput = null;
            this.m_DebugVectors = null;
        }

        super.setOptions(options);
    }

    /**
     * Returns the current settings of this clusterer as an option array.
     *
     * <p>{@code -use-kdtree} and {@code -K} are emitted only when the
     * KDTree is in use, {@code -D} only when a distance function is set,
     * {@code -U} only for a debug level above zero, and the file options
     * {@code -N}, {@code -O} and {@code -Y} only when the respective file
     * exists and is a regular file. The superclass options are appended
     * last.
     *
     * @return the option array
     */
    @Override
    public String[] getOptions() {
        Vector<String> opts = new Vector<String>();

        opts.add("-I");
        opts.add(String.valueOf(this.getMaxIterations()));
        opts.add("-M");
        opts.add(String.valueOf(this.getMaxKMeans()));
        opts.add("-J");
        opts.add(String.valueOf(this.getMaxKMeansForChildren()));
        opts.add("-L");
        opts.add(String.valueOf(this.getMinNumClusters()));
        opts.add("-H");
        opts.add(String.valueOf(this.getMaxNumClusters()));
        opts.add("-B");
        opts.add(String.valueOf(this.getBinValue()));

        if (this.getUseKDTree()) {
            opts.add("-use-kdtree");
            opts.add("-K");
            opts.add(String.valueOf(this.getKDTreeSpec()));
        }

        opts.add("-C");
        opts.add(String.valueOf(this.getCutOffFactor()));

        if (this.getDistanceF() != null) {
            opts.add("-D");
            opts.add(String.valueOf(this.getDistanceFSpec()));
        }

        File centersIn = this.getInputCenterFile();
        if (centersIn.exists() && centersIn.isFile()) {
            opts.add("-N");
            opts.add(String.valueOf(centersIn));
        }

        File centersOut = this.getOutputCenterFile();
        if (centersOut.exists() && centersOut.isFile()) {
            opts.add("-O");
            opts.add(String.valueOf(centersOut));
        }

        int level = this.getDebugLevel();
        if (level > 0) {
            opts.add("-U");
            opts.add(String.valueOf(level));
        }

        File vectors = this.getDebugVectorsFile();
        if (vectors.exists() && vectors.isFile()) {
            opts.add("-Y");
            opts.add(String.valueOf(vectors));
        }

        for (String inherited : super.getOptions()) {
            opts.add(inherited);
        }
        return opts.toArray(new String[opts.size()]);
    }

    /**
     * Returns a textual report of the clustering run: iteration and split
     * statistics, the cutoff factor, the cluster centers, the distortion
     * (when available) and the BIC value.
     *
     * <p>When no cluster centers are available yet, a short placeholder
     * message is returned instead of the report, because the center listing
     * would otherwise dereference a null center set.
     *
     * @return the report, or a placeholder when there are no cluster centers
     */
    @Override
    public String toString() {
        if (this.m_ClusterCenters == null) {
            return "No clusterer built yet!";
        }
        StringBuilder temp = new StringBuilder();
        temp.append("\nXMeans\n======\n");
        temp.append("Requested iterations            : " + this.m_MaxIterations + "\n");
        temp.append("Iterations performed            : " + this.m_IterationCount + "\n");
        if (this.m_KMeansStopped > 0) {
            temp.append("kMeans did not converge\n");
            temp.append("  but was stopped by max-loops " + this.m_KMeansStopped + " times (max kMeans-iter)\n");
        }
        temp.append("Splits prepared                 : " + this.m_NumSplits + "\n");
        temp.append("Splits performed                : " + this.m_NumSplitsDone + "\n");
        temp.append("Cutoff factor                   : " + this.m_CutOffFactor + "\n");
        // Guard against division by zero when no split was performed.
        double perc = this.m_NumSplitsDone > 0 ? (double)this.m_NumSplitsStillDone / (double)this.m_NumSplitsDone * 100.0 : 0.0;
        temp.append("Percentage of splits accepted \nby cutoff factor                : " + Utils.doubleToString(perc, 2) + " %\n");
        temp.append("------\n");
        // The cutoff factor line is emitted a second time between the
        // separators; kept so the report format stays unchanged.
        temp.append("Cutoff factor                   : " + this.m_CutOffFactor + "\n");
        temp.append("------\n");
        temp.append("\nCluster centers                 : " + this.m_NumClusters + " centers\n");
        for (int i = 0; i < this.m_NumClusters; ++i) {
            temp.append("\nCluster " + i + "\n           ");
            for (int j = 0; j < this.m_ClusterCenters.numAttributes(); ++j) {
                if (this.m_ClusterCenters.attribute(j).isNominal()) {
                    // Nominal attributes are printed by label, using the stored value as index.
                    temp.append(" " + this.m_ClusterCenters.attribute(j).value((int)this.m_ClusterCenters.instance(i).value(j)));
                    continue;
                }
                temp.append(" " + this.m_ClusterCenters.instance(i).value(j));
            }
        }
        if (this.m_Mle != null) {
            temp.append("\n\nDistortion: " + Utils.doubleToString(Utils.sum(this.m_Mle), 6) + "\n");
        }
        temp.append("BIC-Value : " + Utils.doubleToString(this.m_Bic, 6) + "\n");
        return temp.toString();
    }

    /**
     * Prints every cluster center to standard output, one per line, but
     * only when the given level equals the configured debug level.
     *
     * @param debugLevel the level this output belongs to
     */
    protected void PrCentersFD(int debugLevel) {
        if (debugLevel != this.m_DebugLevel) {
            return;
        }
        final int count = this.m_ClusterCenters.numInstances();
        for (int idx = 0; idx < count; idx++) {
            System.out.println(this.m_ClusterCenters.instance(idx));
        }
    }

    /**
     * Tests whether the given level equals the configured debug level.
     *
     * @param debugLevel the level to test
     * @return {@code true} if both levels are equal
     */
    protected boolean TFD(int debugLevel) {
        return this.m_DebugLevel == debugLevel;
    }

    /**
     * Prints the given text to standard output, but only when the given
     * level equals the configured debug level.
     *
     * @param debugLevel the level this output belongs to
     * @param output the text to print
     */
    protected void PFD(int debugLevel, String output) {
        if (debugLevel != this.m_DebugLevel) {
            return;
        }
        System.out.println(output);
    }

    /**
     * Prints the given text to standard output, but only while the current
     * debug flag is set.
     *
     * @param output the text to print
     */
    protected void PFD_CURR(String output) {
        if (!this.m_CurrDebugFlag) {
            return;
        }
        System.out.println(output);
    }

    /**
     * Returns the revision string, extracted from the embedded revision
     * keyword.
     *
     * @return the result of {@code RevisionUtils.extract} on the keyword
     */
    @Override
    public String getRevision() {
        final String keyword = "$Revision: 5488 $";
        return RevisionUtils.extract(keyword);
    }

    /**
     * Command-line entry point: runs a new {@code XMeans} instance with the
     * given arguments.
     *
     * @param argv the command-line arguments
     */
    public static void main(String[] argv) {
        XMeans clusterer = new XMeans();
        XMeans.runClusterer(clusterer, argv);
    }
}

