Repository /Rseslib/rseslib-3.0.2.jar:rseslib.processing.classification.parameterised.pca.LocalPcaClassifier


Back

No file description

Source code

/*
 * $RCSfile: LocalPcaClassifier.java,v $
 * $Revision: 1.27 $
 * $Date: 2007/06/30 17:30:33 $
 * $Author: wojna $
 * 
 * Copyright (C) 2002 - 2007 Logic Group, Institute of Mathematics, Warsaw University
 * 
 *  This file is part of Rseslib.
 *
 *  Rseslib is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Rseslib is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package rseslib.processing.classification.parameterised.pca;

import java.util.ArrayList;
import java.util.Properties;

import rseslib.processing.classification.parameterised.AbstractParameterisedClassifier;
import rseslib.structure.attribute.BadHeaderException;
import rseslib.structure.attribute.NominalAttribute;
import rseslib.structure.data.DoubleData;
import rseslib.structure.data.DoubleDataWithDecision;
import rseslib.structure.table.DoubleDataTable;
import rseslib.structure.vector.Vector;
import rseslib.structure.vector.VectorForDoubleData;
import rseslib.structure.vector.subspace.PCASubspace;
import rseslib.system.PropertyConfigurationException;
import rseslib.system.Report;
import rseslib.system.progress.EmptyProgress;
import rseslib.system.progress.Progress;

/**
 * Classifier that assigns the decision
 * calculated on the basis of principal
 * subspaces of clusters in decision classes.
 *
 * @author      Rafal Falkowski
 */
public class LocalPcaClassifier extends AbstractParameterisedClassifier
{
    /** Parameter name. */
    public static final String PRINCIPAL_SUBSPACE_DIM = "principalSubspaceDim";
    /** Parameter name. */
    private static final String OPTIMAL_DIM = "optimalDimension";
    /** Name of the property that indicates number of local linear models for each decision class. */
    public static final String NUMBER_OF_CLUSTERS = "noOfLocalLinearModels";
    /** The thereshold of the relative error of clusterization. */
    public static final double TAU = 0.000001;
    /** Parameter of clusterization distance. */
    public static final double ALFA = 1.5;
    /** Maximal number of iterations of clusterization. */
    public static final int MAX_IT = 100;
    /** The default decision defined by the largest support in a training data set. */
    private int m_nDefaultDec;
    /** Array of principal subspaces for each decision class. */
    private PCASubspace[] m_nSubspaces;
    /** Number of decisions. */
    private int m_nNoOfDec;
    /** Decision attribute. */
    NominalAttribute m_DecisionAttribute;
    /** The dimension of the data space, i.e. number of attributes of data without decision attribute. */
    private int m_nDim = 0;
    /** The number of local linear models for each decision class. */
    private int m_nNoOfClusters = 1;
    /** Parameter value, i.e. the dimension of the principal subspace. */
    private int m_nPar;
    /** Optimal dimension of the principal subspace. */
    private int m_nOptimal;

    /**
     * Constructor computes principal subspaces for each cluster in decision classes.
     *
     * @param prop                Parameters of this clasifier. If null,
     *                            parameters are loaded from the configuration directory.
     * @param trainTable          Table used to train classifier.
     * @param prog                Progress object for training process.
     * @throws InterruptedException when the user interrupts the execution.
     */
        public LocalPcaClassifier(Properties prop, DoubleDataTable trainTable, Progress prog) throws PropertyConfigurationException, BadHeaderException, InterruptedException
        {
                super(prop, OPTIMAL_DIM);
                boolean numericNotFound = true;
                for (int at = 0; numericNotFound && at < trainTable.attributes().noOfAttr(); at++)
                    if (trainTable.attributes().isConditional(at) && trainTable.attributes().isNumeric(at))
                        numericNotFound = false;
                if (numericNotFound) throw new BadHeaderException("Local pca classifier requires numerical attributes");
                prog.set("Learning LPCA classifier from training table", 1);
                NominalAttribute decAttr = trainTable.attributes().nominalDecisionAttribute();
                int[] decDistr = trainTable.getDecisionDistribution();
                m_nDefaultDec = 0;
                m_DecisionAttribute = decAttr;
                m_nNoOfDec = m_DecisionAttribute.noOfValues();
                m_nNoOfClusters = getIntProperty(NUMBER_OF_CLUSTERS);
                m_nPar = getIntProperty(PRINCIPAL_SUBSPACE_DIM);

                for (int dec = 1; dec < m_nNoOfDec; dec++)
                        if (decDistr[dec] > decDistr[m_nDefaultDec]) m_nDefaultDec = dec;

                // Okreslenie ile jest atrybutow warunkowych.
                for (int att = 0; att < trainTable.attributes().noOfAttr(); att++)
                        if (trainTable.attributes().isConditional(att))	m_nDim++;

                // Podzial tabeli na klasy decyzyjne.
//                HashMap decName = new HashMap(); // nazwy decyzji, wg ktorych klasyfikujemy obiekty
                ArrayList<DoubleData>[] decisionDistr = new ArrayList[m_nNoOfDec];
                for (int i = 0; i < m_nNoOfDec; i++)
                        decisionDistr[i] = new ArrayList<DoubleData>();
                for (DoubleData dObj : trainTable.getDataObjects())
                {
/*                        Integer decNam = new Integer(((DoubleDataWithDecision)dObj).getDecision());
                        if (!decName.containsKey(decNam))
                        {
                                decName.put(decNam, new Integer(dec));
                                dec++;
                        }
                        decisionDistr[((Integer)decName.get(decNam)).intValue()].add(dObj);*/
                        decisionDistr[trainTable.attributes().nominalDecisionAttribute().localValueCode(((DoubleDataWithDecision)dObj).getDecision())].add(dObj);
                }

                // Wygenerowanie podprzestrzeni glownych i ich macierzy projekcji dla poszczegolnych klas decyzyjnych.
                m_nSubspaces = new PCASubspace[m_nNoOfDec*m_nNoOfClusters];
                for (int i = 0; i < m_nNoOfDec; i++) // dla kazdej decyzji
                {
                        // przygotowanie duzej petli w metodzie
                        ArrayList<DoubleData>[] compUnit = new ArrayList[m_nNoOfClusters]; // agenci obliczeniowi
                        Vector[] centroid = new VectorForDoubleData[m_nNoOfClusters]; // centroidy klastrow
                        for (int cl = 0; cl < m_nNoOfClusters; cl++) //	inicjalizacja klastrow
                        {
                          compUnit[cl] = new ArrayList<DoubleData>();
                          // losowo wybrane wektory z i-tej klasy decyzyjnej staja sie centoidami
                          int pom = (int)(Math.random()*decisionDistr[i].size());
                          centroid[cl] = new VectorForDoubleData((DoubleData)decisionDistr[i].get(pom));
                        }
                        int noOfEpoch = 0;
                        double clusterErr = 0;
                        // DUZA PETLA W METODZIE:
                        // odleglosc najblizszego klastra z poprzedniej epoki dla kazdego obiektu inna!!!
                        double[] rhoCentroidPrev = new double[decisionDistr[i].size()];
                        double[] rhoPrev = new double[decisionDistr[i].size()];
                        for (int k = 0; k < decisionDistr[i].size(); k++)
                        {
                          rhoCentroidPrev[k] = -1;
                          rhoPrev[k] = -1;
                        }
                        // numer najblizszego klastra z poprzedniej epoki dla kazdego obiektu inny!!!
                        int[] bestClNo = new int[decisionDistr[i].size()];

                        while (1 > 0)
                        {
                          noOfEpoch++;
                          double prevClusterErr = clusterErr;
                          clusterErr = 0;
                          // licznik obiektow
                          int k = 0;
                          for (DoubleData dObj : decisionDistr[i])
                          {
                            double rhoCurr = -1;
                            int bCN = 0;
                            if (noOfEpoch == 1)
                            {
                              for (int cl = 0; cl < m_nNoOfClusters; cl++)
                              {
                                Vector dVec = new VectorForDoubleData(dObj);
                                dVec.subtract(centroid[cl]);
                                double rho1 = dVec.euclideanNorm();
                                if (rhoCurr < 0 || rhoCurr > rho1)
                                {
                                  rhoCurr = rho1;
                                  rhoCentroidPrev[k] = rho1;
                                  rhoPrev[k] = rho1;
                                  bestClNo[k] = cl;
                                }
                              }
                            } else
                            {
                                double rhoCentr = 0;
                                for (int cl = 0; cl < m_nNoOfClusters; cl++)
                                {
                                  Vector dVec = new VectorForDoubleData(dObj);
                                  double rho1 = m_nSubspaces[i*m_nNoOfClusters+cl].euclideanDist(dVec);
                                  if (rhoCurr < 0 || rhoCurr > rho1)
                                  {
                                    rhoCurr = rho1;
                                    bCN = cl;
                                    dVec.subtract(centroid[cl]);
                                    rhoCentr = dVec.euclideanNorm();
                                  }
                                }
                                if (rhoCentr < ALFA*rhoCentroidPrev[k])
                                {
                                  rhoCentroidPrev[k] = rhoCentr;
                                  bestClNo[k] = bCN;
                                  rhoPrev[k] = rhoCurr;
                                }
                              }
                              clusterErr += rhoPrev[k];
                              compUnit[bestClNo[k]].add(dObj);
                              k++;
                            }
                            for (int cl = 0; cl < m_nNoOfClusters; cl++)
                            {
                              if (compUnit[cl].isEmpty())
                              {
                                int pom = (int)(Math.random()*decisionDistr[i].size());
                                DoubleData dObj = (DoubleData)decisionDistr[i].get(pom);
                                centroid[cl] = new VectorForDoubleData(dObj);
                                compUnit[cl].add(dObj);
                                rhoCentroidPrev[pom] = 0;
                                rhoPrev[pom] = 0;
                                bestClNo[pom] = cl;
                              }
                              m_nSubspaces[i * m_nNoOfClusters + cl] = new PCASubspace(compUnit[cl], m_nDim, m_nPar);
                              centroid[cl] = new VectorForDoubleData((Vector)m_nSubspaces[i * m_nNoOfClusters + cl].getCentroid());
                            }
                            if (Math.abs(prevClusterErr - clusterErr)/clusterErr < TAU || noOfEpoch >= MAX_IT) break;
                            for (int cl = 0; cl < m_nNoOfClusters; cl++)
                              compUnit[cl].clear();
                        }
                }
                learnOptimalParameterValue(trainTable, new EmptyProgress());
                m_nOptimal = getIntProperty(OPTIMAL_DIM);
                prog.step();
    }

        /**
         * Classifies a test object on the basis of the dimension of the principal subspace
         * and number of clusters in each decision class.
         *
         * @param dObj         Test object.
         * @return             Array of assigned decisions, indices correspond to parameters values.
         */

        public double[] classifyWithParameter(DoubleData dObj)
        {
                double[] decisions = new double[m_nPar];
                int[] bestDec = new int[decisions.length];
                double[] resid = new double[decisions.length];
                double[] minResid = new double[decisions.length];
                for (int n = 1; n < decisions.length; n++)
                {
                        resid[n] = 0;
                        minResid[n] = -1;
                        bestDec[n] = m_nDefaultDec;
                        decisions[n] = m_DecisionAttribute.globalValueCode(bestDec[n]);;
                }
                Vector x = new VectorForDoubleData(dObj);
                for (int i = 0; i < m_nNoOfDec*m_nNoOfClusters; i++)
                {
                        Vector[] px = m_nSubspaces[i].projections(new VectorForDoubleData(dObj));
                        for (int n = 1; n < decisions.length; n++)
                        {
                                px[n].subtract(x);
                                resid[n] = px[n].squareEuclideanNorm();
                                if (minResid[n] < 0 || minResid[n] > resid[n])
                                {
                                        minResid[n] = resid[n];
                                        bestDec[n] = i / m_nNoOfClusters;
                                }
                                decisions[n] = m_DecisionAttribute.globalValueCode(bestDec[n]);
                        }
                }
                return decisions;
        }


        /**
         * Assigns a decision to a single test object.
         *
         * @param dObj  Test object.
         * @return      Assigned decision.
         */
        public double classify(DoubleData dObj)
        {
            return classifyWithParameter(dObj)[m_nOptimal];
        }

        /**
          * Calculates statistics.
          */
        public void calculateStatistics()
        {
            try
            {
                addToStatistics("Number of clusters in each decision class", Integer.toString(getIntProperty(NUMBER_OF_CLUSTERS)));
                addToStatistics("Principal subspace dimension in each cluster", Integer.toString(m_nOptimal));
            }
            catch (PropertyConfigurationException e)
            {
                Report.exception(e);
            }
        }

        /**
         * Resets statistics.
         */
        public void resetStatistics()
        {
        }
}

Copyright © 2008-2011 by TunedIT
Design by luksite