Repository /Rseslib/rseslib-3.0.2.jar:rseslib.processing.missing.RoughSetNonInvasiveImputation


Back

No file description

Source code

/*
 * $RCSfile: RoughSetNonInvasiveImputation.java,v $
 * $Revision: 1.1 $
 * $Date: 2007/08/10 12:00:24 $
 * $Author: wojna $
 * 
 * Copyright (C) 2002 - 2007 Logic Group, Institute of Mathematics, Warsaw University
 * 
 *  This file is part of Rseslib.
 *
 *  Rseslib is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Rseslib is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package rseslib.processing.missing;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Properties;

import rseslib.structure.data.DoubleData;
import rseslib.structure.indiscernibility.Indiscernibility;
import rseslib.structure.indiscernibility.SymmetricSimilarityIndiscernibility;
import rseslib.structure.table.DoubleDataTable;
import rseslib.system.ConfigurationWithStatistics;
import rseslib.system.PropertyConfigurationException;

/**
 * Non-invasive data imputation based on rough sets. See:
 * G. Gediga, I. Duentsch,
 * "Rough Set Data Analysis (RDA): Non invasive data imputation".
 * 
 * @author Rafal Latkowski
 */
public class RoughSetNonInvasiveImputation  extends ConfigurationWithStatistics implements Imputation
{
    /**
     * Name of the property holding the maximal number of imputation
     * iterations performed by a single call of {@link #imputation}.
     */
    private static final String s_strMaxIterationLimit = "MAX_ITERATION_LIMIT";

    /**
     * Indiscernibility relation used to decide which objects of the reference
     * table may donate values to an object with missing values.
     */
    private final Indiscernibility m_oIndRelation = new SymmetricSimilarityIndiscernibility();

    /**
     * Maximal number of iterations per imputation run, read from the properties.
     */
    private final int m_nMaxIterationLimit;

    /* Statistics */

    /** Total number of iterations performed since the last statistics reset. */
    private int m_nIterations=0;

    /** Number of imputed values in each iteration since the last statistics reset. */
    private ArrayList<Integer> m_alIterationStatistics = new ArrayList<Integer>();

    /**
     * Constructor.
     * 
     * @param prop		Properties; the MAX_ITERATION_LIMIT value is read from the configuration.
     * @throws PropertyConfigurationException
     */
    public RoughSetNonInvasiveImputation(Properties prop) throws PropertyConfigurationException
    {
        super(prop);
        m_nMaxIterationLimit=getIntProperty(s_strMaxIterationLimit);
    }

    /**
     * Returns a table with imputed missing values.
     * The imputation step is repeated until an iteration imputes nothing
     * or the iteration limit is reached. The limit applies to each call
     * separately; the iteration statistics keep accumulating across calls
     * until {@link #resetStatistics()} is invoked.
     * 
     * @param table		Original data table with missing values.
     * @return			Table with imputed missing values; the original table
     * 					itself if the iteration limit does not allow any iteration.
     */
    public DoubleDataTable imputation(DoubleDataTable table)
    {
        // The limit is counted locally: using the cumulative statistics counter
        // here would make every call after the first one run out of iterations.
        int iterationsThisRun = 0;
        boolean imputedSomething = true;
        while (imputedSomething && iterationsThisRun<m_nMaxIterationLimit)
        {
            iterationsThisRun++;
            m_nIterations++;
            // values are written into a clone while the previous table is read
            // NOTE(review): assumes clone() also copies the data objects - confirm
            DoubleDataTable imput = (DoubleDataTable)table.clone();
            imputedSomething = imputationIteration(imput,table);
            table = imput;
        }
        return table;
    }

    /**
     * Performs a single imputation pass. For every object of the imputation
     * table that has a missing (NaN) value on a non-decision attribute,
     * the values of all similar objects from the reference table are collected
     * and a missing value is filled in only if exactly one distinct value
     * was observed for that attribute.
     * 
     * @param imputationTable	Table whose objects are modified.
     * @param referenceTable	Table providing the values to impute.
     * @return					True if at least one value was imputed in this pass.
     */
    private boolean imputationIteration(DoubleDataTable imputationTable,DoubleDataTable referenceTable)
    {
        int imputedCount = 0;
        int decision = imputationTable.attributes().decision();
        for (DoubleData object : imputationTable.getDataObjects())
        {
            ArrayList<Integer> missing = missingAttributes(object,decision);
            if (missing.isEmpty())
                continue;
            // one set of observed values per missing attribute
            ArrayList<HashSet<Double>> observed = new ArrayList<HashSet<Double>>(missing.size());
            for (int k=0;k<missing.size();k++)
                observed.add(new HashSet<Double>());
            // for each similar object collect its known values on the missing attributes
            for (DoubleData other : referenceTable.getDataObjects())
            {
                if (!m_oIndRelation.similar(object,other))
                    continue;
                for (int k=0;k<missing.size();k++)
                {
                    int attr = missing.get(k);
                    double value = other.get(attr);
                    if (!Double.isNaN(value))
                        observed.get(k).add(value);
                }
            }
            // impute only where the similar objects agree on a single value
            for (int k=0;k<missing.size();k++)
            {
                HashSet<Double> values = observed.get(k);
                if (values.size()==1)
                {
                    int attr = missing.get(k);
                    object.set(attr,values.iterator().next());
                    imputedCount++;
                }
            }
        }
        m_alIterationStatistics.add(imputedCount);
        return imputedCount>0;
    }

    /**
     * Returns indices of the non-decision attributes on which the object is missing (NaN).
     * 
     * @param object	Object to be checked.
     * @param decision	Index of the decision attribute, which is skipped.
     * @return			Indices of attributes with missing values, in increasing order.
     */
    private static ArrayList<Integer> missingAttributes(DoubleData object,int decision)
    {
        ArrayList<Integer> missing = new ArrayList<Integer>();
        int noOfAttr = object.attributes().noOfAttr();
        for (int i=0;i<noOfAttr;i++)
            if (i!=decision && Double.isNaN(object.get(i)))
                missing.add(i);
        return missing;
    }

    /**
     * Returns the number of iterations performed since the last statistics reset.
     * 
     * @return	Number of performed iterations.
     */
    public int getNumberOfIterations()
    {
        return m_nIterations;
    }

    /**
     * Returns the numbers of values imputed in the successive iterations
     * performed since the last statistics reset.
     * 
     * @return	List with one count per iteration (the internal list, not a copy).
     */
    public ArrayList<Integer> getImputedValueCounts()
    {
        return m_alIterationStatistics;
    }

    /**
     * Calculates statistics. This implementation does nothing.
     */
    public void calculateStatistics()
    {
    }

    /**
     * Resets the iteration counter and the per-iteration imputation counts.
     */
    public void resetStatistics()
    {
        m_nIterations=0;
        m_alIterationStatistics=new ArrayList<Integer>();
    }
}

Copyright © 2008-2011 by TunedIT
Design by luksite