Repository /Rseslib/rseslib-3.0.1.jar:rseslib.processing.discretization.HistogramDiscretizationProvider


Back

No file description

Source code

/*
 * $RCSfile: HistogramDiscretizationProvider.java,v $
 * $Revision: 1.15 $
 * $Date: 2007/06/30 17:30:33 $
 * $Author: wojna $
 * 
 * Copyright (C) 2002 - 2007 Logic Group, Institute of Mathematics, Warsaw University
 * 
 *  This file is part of Rseslib.
 *
 *  Rseslib is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Rseslib is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package rseslib.processing.discretization;

import java.util.Arrays;
import java.util.Collection;

import rseslib.processing.filtering.MissingValuesFilter;
import rseslib.processing.transformation.TransformationProvider;
import rseslib.structure.data.DoubleData;
import rseslib.structure.table.DoubleDataTable;

/**
 * HistogramDiscretizationProvider generates discretization based on attribute histogram.
 * The known values of a numeric conditional attribute are sorted and the
 * cuts are taken at evenly spaced positions (quantiles) of the sorted
 * sequence, so every interval covers approximately the same number of
 * data objects. The first and the last interval are open towards the
 * negative and the positive infinity, respectively.
 * 
 * @author Rafal Latkowski
 */
public class HistogramDiscretizationProvider extends AbstractDiscretizationProvider implements TransformationProvider
{
    /**
     * Constructs the object for generating discretization.
     * This object does not require any further initialization;
     * the given number of intervals is only passed to the superclass.
     * 
     * @param number_of_intervals default number of intervals
     */
    public HistogramDiscretizationProvider(int number_of_intervals)
    {
        super(number_of_intervals);
    }

    /**
     * Creates discretization cuts for one attribute.
     * Main method of this discretization provider.
     * <p>
     * The values of the attribute are read from the objects returned by
     * <code>MissingValuesFilter.select</code> (presumably the objects with
     * a known value of the attribute), sorted in ascending order, and the
     * i-th cut (counting from 1) is the value found at the position
     * <code>i * n / number_of_intervals</code> of the sorted sequence,
     * where <code>n</code> is the number of collected values.
     * Repeated values in the data may produce repeated cuts.
     * 
     * @param attribute             Selected attribute for discretization.
     * @param number_of_intervals   Desired number of intervals, must be at least 1.
     * @param table                 Data used for estimating the best cuts.
     * @return Array of <code>number_of_intervals - 1</code> cuts in non-decreasing order,
     *         or an empty array if no value of the attribute is available.
     * @throws IllegalArgumentException if <code>number_of_intervals</code> is less than 1.
     */
    double[] generateCuts(int attribute, int number_of_intervals, DoubleDataTable table)
    {
        if (number_of_intervals < 1)
            throw new IllegalArgumentException("Number of intervals must be at least 1, got " + number_of_intervals);

        Collection<DoubleData> filtered = MissingValuesFilter.select(table.getDataObjects(), attribute);
        double[] data = new double[filtered.size()];
        int i = 0;
        for (DoubleData obj : filtered)
            data[i++] = obj.get(attribute);

        // Without any value there is nothing to place a cut at:
        // return no cuts instead of indexing into an empty array.
        if (data.length == 0)
            return new double[0];

        Arrays.sort(data);
        double[] cuts = new double[number_of_intervals - 1];
        for (i = 0; i < cuts.length; i++)
        {
            // The product is computed in long: (i+1)*data.length may exceed
            // the int range for large tables. The quotient is always smaller
            // than data.length because i+1 < number_of_intervals.
            int position = (int)((long)(i + 1) * data.length / number_of_intervals);
            cuts[i] = data[position];
        }
        return cuts;
    }
}

Copyright © 2008-2011 by TunedIT
Design by luksite