Repository /Rseslib/rseslib-3.0.2.jar:rseslib.processing.discretization.OneRuleDiscretizationProvider


Back

No file description

Source code

/*
 * $RCSfile: OneRuleDiscretizationProvider.java,v $
 * $Revision: 1.1 $
 * $Date: 2009/05/14 10:10:13 $
 * $Author: wojna $
 * 
 * Copyright (C) 2002 - 2007 Logic Group, Institute of Mathematics, Warsaw University
 * 
 *  This file is part of Rseslib.
 *
 *  Rseslib is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Rseslib is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package rseslib.processing.discretization;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeMap;

import rseslib.structure.data.DoubleData;
import rseslib.structure.table.DoubleDataTable;
/**
 * This class represents Holte's One Rule discretization algorithm that
 * discretize numeric attributes.
 * 
 * @author Marcin Jałmużna
 */
public class OneRuleDiscretizationProvider extends
		AbstractDiscretizationProvider {
	private int minFreq;
	public OneRuleDiscretizationProvider(int minFreq) {
		super(0);
		this.minFreq=minFreq;
	}
    /**
     * Creates discretization cuts for one attribute.
     * Main method of this discretization provider.
     * 
     * @param attribute				Selected attribute for discretization.
     * @param number_of_intervals 	Not important
     * @param table 				Data used for estimating the best cuts.
     * @return Discretization cuts 
     */
	@Override
	double[] generateCuts(int attribute, int number_of_intervals,
			DoubleDataTable table) {
		TreeMap<Double, HashMap<Double, Integer>> possibleCuts=new TreeMap<Double, HashMap<Double, Integer>>();
		for (DoubleData dd : table.getDataObjects()) {
			Double val=dd.get(attribute);
			Double dec = dd.get(dd.attributes().decision());
			if(possibleCuts.containsKey(val)){
				if(possibleCuts.get(val).containsKey(dec)){
					possibleCuts.get(val).put(dec, possibleCuts.get(val).get(dec)+1);
				}
				else{
					possibleCuts.get(val).put(dec, 1);
				}
			}
			else{
				HashMap<Double, Integer> tmp= new HashMap<Double, Integer>();
				tmp.put(dec, 1);
				possibleCuts.put(val, tmp);
			}
		}
		ArrayList<Double> cuts=new ArrayList<Double>();
		
		
		HashMap<Double, Integer> sum = new HashMap<Double, Integer>();
		Double decision=null;
		boolean enough=false;
		for(Double val: possibleCuts.keySet()){
			HashMap<Double, Integer> tmp = possibleCuts.get(val);			
			if(enough){
				if((tmp.keySet().size()!=1)||(!tmp.containsKey(decision))){
					cuts.add((val+possibleCuts.lowerKey(val))/2.0);
					enough=false;
					sum=tmp;	
				}
			}
			else{
				add(tmp, sum);
			}
			if(!enough){
				for(Double dec: sum.keySet()){
					if(sum.get(dec)>=minFreq){
						enough=true;
						decision=dec;
						break;
					}
				}
			}		
		}
		double ret[] = new double[cuts.size()];
		for(int i=0; i<cuts.size(); i++){
			ret[i]=cuts.get(i);
			//System.out.println("attribute: " + attribute + " cut: " + cuts.get(i));
		}
		return ret;
	}
	/** 
	 * Join of two maps.
	 * 
	 * @param x first Map
	 * @param y second Map
	 */
	private void add(HashMap<Double, Integer> x, HashMap<Double, Integer> y)
	{
		for(Double dec: x.keySet()){
			if(y.containsKey(dec)){
				y.put(dec, y.get(dec)+x.get(dec));
			}
			else{
				y.put(dec, x.get(dec));
			}
		}
	}
}

Copyright © 2008-2011 by TunedIT
Design by luksite