Repository /Rseslib/rseslib-3.0.2.jar:rseslib.processing.discretization.MDGlobalDiscretizationProvider


Back

No file description

Source code

/*
 * $RCSfile: MDGlobalDiscretizationProvider.java,v $
 * $Revision: 1.1 $
 * $Date: 2009/05/14 10:10:13 $
 * $Author: wojna $
 * 
 * Copyright (C) 2002 - 2007 Logic Group, Institute of Mathematics, Warsaw University
 * 
 *  This file is part of Rseslib.
 *
 *  Rseslib is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  Rseslib is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


package rseslib.processing.discretization;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.TreeSet;

import rseslib.processing.transformation.FunctionBasedAttributeTransformer;
import rseslib.processing.transformation.TransformationProvider;
import rseslib.processing.transformation.Transformer;
import rseslib.structure.attribute.ArrayHeader;
import rseslib.structure.attribute.Attribute;
import rseslib.structure.attribute.Header;
import rseslib.structure.attribute.NumericAttribute;
import rseslib.structure.data.DoubleData;
import rseslib.structure.function.doubleval.AttributeDoubleFunction;
import rseslib.structure.table.DoubleDataTable;

/**
 * This class represents a method of discretizing continuous attributes based on
 * a local MD heuristic.
 * 
 * @author Marcin Jałmużna
 */
public class MDGlobalDiscretizationProvider implements TransformationProvider {

	private HashMap<Integer, ArrayList<Double>> cuts;
	private ArrayList<Record> intervals;
	private HashMap<Integer, TreeSet<Double>> possibleCuts;

    /**
     * Creates discretization cuts for one attribute.
     * Main method of this discretization provider.
     * 
     * @param attribute				Selected attribute for discretization.
     * @param number_of_intervals 	Not important
     * @param table 				Data used for estimating the best cuts.
     * @return Discretization cuts 
     */
	private void discretize(DoubleDataTable table) {
		
		int nOfAttr = table.attributes().noOfAttr();
		int decAttr = table.attributes().decision();
		

		possibleCuts=new HashMap<Integer, TreeSet<Double>>();
		for (int i = 0; i < nOfAttr; i++)
			if (i != decAttr) {				
				TreeSet<Double> tmp = new TreeSet<Double>();
				for (DoubleData dd : table.getDataObjects()) {
					tmp.add(dd.get(i));
				}
				possibleCuts.put(i, tmp);
			}

		intervals = new ArrayList<Record>();
		for (int i = 0; i < table.noOfObjects(); i++){
			Double dec1 = table.getDataObjects().get(i).get(decAttr);
			for (int j = i + 1; j < table.noOfObjects(); j++) {				
				Double dec2 = table.getDataObjects().get(j).get(decAttr);				
				if (!dec1.equals(dec2)) {
					Record r = new Record();
					r.setStart(new HashMap<Integer, Double>());
					r.setEnd(new HashMap<Integer, Double>());

					for (int i2 = 0; i2 < nOfAttr; i2++) {
						if (i2 != decAttr) {
							Double val_i = table.getDataObjects().get(i)
									.get(i2);
							Double val_j = table.getDataObjects().get(j)
									.get(i2);
							if (val_i > val_j) {
								Double t = val_i;
								val_i = val_j;
								val_j = t;
							}
							r.getStart().put(i2, val_i);
							r.getEnd().put(i2, val_j);
						}
					}
					intervals.add(r);
				}
			}
		}

		while(intervals.size()>0){
			int maxVal=0;
			int maxAttr=-1;
			Double maxCut=Double.MIN_VALUE;
			for(Integer i: possibleCuts.keySet()){
				for(Double d:possibleCuts.get(i)){
					int sum=0;
					for(Record r:intervals)
						if((r.getStart().get(i)<=d)&&(r.getEnd().get(i)>d))
							sum++;						
					if(sum>maxVal){
						maxVal=sum;
						maxAttr=i;
						maxCut=d;
					}					
				}
			}
			// if exist indiscernible records 
			if(maxVal==0)
				break;
			Double higher =possibleCuts.get(maxAttr).higher(maxCut);
			cuts.get(maxAttr).add((maxCut+higher)/2.0);
			possibleCuts.get(maxAttr).remove(maxCut);
			
			ArrayList<Record> tmp = new ArrayList<Record>();
			for(Record r:intervals)
				if((r.getStart().get(maxAttr)<=maxCut)&&(r.getEnd().get(maxAttr)>maxCut))
					tmp.add(r);
			intervals.removeAll(tmp);
	
		}
	}

	private class Record {
		private HashMap<Integer, Double> start = new HashMap<Integer, Double>();
		private HashMap<Integer, Double> end = new HashMap<Integer, Double>();

		public void setStart(HashMap<Integer, Double> start) {
			this.start = start;
		}

		public HashMap<Integer, Double> getStart() {
			return start;
		}

		public void setEnd(HashMap<Integer, Double> end) {
			this.end = end;
		}

		public HashMap<Integer, Double> getEnd() {
			return end;
		}

	}
	
    /**
     * Method that generate discretization based on data table.
     * 
     * @param table 		Data table to estimate the discretization.
     * @return attribute 	Discretization estimated on data table.
     */
	public Transformer generateTransformer(DoubleDataTable table) {
		Attribute[] attributes = new Attribute[table.attributes().noOfAttr()];
		AttributeDoubleFunction discr_table[] = new AttributeDoubleFunction[table
				.attributes().noOfAttr()];

		cuts = new HashMap<Integer, ArrayList<Double>>();
		for (int i = 0; i < table.attributes().noOfAttr(); i++) {
			if (table.attributes().isConditional(i)
					&& table.attributes().isNumeric(i)) {
				cuts.put(i, new ArrayList<Double>());
			}
		}
		discretize(table);

		for (int i = 0; i < attributes.length; i++)
			if (table.attributes().isConditional(i)
					&& table.attributes().isNumeric(i)) {
				Collections.sort(cuts.get(i));

				double[] c = new double[cuts.get(i).size()];
				for (int j = 0; j < c.length; j++) {
					c[j] = cuts.get(i).get(j);
					//System.out.println("attribute: " + i + " cut: " + c[j]);
				}
				NumericAttributeDiscretization disc = new NumericAttributeDiscretization(
						i, (NumericAttribute) table.attributes().attribute(i),
						c);
				attributes[i] = disc.getAttribute();
				discr_table[i] = disc;
			} else
				attributes[i] = table.attributes().attribute(i);

		Header new_header = new ArrayHeader(attributes, table.attributes()
				.missing());

		return new FunctionBasedAttributeTransformer(new_header, discr_table);
	}
}
Copyright © 2008-2011 by TunedIT
Design by luksite