Repository /Examples/MajorityClassifier_debellor.jar:org.tunedit.examples.debellor.MajorityClassifier


Back

No file description

Source code

/*
 *  Copyright (C) 2009 by TunedIT
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package org.tunedit.examples.debellor;

import java.util.*;
import org.debellor.core.*;
import org.debellor.core.data.SymbolicFeature;

/**
 * Example implementation of a majority classifier in Debellor architecture.
 * The classifier always assigns the same decision: the one that occurred most
 * frequently in the training data.
 *
 * <p>Training ({@link #onLearn()}) scans the input stream once and remembers the
 * most frequent symbolic decision. Afterwards the open/next/close methods pass
 * input samples through, replacing the decision of each sample with the
 * remembered one.
 */
public class MajorityClassifier extends Cell {

	/**
	 * Specification of type of decisions supplied with training data.
	 * May contain a dictionary of possible values.
	 * Set only by a successful {@link #onLearn()}; cleared by {@link #onErase()}.
	 */
	private DataType decisionType;

	/**
	 * The most frequent decision occurring in training data.
	 * It will be assigned to new samples in onNext().
	 * Set only by a successful {@link #onLearn()}; cleared by {@link #onErase()}.
	 */
	private SymbolicFeature decision;

	/** Stream of input data used in open/next/close methods.  */
	private Stream input;

	/** Creates the classifier, registering it as a trainable cell. */
	public MajorityClassifier() {
		super(true);	 // yes, this cell is trainable
	}

	/**
	 * Learns the majority decision from the training stream.
	 *
	 * <p>The training stream is always closed before this method returns, also
	 * when an exception is thrown. The fields {@code decisionType} and
	 * {@code decision} are assigned only after the whole procedure succeeded,
	 * so a failed training attempt does not leave partially updated state.
	 *
	 * @throws Exception if the decision type of the training data is missing or
	 *         not symbolic, if no training sample carries a decision, or if
	 *         reading the training stream fails
	 */
	@Override
	protected void onLearn() throws Exception {
		// A distinct local name is used on purpose: the field 'input' belongs
		// to the open/next/close cycle and must not be shadowed or touched here.
		Stream training = openInputStream();
		DataType learnedType;
		Map<String, Integer> counts;
		try {
			// Check if data type is correct. A missing (null) decision type is
			// rejected with the same message instead of a NullPointerException.
			learnedType = training.sampleType.decision;
			if (learnedType == null || learnedType.dataClass != SymbolicFeature.class)
				throw new Exception(
						"MajorityClassifier can handle only symbolic decisions");

			counts = countDecisions(training);
		} finally {
			// Release the stream on every path, including failures above.
			training.close();
		}

		// Find the decision with the biggest count. The comparison is strict,
		// so among equally frequent decisions the one met first in the map's
		// iteration order is kept.
		int bestCount = 0;
		String bestDecision = null;
		for (Map.Entry<String, Integer> stats : counts.entrySet()) {
			if (stats.getValue() > bestCount) {
				bestDecision = stats.getKey();
				bestCount = stats.getValue();
			}
		}

		if (bestCount == 0)
			throw new Exception(
					"No training samples or all of them have missing decision");

		// Commit the trained state only now that everything succeeded.
		decisionType = learnedType;
		decision = new SymbolicFeature(bestDecision, learnedType);
	}

	/**
	 * Scans all samples of the given stream and counts occurrences of the
	 * different decision values. Samples without a decision are skipped.
	 * The stream is not closed here; that is left to the caller.
	 *
	 * @param training open stream of training samples
	 * @return map from decision value to the number of samples carrying it
	 * @throws Exception if reading from the stream fails
	 */
	private static Map<String, Integer> countDecisions(Stream training) throws Exception {
		Map<String, Integer> counts = new HashMap<String, Integer>();
		Sample s;
		while ((s = training.next()) != null) {
			if (s.decision == null)
				continue;
			SymbolicFeature symb = s.decision.asSymbolicFeature();
			Integer count = counts.get(symb.value);
			if (count == null)
				count = 0;
			counts.put(symb.value, count + 1);
		}
		return counts;
	}

	/**
	 * Opens the input stream for classification.
	 *
	 * @return the sample type of the input stream with its decision type
	 *         replaced by the type remembered during training
	 */
	@Override
	protected Sample.SampleType onOpen() throws Exception {
		input = openInputStream();
		return input.sampleType.setDecision(decisionType);
	}

	/**
	 * Reads the next input sample and assigns the learned decision to it.
	 *
	 * @return the sample with the majority decision set, or {@code null} when
	 *         the input stream returned {@code null}
	 */
	@Override
	protected Sample onNext() throws Exception {
		Sample s = input.next();
		if (s == null)
			return null;
		return s.setDecision(decision);
	}

	/** Closes the input stream opened in {@link #onOpen()}. */
	@Override
	protected void onClose() throws Exception {
		input.close();
	}

	/** Forgets the trained state: the decision type and the majority decision. */
	@Override
	protected void onErase() throws Exception {
		decisionType = null;
		decision = null;
	}

}
Copyright © 2008-2011 by TunedIT
Design by luksite