/*
* Copyright (C) 2009 by TunedIT
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package org.tunedit.examples.debellor;
import java.util.*;
import org.debellor.core.*;
import org.debellor.core.data.SymbolicFeature;
/**
 * Example implementation of a majority classifier in Debellor architecture.
 * The classifier always assigns the same decision: the one that occurred most
 * frequently in the training data.
 */
public class MajorityClassifier extends Cell {

    /**
     * Specification of type of decisions supplied with training data.
     * May contain a dictionary of possible values.
     * Set by a successful {@link #onLearn()}, cleared by {@link #onErase()}.
     */
    private DataType decisionType;

    /**
     * The most frequent decision occurring in training data.
     * It will be assigned to new samples in onNext().
     */
    private SymbolicFeature decision;

    /** Stream of input data used in open/next/close methods. */
    private Stream input;

    public MajorityClassifier() {
        super(true); // yes, this cell is trainable
    }

    /**
     * Trains the classifier: scans all training samples, counts how often each
     * symbolic decision value occurs and remembers the most frequent one.
     * <p>
     * The training stream is always closed, also when training fails, and the
     * fields {@code decisionType} and {@code decision} are only written once
     * training has succeeded, so a failed call does not leave a half-updated model.
     *
     * @throws Exception if the decisions in the training data are not symbolic,
     *         if no sample carries a decision, or if reading the stream fails
     */
    @Override
    protected void onLearn() throws Exception {
        // Deliberately a local with its own name: the field 'input' belongs to
        // the open/next/close cycle and must not be shadowed or touched here.
        Stream training = openInputStream();
        DataType learnedType;
        Map<String, Integer> counts;
        try {
            // Check if data type is correct before reading any sample.
            learnedType = training.sampleType.decision;
            if (learnedType.dataClass != SymbolicFeature.class) {
                throw new Exception(
                        "MajorityClassifier can handle only symbolic decisions");
            }
            counts = countDecisions(training);
        } finally {
            // Release the stream on every path, including the failures above.
            training.close();
        }

        // Find the decision with the biggest count.
        int bestCount = 0;
        String bestDecision = null;
        for (Map.Entry<String, Integer> stats : counts.entrySet()) {
            if (stats.getValue() > bestCount) {
                bestDecision = stats.getKey();
                bestCount = stats.getValue();
            }
        }
        if (bestCount == 0) {
            throw new Exception(
                    "No training samples or all of them have missing decision");
        }

        // Publish the trained model only now that everything has succeeded.
        decisionType = learnedType;
        decision = new SymbolicFeature(bestDecision, learnedType);
    }

    /**
     * Reads the given stream to its end and counts occurrences of each decision value.
     * Samples without a decision are skipped. The stream is not closed here.
     *
     * @param training open stream of training samples
     * @return map from decision value to the number of samples carrying it
     * @throws Exception if reading from the stream fails
     */
    private Map<String, Integer> countDecisions(Stream training) throws Exception {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        Sample s;
        while ((s = training.next()) != null) {
            if (s.decision == null) {
                continue; // missing decision: nothing to count
            }
            SymbolicFeature symb = s.decision.asSymbolicFeature();
            Integer count = counts.get(symb.value);
            if (count == null) {
                count = 0;
            }
            counts.put(symb.value, count + 1);
        }
        return counts;
    }

    /**
     * Opens the input stream for classification.
     *
     * @return the sample type of the input stream with its decision type
     *         replaced by the decision type remembered during training
     * @throws Exception if the input stream cannot be opened
     */
    @Override
    protected Sample.SampleType onOpen() throws Exception {
        // NOTE(review): presumably the framework only calls this after a
        // successful onLearn(), so decisionType is non-null here - confirm.
        input = openInputStream();
        return input.sampleType.setDecision(decisionType);
    }

    /**
     * Reads the next input sample and labels it with the majority decision.
     *
     * @return the input sample with its decision set to the learned majority
     *         decision, or {@code null} when the input stream is exhausted
     * @throws Exception if reading from the input stream fails
     */
    @Override
    protected Sample onNext() throws Exception {
        Sample s = input.next();
        if (s == null) {
            return null;
        }
        return s.setDecision(decision);
    }

    /**
     * Closes the input stream opened in {@link #onOpen()}.
     *
     * @throws Exception if closing the stream fails
     */
    @Override
    protected void onClose() throws Exception {
        input.close();
    }

    /**
     * Forgets the trained model by clearing the remembered decision type and
     * majority decision.
     */
    @Override
    protected void onErase() throws Exception {
        decisionType = null;
        decision = null;
    }
}