/*
* Debellor
*
* Copyright (C) 2008-2009 by Marcin Wojnarski
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package org.debellor.weka;
import org.debellor.core.Cell;
import org.debellor.core.DataObject;
import org.debellor.core.DataType;
import org.debellor.core.Sample;
import org.debellor.core.Sample.SampleType;
import org.debellor.core.data.NumericFeature;
import org.debellor.core.data.SymbolicFeature;
import org.debellor.core.exception.DebellorException;
import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.Instances;
/**
* Debellor wrapper for Weka classifiers
* - subclasses of <code>weka.classifiers.Classifier</code>.
* Weka class implementing the classifier is identified by its name
* given in long (with package specification) or short form.
* This name must exactly match (including case)
* the corresponding Weka class name.
* Available names can be found by running Weka GUI (Explorer or Experimenter)
* and opening the selection list of classifiers.
* Alternatively, you can browse Weka code
* (subpackages of <code>weka.classifiers</code> package).
*
* <p>Parameters (or <i>options</i> in Weka's terminology)
* can be passed to the Weka classifier in the standard
* Debellor way, by calling {@link #set} or {@link #setParameters}.
* Parameters should have the names of the corresponding Weka options,
* exactly in the form printed in the Weka GUI
* in the dialog for setting classifier options.
* <p>
* For example, the "J48" classifier, which implements C4.5 decision tree,
* has options "binarySplits", "confidenceFactor", "debug", ...
* You can set their values in the following way:
*
* <pre>
* Cell learner = new WekaClassifier("J48");
* learner.set("binarySplits", "true");
* learner.set("confidenceFactor", "0.45");
* </pre>
*
* If you don't specify some option, its default value will be used.
*
* @author Marcin Wojnarski
*
*/
public class WekaClassifier extends Cell {

    /**
     * Package prefixes tried, in this order, when resolving a classifier name.
     * The empty prefix lets callers pass a fully qualified class name.
     */
    private static final String[] SEARCH_PACKAGES = {
        "",
        "weka.classifiers.",
        "weka.classifiers.lazy.",
        "weka.classifiers.trees.",
        "weka.classifiers.trees.lmt.",
        "weka.classifiers.trees.m5.",
        "weka.classifiers.bayes.",
        "weka.classifiers.bayes.net.",
        "weka.classifiers.rules.",
        "weka.classifiers.misc.",
        "weka.classifiers.functions.",
        "weka.classifiers.meta.",
    };

    /** Weka class that is instantiated anew each time {@link #onLearn()} runs. */
    private Class<? extends Classifier> learnerClass;

    /** Classifier built by {@link #onLearn()}; <code>null</code> after {@link #onErase()}. */
    private Classifier learner;

    /** Sample type of the training input, recorded by {@link #onLearn()}. */
    SampleType type;

    /** Decision part of {@link #type}, used to pick the Debellor type of predictions. */
    DataType decisionType;

    /** Empty data set, just to hold Weka representation of attribute types */
    Instances instances;

    /** Input stream currently being read (training data or data to be classified). */
    protected Stream input;

    /**
     * Creates a wrapper for the given Weka classifier class.
     *
     * @param learnerClass Weka class implementing the classifier algorithm
     * @throws Exception if a probe instance of the class cannot be created
     *         or its parameters cannot be listed
     */
    public WekaClassifier(Class<? extends Classifier> learnerClass) throws Exception {
        init(learnerClass);
    }

    /**
     * Creates a wrapper for the Weka classifier identified by name.
     * The name is tried as given and then with each of the known
     * <code>weka.classifiers</code> package prefixes.
     *
     * @param className Name of Weka class implementing the classifier algorithm,
     * given in long (with package specification) or short form.
     * @throws AmbiguousClassNameException if more than one prefix yields a classifier class
     * @throws IllegalArgumentException if no prefix yields a classifier class
     * @throws Exception if the resolved class cannot be instantiated
     */
    public WekaClassifier(String className) throws Exception {
        Class<? extends Classifier> match = null;
        for (String prefix : SEARCH_PACKAGES) {
            Class<?> candidate;
            try {
                candidate = Class.forName(prefix + className);
            } catch (ClassNotFoundException notInThisPackage) {
                // expected for most prefixes: simply try the next one
                continue;
            }
            if (!Classifier.class.isAssignableFrom(candidate)) {
                // a class of that name exists but is not a classifier; it must
                // neither count as a match nor replace a match found earlier
                continue;
            }
            if (match != null) {
                // matching class was already found, this one is the 2nd
                throw new AmbiguousClassNameException(className, this.getClass());
            }
            match = candidate.asSubclass(Classifier.class);
        }
        if (match == null) {
            throw new IllegalArgumentException("Weka classifier class not found: " + className);
        }
        init(match);
    }

    /**
     * Stores the classifier class and registers the parameters that
     * <code>ParamConverter.listParameters</code> reports for a throwaway
     * instance of it.
     */
    private void init(Class<? extends Classifier> learnerClass) throws Exception {
        this.learnerClass = learnerClass;
        setAvailableParams(ParamConverter.listParameters(learnerClass.newInstance()));
    }

    /**
     * Trains a fresh Weka classifier on all samples of the input stream.
     * Afterwards {@link #instances} is emptied, so that it only keeps the
     * attribute definitions needed while classifying.
     */
    @Override
    protected void onLearn() throws Exception {
        learner = learnerClass.newInstance();
        ParamConverter.setWekaOptions(learner, parameters);

        input = openInputStream();
        try {
            type = input.sampleType;
            decisionType = (DataType) type.decision;
            instances = DataConverter.instancesFrom(type);

            // read samples
            Sample s;
            while ((s = input.next()) != null) {
                instances.add(DataConverter.instanceFrom(s, type));
            }
        } finally {
            // close the stream even when reading or converting a sample fails
            input.close();
        }

        learner.buildClassifier(instances);

        // remove all training samples, keep only info on attribute types for use during testing
        instances.delete();
    }

    /** Drops the trained classifier and everything recorded during training. */
    @Override
    protected void onErase() throws DebellorException {
        learner = null;
        type = null;
        decisionType = null;
        instances = null;
    }

    /**
     * Opens the input stream with the samples to classify.
     *
     * @return the sample type recorded during training
     */
    @Override
    protected SampleType onOpen() throws Exception {
        input = openInputStream();
        return type;
    }

    /**
     * Reads the next input sample and attaches the classifier's prediction to it.
     *
     * @return the result of <code>setDecision</code> on the input sample, with
     *         a <code>null</code> decision when Weka reports a missing prediction
     *         or the decision type is neither numeric nor symbolic;
     *         <code>null</code> when the input stream has no more samples
     */
    @Override
    protected Sample onNext() throws Exception {
        Sample s = input.next();
        if (s == null) {
            return null;
        }

        Instance instance = DataConverter.instanceFrom(s, type);
        instance.setDataset(instances); // set attribute types for 'instance'
        double wekaDecision = learner.classifyInstance(instance);

        DataObject debellorDecision = null;
        // Weka marks a missing value with NaN, which compares unequal to
        // everything (itself included), so '!=' can never detect it.
        if (!Instance.isMissingValue(wekaDecision)) {
            if (decisionType.dataClass == NumericFeature.class) {
                debellorDecision = new NumericFeature(wekaDecision);
            } else if (decisionType.dataClass == SymbolicFeature.class) {
                debellorDecision = new SymbolicFeature((int) wekaDecision, decisionType);
            }
        }
        return s.setDecision(debellorDecision);
    }

    /** Closes the input stream opened by {@link #onOpen()}. */
    @Override
    protected void onClose() throws Exception {
        input.close();
    }

    /**
     * Short description of this cell: the first line of the trained Weka
     * classifier's own description (followed by "..." when there are more
     * lines), or "empty" when no classifier has been trained.
     */
    @Override
    public String toString() {
        String s = "empty";
        if (learner != null) {
            s = learner.toString().trim();
            int newline = s.indexOf("\n");
            if (newline >= 0) {
                s = s.substring(0, newline) + "...";
            }
        }
        String n = "WekaClassifier";
        return n + "[" + s + "]";
    }
}