/*
* Debellor
*
* Copyright (C) 2008-2009 by Marcin Wojnarski
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package org.debellor.rseslib;
import java.util.Properties;
import org.debellor.core.Cell;
import org.debellor.core.DataObject;
import org.debellor.core.DataType;
import org.debellor.core.Sample;
import org.debellor.core.Sample.SampleType;
import org.debellor.core.exception.DebellorException;
import rseslib.processing.classification.Classifier;
import rseslib.processing.classification.ClassifierFactory;
import rseslib.structure.attribute.Header;
import rseslib.structure.data.DoubleData;
import rseslib.structure.table.ArrayListDoubleDataTable;
import rseslib.structure.table.DoubleDataTable;
import rseslib.system.progress.EmptyProgress;
/**
* Debellor wrapper for Rseslib classifiers - classes implementing
* <code>rseslib.processing.classification.Classifier</code> interface.
* Rseslib class is identified by its name
* given in long (with package specification) or short form.
* This name must exactly match (including case)
* the corresponding Rseslib class name.
* Available names can be found by running Rseslib GUI (like VRseslib)
* and opening the selection list of classifiers.
* Alternatively, you can browse Rseslib code
* (subpackages of <code>rseslib.processing.classification</code> package)
* for classes implementing
* <code>rseslib.processing.classification.Classifier</code> interface.
*
* <p>Parameters (or <i>properties</i> in Rseslib's terminology)
* can be passed to the Rseslib classifier in the standard
* Debellor way, by calling {@link #set} or {@link #setParameters}.
* Parameter names must be the names of the corresponding Rseslib properties,
* written exactly as they are shown in the Rseslib GUI
* in the dialog for setting classifier properties.
* <p>
* For example, the "C45" classifier, which implements C4.5 decision tree,
* has properties "noOfPartsForBuilding", "noOfPartsForPruning", "pruning".
* You can set their values in the following way:
*
* <pre>
* Cell learner = new RseslibClassifier("C45");
* learner.set("noOfPartsForPruning", "3");
* learner.set("pruning", "true");
* </pre>
*
* If you don't specify some parameter, its default value will be used.
*
* @author Marcin Wojnarski
*
*/
public class RseslibClassifier extends Cell {

    /**
     * Package prefixes tried, in this order, when resolving a classifier class name.
     * The empty prefix comes first so that the name is first tried exactly as given
     * (i.e. as a fully qualified class name).
     */
    private static final String[] CLASSIFIER_PACKAGES = {
        "",
        "rseslib.processing.classification.",
        "rseslib.processing.classification.bayes.",
        "rseslib.processing.classification.meta.",
        "rseslib.processing.classification.neural.",
        "rseslib.processing.classification.parameterised.",
        "rseslib.processing.classification.parameterised.knn.",
        "rseslib.processing.classification.parameterised.pca.",
        "rseslib.processing.classification.rules.",
        "rseslib.processing.classification.svm.",
        "rseslib.processing.classification.tree.c45."
    };

    /** Rseslib class of the wrapped classifier; assigned in {@code init}. */
    private Class<? extends Classifier> learnerClass;

    /** Trained Rseslib classifier; <code>null</code> before learning and after erasing. */
    private Classifier learner;

    /** dataType in Debellor format */
    SampleType type;

    /** Type of the decision attribute, taken from {@link #type} during learning. */
    DataType decisionType;

    /** dataType in Rseslib format */
    private Header header;

    /** Input stream currently being read, either for learning or for classification. */
    protected Stream input;

    /**
     * Creates a wrapper for the given Rseslib classifier class.
     *
     * @param learnerClass Rseslib class implementing the classifier algorithm.
     * @throws RseslibConversionException if registering the classifier's parameters fails
     */
    public RseslibClassifier(Class<? extends Classifier> learnerClass) throws RseslibConversionException {
        init(learnerClass);
    }

    /**
     * @param className Name of Rseslib class implementing the classifier algorithm,
     * given in long (with package specification) or short form.
     * Letter case must be the same as in the class name.
     * @throws RseslibConversionException if registering the classifier's parameters fails
     * @throws IllegalArgumentException if no class of that name implementing
     * <code>Classifier</code> is found under any of the known package prefixes
     */
    public RseslibClassifier(String className) throws RseslibConversionException {
        Class<? extends Classifier> foundClass = findClassifierClass(className);
        if (foundClass == null)
            throw new IllegalArgumentException("Rseslib classifier class not found: " + className);
        init(foundClass);
    }

    /**
     * Looks the name up under each prefix of {@link #CLASSIFIER_PACKAGES} and returns
     * the first loadable class that implements <code>Classifier</code>.
     *
     * @param className long or short class name
     * @return the matching classifier class, or <code>null</code> if there is none
     */
    private static Class<? extends Classifier> findClassifierClass(String className) {
        for (String prefix : CLASSIFIER_PACKAGES) {
            try {
                Class<?> candidate = Class.forName(prefix + className);
                // A class with the right name but the wrong type is skipped, not an error.
                if (Classifier.class.isAssignableFrom(candidate))
                    return candidate.asSubclass(Classifier.class);
            } catch (ClassNotFoundException ignored) {
                // No class of that name under this prefix; try the next one.
            }
        }
        return null;
    }

    /** Stores the classifier class and registers its parameters with this cell. */
    private void init(Class<? extends Classifier> learnerClass) throws RseslibConversionException {
        this.learnerClass = learnerClass;
        setAvailableParams(ParamConverter.listParameters(learnerClass));
    }

    /**
     * Reads all input samples into an Rseslib table, converts the cell's parameters
     * to Rseslib properties and trains the classifier on that table.
     */
    @Override
    protected void onLearn() throws Exception {
        input = openInputStream();
        DoubleDataTable table;
        try {
            type = input.sampleType;
            decisionType = (DataType) type.decision;
            header = DataConverter.headerFrom(type);
            table = new ArrayListDoubleDataTable(header);

            // read samples
            Sample s;
            while ((s = input.next()) != null)
                table.add(DataConverter.doubledataFrom(s, type, header));
        } finally {
            // Close the stream even when conversion or reading fails.
            input.close();
        }

        // convert parameters
        Properties prop = ParamConverter.propertiesFrom(parameters, learnerClass);

        // train classifier
        learner = ClassifierFactory.createClassifier(
                learnerClass, prop, table, new EmptyProgress());
    }

    /** Discards the trained classifier and the data-type information recorded during learning. */
    @Override
    protected void onErase() throws DebellorException {
        learner = null;
        type = null;
        decisionType = null;
        header = null;
    }

    /**
     * Opens the input stream for classification.
     *
     * @return the sample type recorded during learning
     */
    @Override
    protected SampleType onOpen() throws Exception {
        input = openInputStream();
        return type;
    }

    /**
     * Classifies the next input sample.
     *
     * @return the input sample with its decision replaced by the classifier's answer,
     * or <code>null</code> when the input stream is exhausted
     */
    @Override
    protected Sample onNext() throws Exception {
        Sample s = input.next();
        if (s == null) return null;

        DoubleData doubledata = DataConverter.doubledataFrom(s, type, header);
        double rseslibDecision = learner.classify(doubledata);

        // NaN never compares equal to anything (itself included), so it must be
        // detected with Double.isNaN(); a NaN answer yields a null decision.
        // NOTE(review): presumably NaN is how Rseslib signals "no decision" - confirm.
        DataObject debellorDecision = null;
        if (!Double.isNaN(rseslibDecision))
            debellorDecision = DataConverter.attrFromRseslibValue(rseslibDecision, decisionType);
        return s.setDecision(debellorDecision);
    }

    /** Closes the input stream opened by {@link #onOpen}. */
    @Override
    protected void onClose() throws Exception {
        input.close();
    }

    /**
     * Returns a short one-line description: the classifier class name followed by the
     * first line of the trained classifier's own description, or a fixed text when
     * no classifier has been trained.
     */
    @Override
    public String toString() {
        String s = "";
        if (learner != null) {
            String sRseslib = learner.toString().trim();
            int newline = sRseslib.indexOf('\n');
            if (newline >= 0) {
                // Keep the whole first line; trim() also removes the '\r' of a "\r\n" ending.
                sRseslib = sRseslib.substring(0, newline).trim() + "...";
            }
            s += "RseslibClassifier(" + learnerClass.getSimpleName() + "): " + sRseslib;
        }
        else s += "empty RseslibClassifier";
        return "[" + s + "]";
    }
}