/*
* Debellor
*
* Copyright (C) 2008-2009 by Marcin Wojnarski
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package org.debellor.weka;
import org.debellor.core.Cell;
import org.debellor.core.Sample;
import org.debellor.core.Sample.SampleType;
import weka.core.Instances;
import weka.filters.Filter;
/**
* Debellor wrapper for Weka filters - subclasses of <code>weka.filters.Filter</code>.
* Weka class implementing the filter is identified by its name
* given in long (with package specification) or short form.
* This name must exactly match (including case)
* the corresponding Weka class name.
* Available names can be found by running Weka GUI (Explorer or Experimenter)
* and opening the selection list of filters.
* Alternatively, you can browse Weka code
* (subpackages of <code>weka.filters</code> package).
*
* <p>Parameters (or <i>options</i> in Weka's terminology)
* can be passed to the Weka filter in the standard
* Debellor way, by calling {@link #set} or {@link #setParameters}.
* Parameters should have names of corresponding Weka options,
* exactly in the same form as is printed in Weka GUI
* in the dialog for setting filter options.
*
* <p> Filter is applied to Weka {@code Instance}s created by
* concatenation of {@code data} and {@code decision} fields of input samples
* (if both are present, as indicated by {@link SampleType};
* otherwise only one of the fields is used).
* Decision is placed as the last attribute of the instances
* and marked as class attribute.
*
* <p> After filtering, if output instances have class attribute specified,
* this attribute is extracted as a sample decision.
*
* <p> All input data are first buffered and then passed together to the filter.
* Thus, filters do <i>not</i> work in on-line fashion.
*
* @see WekaClassifier
*
* @author Marcin Wojnarski
*
*/
public class WekaFilter extends Cell {

    /**
     * Package prefixes tried, in order, when resolving a filter class name.
     * The empty prefix lets callers pass a fully qualified class name.
     */
    private static final String[] SEARCH_PACKAGES = {
        "",
        "weka.filters.",
        "weka.filters.supervised.",
        "weka.filters.supervised.attribute.",
        "weka.filters.supervised.instance.",
        "weka.filters.unsupervised.",
        "weka.filters.unsupervised.attribute.",
        "weka.filters.unsupervised.instance.",
    };

    /** Weka filter class resolved in the constructor; instantiated anew on every open. */
    private Class<? extends Filter> filterClass;

    /** Buffered instances after filtering, to be returned by next() */
    private Instances instances;

    /** Sample type derived from the filtered instances; returned by onOpen(). */
    private SampleType type;

    /** Number of samples/instances already returned by next() */
    private int pos;

    /** Input stream opened in onOpen(); it is fully consumed and closed there. */
    protected Stream input;

    /**
     * Resolves the Weka filter class by trying <code>className</code> against
     * each known <code>weka.filters</code> package prefix.
     *
     * @param className Name of Weka class implementing the filter algorithm,
     * given in long (with package specification) or short form.
     * @throws AmbiguousClassNameException if the name matches a filter class
     * under more than one of the searched package prefixes
     * @throws IllegalArgumentException if no class with this name that is
     * a subclass of <code>weka.filters.Filter</code> can be found
     */
    public WekaFilter(String className) throws AmbiguousClassNameException
    {
        super(false);

        Class<? extends Filter> match = null;
        for (String prefix : SEARCH_PACKAGES) {
            Class<?> candidate;
            try {
                candidate = Class.forName(prefix + className);
            } catch (ClassNotFoundException notInThisPackage) {
                // expected for most prefixes: the class lives in at most one of them
                continue;
            }
            if (!Filter.class.isAssignableFrom(candidate))
                continue;       // a class of that name exists, but it is not a Weka filter
            if (match != null)  // matching class was already found, this one is the 2nd
                throw new AmbiguousClassNameException(className, this.getClass());
            match = candidate.asSubclass(Filter.class);
        }

        if (match == null)
            throw new IllegalArgumentException("Weka filter class not found: " + className);
        filterClass = match;
    }

    /**
     * Reads and buffers all input samples, runs the Weka filter on them
     * and keeps the filtered instances for subsequent calls of onNext().
     *
     * @return sample type derived from the filtered instances
     * @throws Exception if reading the input, creating or configuring the filter,
     * or the filtering itself fails
     */
    @Override
    protected SampleType onOpen() throws Exception
    {
        input = openInputStream();

        Instances inputInstances;
        try {
            // convert dataType: Debellor->Weka
            SampleType inputType = input.sampleType;
            inputInstances = DataConverter.instancesFrom(inputType);

            // read samples
            Sample s;
            while ((s = input.next()) != null)
                inputInstances.add(DataConverter.instanceFrom(s, inputType));
        } finally {
            // close the input also when reading or conversion fails
            input.close();
        }

        // prepare filter; getDeclaredConstructor().newInstance() replaces the deprecated
        // Class.newInstance() - a failing constructor now surfaces as InvocationTargetException
        Filter filter = filterClass.getDeclaredConstructor().newInstance();
        ParamConverter.setWekaOptions(filter, parameters);
        filter.setInputFormat(inputInstances);

        // apply filter; convert dataType: Weka->Debellor
        instances = Filter.useFilter(inputInstances, filter);
        type = DataConverter.sampleTypeFrom(instances);
        pos = 0;
        return type;
    }

    /**
     * Returns the next buffered, already filtered instance converted to a sample.
     *
     * @return next sample, or <code>null</code> when all buffered instances
     * have been returned
     */
    @Override
    protected Sample onNext() throws Exception {
        if (pos >= instances.numInstances())
            return null;
        return DataConverter.sampleFrom(instances.instance(pos++), type);
    }

    /** Drops references to the buffered instances and their sample type. */
    @Override
    protected void onClose() throws Exception {
        instances = null;
        type = null;
    }
}