Repository /Debellor/debellor-1.0.jar:org.debellor.weka.WekaFilter


Back

No file description

Source code

/*
 *  Debellor
 *
 *  Copyright (C) 2008-2009 by Marcin Wojnarski
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package org.debellor.weka;

import org.debellor.core.Cell;
import org.debellor.core.Sample;
import org.debellor.core.Sample.SampleType;

import weka.core.Instances;
import weka.filters.Filter;

/**
 * Debellor wrapper for Weka filters - subclasses of <code>weka.filters.Filter</code>.
 * Weka class implementing the filter is identified by its name
 * given in long (with package specification) or short form.
 * This name must exactly match (including case)
 * the corresponding Weka class name.
 * Available names can be found by running Weka GUI (Explorer or Experimenter) 
 * and opening the selection list of filters.  
 * Alternatively, you can browse Weka code 
 * (subpackages of <code>weka.filters</code> package).
 * 
 * <p>Parameters (or <i>options</i> in Weka's terminology) 
 * can be passed to the Weka filter in the standard
 * Debellor way, by calling {@link #set} or {@link #setParameters}.
 * Each parameter must be named after the corresponding Weka option,
 * exactly as that option is printed in the Weka GUI
 * dialog for setting filter options.
 * 
 * <p> Filter is applied to Weka {@code Instance}s created by
 * concatenation of {@code data} and {@code decision} fields of input samples
 * (if both are present, as indicated by {@link SampleType};
 * otherwise only one of the fields is used). 
 * Decision is placed as the last attribute of the instances
 * and marked as class attribute.
 * 
 * <p> After filtering, if output instances have class attribute specified, 
 * this attribute is extracted as a sample decision. 
 * 
 * <p> All input data are first buffered and then passed together to the filter.
 * Thus, filters do <i>not</i> work in on-line fashion. 
 * 
 * @see WekaClassifier
 * 
 * @author Marcin Wojnarski
 *
 */
public class WekaFilter extends Cell {

	/**
	 * Package prefixes tried, in this order, when resolving the filter class name.
	 * The empty prefix lets a fully qualified class name be used as given.
	 */
	private static final String[] SEARCH_PATHS = {"", 
			"weka.filters.", 
			"weka.filters.supervised.", 
			"weka.filters.supervised.attribute.", 
			"weka.filters.supervised.instance.", 
			"weka.filters.unsupervised.", 
			"weka.filters.unsupervised.attribute.", 
			"weka.filters.unsupervised.instance.", 
	};

	/** Weka filter class resolved by the constructor; instantiated anew on every onOpen() */
	private final Class<? extends Filter> filterClass;
	
	/** Buffered instances after filtering, to be returned by next() */
	private Instances instances;

	/** Sample type derived from the filtered instances; used to convert them back to samples */
	private SampleType type;

	/** Number of samples/instances already returned by next() */
	private int pos;

	/** Input stream that the samples to be filtered are read from; opened and closed within onOpen() */
	protected Stream input;

	
	/**
	 * Resolves the Weka filter class to be wrapped. The name is tried as given
	 * and with each of the standard <code>weka.filters</code> package prefixes;
	 * exactly one of these candidates must be a subclass of
	 * <code>weka.filters.Filter</code>.
	 * 
	 * @param className Name of Weka class implementing the filter algorithm,
	 * given in long (with package specification) or short form.
	 * @throws AmbiguousClassNameException if more than one of the candidate
	 * names resolves to a Weka filter class
	 * @throws IllegalArgumentException if none of the candidate names 
	 * resolves to a Weka filter class
	 */
	public WekaFilter(String className) throws AmbiguousClassNameException 
	{
		// NOTE(review): the meaning of this flag is defined by Cell's constructor, not visible here
		super(false);
		
		Class<? extends Filter> match = null;

		for(String prefix : SEARCH_PATHS) {
			Class<?> candidate;
			try {
				candidate = Class.forName(prefix + className);
			} catch(ClassNotFoundException e) {
				continue;		// nothing under this prefix - expected for most prefixes
			}
			if(!Filter.class.isAssignableFrom(candidate))
				continue;		// a class of that name exists, but it is not a Weka filter
			
			if(match != null)	// matching class was already found, this one is the 2nd
				throw new AmbiguousClassNameException(className, this.getClass());
			
			match = candidate.asSubclass(Filter.class);
		}
		if(match == null)
			throw new IllegalArgumentException("Weka filter class not found: " + className);
		
		filterClass = match;
	}

	/**
	 * Reads all input samples, runs the Weka filter on them in one batch
	 * and buffers the result for subsequent calls of {@link #onNext}.
	 * 
	 * @return sample type derived from the filtered instances
	 * @throws Exception if reading the input, configuring the filter 
	 * or filtering itself fails
	 */
	@Override
	protected SampleType onOpen() throws Exception 
	{
		Instances inputInstances = readInput();

		// prepare filter
		// (Class.newInstance() is deprecated; going through the constructor object
		// reports constructor failures wrapped in InvocationTargetException)
		Filter filter = filterClass.getDeclaredConstructor().newInstance();
		ParamConverter.setWekaOptions(filter, parameters);
		filter.setInputFormat(inputInstances);

		// apply filter; convert dataType: Weka->Debellor
		instances = Filter.useFilter(inputInstances, filter);
		type = DataConverter.sampleTypeFrom(instances);
		pos = 0;
		
		return type;
	}

	/**
	 * Opens the input stream, converts every sample to a Weka instance
	 * and closes the stream again, also when reading fails half-way.
	 * 
	 * @return all input samples as Weka instances
	 * @throws Exception if opening, reading, converting or closing fails
	 */
	private Instances readInput() throws Exception 
	{
		input = openInputStream();
		boolean completed = false;
		try {
			// convert dataType: Debellor->Weka
			SampleType inputType = input.sampleType;
			Instances buffer = DataConverter.instancesFrom(inputType);

			// read samples
			Sample s;
			while((s = input.next()) != null)
				buffer.add(DataConverter.instanceFrom(s, inputType));
			
			completed = true;
			return buffer;
		} finally {
			if(completed) {
				input.close();
			} else {
				// reading already failed: release the stream, but let the original
				// exception propagate rather than one thrown by this cleanup
				try {
					input.close();
				} catch(Exception ignored) {
				}
			}
		}
	}

	/**
	 * Returns the next filtered instance converted back to a sample.
	 * 
	 * @return next sample, or <code>null</code> when all buffered instances 
	 * have been returned
	 */
	@Override
	protected Sample onNext() throws Exception {
		if(pos >= instances.numInstances())
			return null;
		return DataConverter.sampleFrom(instances.instance(pos++), type);
	}

	/**
	 * Drops the buffered filtering result so that it can be garbage-collected.
	 */
	@Override
	protected void onClose() throws Exception {
		instances = null;
		type = null;
	}
	
}

Copyright © 2008-2011 by TunedIT
Design by luksite