Repository /Debellor/debellor-1.0.jar:org.debellor.weka.WekaClassifier


Back

No file description

Source code

/*
 *  Debellor
 *
 *  Copyright (C) 2008-2009 by Marcin Wojnarski
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package org.debellor.weka;

import org.debellor.core.Cell;
import org.debellor.core.DataObject;
import org.debellor.core.DataType;
import org.debellor.core.Sample;
import org.debellor.core.Sample.SampleType;
import org.debellor.core.data.NumericFeature;
import org.debellor.core.data.SymbolicFeature;
import org.debellor.core.exception.DebellorException;

import weka.classifiers.Classifier;
import weka.core.Instance;
import weka.core.Instances;

/**
 * Debellor wrapper for Weka classifiers 
 * - subclasses of <code>weka.classifiers.Classifier</code>.
 * The Weka class implementing the classifier is identified by its name,
 * given in long (with package specification) or short form.
 * This name must exactly match (including case)
 * the corresponding Weka class name.
 * Available names can be found by running Weka GUI (Explorer or Experimenter) 
 * and opening the selection list of classifiers.  
 * Alternatively, you can browse Weka code 
 * (subpackages of <code>weka.classifiers</code> package).
 * 
 * <p>Parameters (or <i>options</i> in Weka's terminology) 
 * can be passed to the Weka classifier in the standard
 * Debellor way, by calling {@link #set} or {@link #setParameters}.
 * Parameters should have the names of the corresponding Weka options,
 * in exactly the same form as printed in Weka GUI
 * in the dialog for setting classifier options.
 * <p>
 * For example, the "J48" classifier, which implements C4.5 decision tree,
 * has options "binarySplits", "confidenceFactor", "debug", ...
 * You can set their values in the following way:
 * 
 * <pre>
 * Cell learner = new WekaClassifier("J48");
 * learner.set("binarySplits", "true");
 * learner.set("confidenceFactor", "0.45");
 * </pre>
 * 
 * If you don't specify some option, its default value will be used.
 * 
 * @author Marcin Wojnarski
 *
 */
public class WekaClassifier extends Cell {

	/**
	 * Package prefixes tried, in this order, when resolving a classifier name.
	 * The empty prefix lets callers pass a fully qualified class name.
	 */
	private static final String[] SEARCH_PATHS = {"", 
			"weka.classifiers.", 
			"weka.classifiers.lazy.", 
			"weka.classifiers.trees.", 
			"weka.classifiers.trees.lmt.", 
			"weka.classifiers.trees.m5.", 
			"weka.classifiers.bayes.", 
			"weka.classifiers.bayes.net.", 
			"weka.classifiers.rules.", 
			"weka.classifiers.misc.", 
			"weka.classifiers.functions.", 
			"weka.classifiers.meta.", 
	};

	/** Weka class instantiated on every {@link #onLearn()} call. */
	private Class<? extends Classifier> learnerClass;
	/** Classifier created by the last {@link #onLearn()}; <code>null</code> before learning and after {@link #onErase()}. */
	private Classifier learner;
	
	/** Sample type of the training stream, recorded in {@link #onLearn()}. */
	SampleType type;
	/** Decision part of {@link #type}, used to pick the kind of decision object produced in {@link #onNext()}. */
	DataType decisionType;
	
	/** Empty data set, just to hold Weka representation of attribute types */
	Instances instances;
	/** Input stream currently being read (training data in {@link #onLearn()}, test data between {@link #onOpen()} and {@link #onClose()}). */
	protected Stream input;
	
	
	/**
	 * @param learnerClass Weka class implementing the classifier algorithm.
	 * @throws Exception if a probe instance of <code>learnerClass</code> cannot be created
	 * or its parameters cannot be listed.
	 */
	public WekaClassifier(Class<? extends Classifier> learnerClass) throws Exception {
		init(learnerClass);
	}

	
	/**
	 * @param className Name of Weka class implementing the classifier algorithm,
	 * given in long (with package specification) or short form.
	 * @throws AmbiguousClassNameException if the name matches classifier classes 
	 * in more than one of the searched packages.
	 * @throws IllegalArgumentException if no classifier class with this name is found.
	 * @throws Exception if a probe instance of the class cannot be created
	 * or its parameters cannot be listed.
	 */
	public WekaClassifier(String className) throws Exception {
		Class<? extends Classifier> cls = null;

		for(String p : SEARCH_PATHS) {
			Class<?> candidate;
			try {
				candidate = Class.forName(p + className);
			} catch(Exception ignored) {
				// no loadable class under this prefix - simply not a match
				continue;
			}
			// Check the type BEFORE remembering the class, so that a same-named 
			// non-classifier class can never replace a valid match found earlier.
			if(!Classifier.class.isAssignableFrom(candidate))
				continue;
			if(cls != null)		// matching class was already found, this one is the 2nd
				throw new AmbiguousClassNameException(className, this.getClass());
			cls = candidate.asSubclass(Classifier.class);
		}
		if(cls == null)
			throw new IllegalArgumentException("Weka classifier class not found: " + className);
		
		init(cls);
	}
	
	/**
	 * Remembers the classifier class and publishes its Weka options as the
	 * available parameters of this cell. A throw-away instance of the class 
	 * is created only to list the options.
	 */
	private void init(Class<? extends Classifier> learnerClass) throws Exception {
		this.learnerClass = learnerClass;
		setAvailableParams(ParamConverter.listParameters(learnerClass.newInstance()));
	}


//	@Override
//	public Parameters listParams() {
//		Parameters par = new Parameters();
//		Enumeration options;
//		try {
//			options = learnerClass.newInstance().listOptions();
//		} catch(Exception e) {
//			throw new DebellorError(e);
//		}
//		while(options.hasMoreElements()) {
//			Option opt = (Option) options.nextElement();
//			par.set(opt.name(), "");
//			// TODO defaultParameters()
//		}
//		return par;
//	}

	/**
	 * Creates a fresh Weka classifier, applies current parameters to it, 
	 * reads the whole input stream into a Weka data set and builds the classifier.
	 * The input stream is closed even if reading or conversion of samples fails.
	 */
	@Override
	protected void onLearn() throws Exception {
		learner = learnerClass.newInstance();
		ParamConverter.setWekaOptions(learner, parameters);
		
		input = openInputStream();
		try {
			type = input.sampleType;
			decisionType = (DataType) type.decision;
			instances = DataConverter.instancesFrom(type);

			// read samples
			Sample s;
			while((s = input.next()) != null)
				instances.add(DataConverter.instanceFrom(s, type));
		} finally {
			// don't leak the stream when a sample cannot be read or converted
			input.close();
		}

		learner.buildClassifier(instances);

		// remove all training samples, keep only info on attribute types for use during testing
		instances.delete();
	}

	/** Drops the trained classifier and everything recorded about the training data. */
	@Override
	protected void onErase() throws DebellorException {
		learner = null;
		type = null;
		decisionType = null;
		instances = null;
	}


	/**
	 * Opens the input stream with samples to be classified.
	 * @return the sample type recorded during learning
	 */
	@Override
	protected SampleType onOpen() throws Exception {
		input = openInputStream();
		return type;
	}

	/**
	 * Reads the next input sample and classifies it with the trained Weka classifier.
	 * @return the result of setting the predicted decision on the input sample
	 * (<code>null</code> decision if Weka reports a missing value or the decision 
	 * type is neither numeric nor symbolic); <code>null</code> when the input is exhausted
	 */
	@Override
	protected Sample onNext() throws Exception 
	{
		Sample s = input.next();
		if(s == null) return null;
		Instance instance = DataConverter.instanceFrom(s, type);
		instance.setDataset(instances);		// set attribute types for 'instance'
		double wekaDecision = learner.classifyInstance(instance);

		DataObject debellorDecision = null;
		// Weka codes a missing value as NaN, which never compares equal to anything,
		// so it must be detected with isMissingValue() rather than with == or !=.
		if(!Instance.isMissingValue(wekaDecision)) {
			if(decisionType.dataClass == NumericFeature.class)
				debellorDecision = new NumericFeature(wekaDecision);
			else if(decisionType.dataClass == SymbolicFeature.class)
				debellorDecision = new SymbolicFeature((int)wekaDecision, decisionType);
		}

		return s.setDecision(debellorDecision);
	}

	/** Closes the input stream opened in {@link #onOpen()}. */
	@Override
	protected void onClose() throws Exception {
		input.close();
	}

	/**
	 * @return <code>"WekaClassifier[...]"</code> containing the first line of the 
	 * trained classifier's own description (followed by "..." if there are more lines),
	 * or "empty" if no classifier has been trained
	 */
	@Override
	public String toString() {
		String s = "empty";
		if(learner != null) { 
			s = learner.toString().trim();
			int newline = s.indexOf("\n");
			if(newline >= 0)
				s = s.substring(0, newline) + "...";
		}
		//String n = (name != null) && (!name.equals("")) ? (name + ": ") : "WekaClassifier";
		String n = "WekaClassifier";
		return n + "[" + s + "]";
	}

}
Copyright © 2008-2011 by TunedIT
Design by luksite