Repository /Debellor/debellor-1.0.jar:org.debellor.core.data.SymbolicFeature


Back

No file description

Source code

/*
 *  Debellor
 *
 *  Copyright (C) 2008-2009 by Marcin Wojnarski
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package org.debellor.core.data;

import org.debellor.core.DataObject;
import org.debellor.core.DataType;
import org.debellor.core.exception.data.DataCastException;
import org.debellor.core.exception.data.DataException;
import org.debellor.core.exception.data.SymbolicValueNotFoundException;

/**
 * Represents the value of a symbolic feature/attribute.
 * Internally, the value is stored as a String. A <code>null</code> value passed to
 * {@link #SymbolicFeature(String)} is stored as the empty string, so {@link #value}
 * is never <code>null</code>.
 * 
 * @author Marcin Wojnarski
 *
 */
public final class SymbolicFeature extends DataObject {

	/**
	 * Type descriptor of {@link SymbolicFeature} data, optionally carrying
	 * the dictionary of all symbolic values that may appear in the data.
	 */
	public static final class SymbolicFeatureType extends DataType {
		
		/** Dictionary of possible values. 
		 * If non-null, contains a set of all symbolic values
		 * that may appear in the data stream.
		 * Additionally, defines a mapping between symbolic values and
		 * numeric codes, given by indices of the array.
		 * This mapping may or <i>may not</i> be used by data consumers
		 * to convert symbolic values to numbers.
		 * Note that for trainable cells it is possible that
		 * input data at the processing stage contain
		 * some other symbolic values, not present at the training stage
		 * (neither in the SymbolicFeatureType specification nor in the actual data received).
		 * It is desirable, although not mandatory, that the cell be able to handle such case.
		 * <p>Null value means that the set of possible symbolic values is unknown
		 * and the consumer of data should expect any value.</p>
		 * <p>The array itself is mutable; callers should treat it as read-only.</p> */
		public final String[] values;
				
		
		/** Constructs a SymbolicFeatureType <i>without</i> the dictionary of possible values. */
		public SymbolicFeatureType() {
			super(SymbolicFeature.class);
			values = null;
		}
		
		/** Constructs a SymbolicFeatureType with the dictionary of possible values
		 * passed as an array String[] or as a variable-length parameter list.
		 * The array is copied, so later changes to the caller's array do not affect this type.
		 * @param values  the dictionary; a <code>null</code> array yields a type
		 *                without dictionary, same as {@link #SymbolicFeatureType()} */
		public SymbolicFeatureType(String... values) {
			super(SymbolicFeature.class);
			// A null array used to fail on clone(); treat it as "dictionary unknown".
			this.values = (values == null) ? null : values.clone();
		}
		
		/** Constructs a SymbolicFeatureType whose dictionary holds consecutive numbers.
		 * @param size  Number of symbols. Their values will be "0","1",... */
		public SymbolicFeatureType(int size) {
			super(SymbolicFeature.class);
			values = new String[size];
			for(int i = 0; i < size; i++) {
				values[i] = String.valueOf(i);
			}
		}
		
		/** @return number of symbols in the dictionary, or -1 if there is no dictionary */
		public int size() {
			return values != null ? values.length : -1;
		}
		
		/** Returns the symbolic value stored under a given numeric code.
		 * Must not be called when there is no dictionary ({@link #size()} is -1).
		 * @param i  numeric code, i.e. index in the dictionary
		 * @return the symbolic value at index <code>i</code> */
		public String get(int i) {
			return values[i];
		}
		
		/** Finds the numeric code (index in the dictionary) of a given symbolic value.
		 * @param value  symbolic value to look up
		 * @return index of the first dictionary entry equal to <code>value</code>
		 * @throws SymbolicValueNotFoundException if the value is not present in the
		 *         dictionary, or if this type has no dictionary at all */
		public int codeOf(String value) throws SymbolicValueNotFoundException {
			if(values != null) {
				for(int i = 0; i < values.length; i++) {
					String candidate = values[i];
					// Null-safe comparison: a null dictionary entry must not break the search.
					boolean matches = (candidate == null) ? (value == null) : candidate.equals(value);
					if(matches) return i;
				}
			}
			throw new SymbolicValueNotFoundException(value);
		}
		
		/** Lists all dictionary values in order of their codes.
		 * @param separator  text put between consecutive values
		 * @param quote  text put directly before and after every value
		 * @return the concatenated list; empty string if the dictionary is empty or absent */
		public String toString(String separator, String quote) {
			StringBuilder s = new StringBuilder();
			for(int i = 0; i < size(); i++) {
				if(i > 0) s.append(separator);
				s.append(quote).append(get(i)).append(quote);
			}
			return s.toString();
		}
		
		/** @return comma-separated, unquoted list of dictionary values, in parentheses */
		@Override
		public String toString() {
			return "(" + toString(",", "") + ")";
		}
	}
	
	
	/** The symbolic value; never <code>null</code>. */
	public final String value;
	
	/** Constructs a SymbolicFeature with a given value.
	 * Normally, one would create many SymbolicFeature objects
	 * with the same value in a given data stream.
	 * In such case, it is best to create all of them
	 * using the same instance of String value passed to this constructor.
	 * This increases efficiency of further comparisons of SymbolicFeature objects
	 * and decreases memory usage.
	 * If a SymbolicFeatureType object is available and contains
	 * the dictionary of all possible symbolic values,
	 * it may be convenient to use {@link SymbolicFeature#SymbolicFeature(String, DataType)}
	 * constructor instead, to initialize all SymbolicFeatures with
	 * the canonical String value from the dictionary.
	 * @param value  the symbolic value; <code>null</code> is replaced with the empty string */
	public SymbolicFeature(String value) {
		this.value = (value == null) ? "" : value;
	}
	
	/**
	 * Creates SymbolicFeature whose value is the dictionary entry
	 * stored under a given numeric code in the SymbolicFeatureType object.
	 * @param valueCode  numeric code of the value, i.e. its index in the dictionary of <code>type</code>
	 * @param type  an instance of SymbolicFeatureType, with a dictionary
	 * @throws DataCastException  as in the original documentation; presumably raised by
	 *         <code>asSymbolicFeatureType()</code> when <code>type</code> is of another kind (to be confirmed in DataType)
	 * @throws DataException  if the value cannot be obtained from <code>type</code>
	 */
	public SymbolicFeature(int valueCode, DataType type) throws DataException {
		this(type.asSymbolicFeatureType().get(valueCode));
	}

	/**
	 * Creates SymbolicFeature with a 'canonical' String as a symbolic value,
	 * taken from the SymbolicFeatureType object.
	 * The value is first translated to its numeric code and then back
	 * to the String instance kept in the dictionary.
	 * @param value  symbolic value, must be present in the dictionary of <code>type</code>
	 * @param type  an instance of SymbolicFeatureType
	 * @throws DataException  if <code>value</code> is not found in the dictionary
	 *         of <code>type</code>, or <code>type</code> has no dictionary
	 */
	public SymbolicFeature(String value, DataType type) throws DataException {
		this(type.asSymbolicFeatureType().codeOf(value), type);
	}

	/** @return the value enclosed in single quotes */
	@Override
	public String toString() {
		return "'" + value + "'";
	}

	/** Two SymbolicFeatures are equal if and only if their String values are equal. */
	@Override
	public boolean equals(Object obj) {
		// instanceof is false for null, so no separate null check is needed
		if(!(obj instanceof SymbolicFeature)) return false;
		SymbolicFeature other = (SymbolicFeature) obj;
		// identity test first: cheap shortcut when canonical String instances are shared
		return (value == other.value) || value.equals(other.value);
	}

	/** @return hash code of the String value, consistent with {@link #equals(Object)} */
	@Override
	public int hashCode() {
		return value.hashCode();
	}

}

Copyright © 2008-2011 by TunedIT
Design by luksite