/*
* Debellor
*
* Copyright (C) 2008-2009 by Marcin Wojnarski
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package org.debellor.core.data;
import org.debellor.core.DataObject;
import org.debellor.core.DataType;
import org.debellor.core.exception.data.DataCastException;
import org.debellor.core.exception.data.DataException;
import org.debellor.core.exception.data.SymbolicValueNotFoundException;
/**
 * Represents the value of a symbolic feature/attribute.
 * Internally, the value is stored as a String; a null passed to
 * {@link #SymbolicFeature(String)} is replaced by the empty string,
 * so {@link #value} is never null.
 *
 * @author Marcin Wojnarski
 *
 */
public final class SymbolicFeature extends DataObject {

    /**
     * Type descriptor of {@link SymbolicFeature} data, optionally carrying
     * the dictionary of all symbolic values that may occur.
     */
    public static final class SymbolicFeatureType extends DataType {

        /** Dictionary of possible values.
         * If non-null, contains a set of all symbolic values
         * that may appear in the data stream.
         * Additionally, defines a mapping between symbolic values and
         * numeric codes, given by indices of the array.
         * This mapping may or <i>may not</i> be used by data consumers
         * to convert symbolic values to numbers.
         * Note that for trainable cells it is possible that
         * input data at the processing stage contain
         * some other symbolic values, not present at the training stage
         * (neither in the SymbolicFeatureType specification nor in the actual data received).
         * It is desirable, although not mandatory, that the cell be able to handle such case.
         * <p>Null value means that the set of possible symbolic values is unknown
         * and the consumer of data should expect any value.</p> */
        public final String[] values;

        /** Constructs a SymbolicFeatureType <i>without</i> the dictionary of possible values. */
        public SymbolicFeatureType() {
            super(SymbolicFeature.class);
            values = null;
        }

        /** Constructs a SymbolicFeatureType with the dictionary of possible values
         * passed as an array String[] or as a variable-length parameter list.
         * The array is copied, so later changes made by the caller to the passed
         * array do not affect this type. A null array yields a type without
         * a dictionary, same as {@link #SymbolicFeatureType()}. */
        public SymbolicFeatureType(String... values) {
            super(SymbolicFeature.class);
            // A null dictionary is a legal state ("unknown set of values"), so accept it
            // here instead of failing inside clone().
            this.values = (values == null) ? null : values.clone();
        }

        /** @param size Number of symbols. Their values will be "0","1",... */
        public SymbolicFeatureType(int size) {
            super(SymbolicFeature.class);
            values = new String[size];
            for (int i = 0; i < size; i++) {
                values[i] = String.valueOf(i);
            }
        }

        /**
         * @return number of symbols in the dictionary,
         *         or -1 if this type has no dictionary
         */
        public int size() {
            return values != null ? values.length : -1;
        }

        /**
         * Returns the symbolic value stored under a given numeric code.
         * Must only be called on a type that has a dictionary
         * (i.e. {@link #size()} is not -1); otherwise the call fails
         * with a NullPointerException.
         *
         * @param i numeric code, an index into {@link #values}
         * @return the symbolic value at index <code>i</code>
         */
        public String get(int i) {
            return values[i];
        }

        /**
         * Finds the numeric code (index in the dictionary) of a symbolic value.
         *
         * @param value symbolic value to look up
         * @return index of the first dictionary entry equal to <code>value</code>
         * @throws SymbolicValueNotFoundException if the value is not present in
         *         the dictionary, if it is null, or if this type has no dictionary at all
         */
        public int codeOf(String value) throws SymbolicValueNotFoundException {
            // Without a dictionary (or with a null query) nothing can match: report it
            // through the declared exception rather than a NullPointerException.
            if (values != null && value != null) {
                for (int i = 0; i < values.length; i++) {
                    // Calling equals() on 'value' keeps null dictionary entries harmless.
                    if (value.equals(values[i])) {
                        return i;
                    }
                }
            }
            throw new SymbolicValueNotFoundException(value);
        }

        /**
         * Lists all dictionary values in the order of their numeric codes.
         *
         * @param separator text inserted between consecutive values
         * @param quote text put directly before and after every value
         * @return the concatenated list; empty string if the dictionary
         *         is empty or absent
         */
        public String toString(String separator, String quote) {
            StringBuilder sb = new StringBuilder();
            // size() is -1 when there is no dictionary, so the loop is skipped then.
            for (int i = 0; i < size(); i++) {
                if (i > 0) {
                    sb.append(separator);
                }
                sb.append(quote).append(get(i)).append(quote);
            }
            return sb.toString();
        }

        /** @return comma-separated, unquoted list of dictionary values, in parentheses */
        @Override
        public String toString() {
            return "(" + toString(",", "") + ")";
        }
    }

    /** The symbolic value. Never null (see {@link #SymbolicFeature(String)}). */
    public final String value;

    /** Constructs a SymbolicFeature with a given value.
     * A null value is replaced by the empty string.
     * Normally, one would create many SymbolicFeature objects
     * with the same value in a given data stream.
     * In such case, it is best to create all of them
     * using the same instance of String value passed to this constructor.
     * This increases efficiency of further comparisons of SymbolicFeature objects
     * and decreases memory usage.
     * If a SymbolicFeatureType object is available and contains
     * the dictionary of all possible symbolic values,
     * it may be convenient to use {@link SymbolicFeature#SymbolicFeature(String, DataType)}
     * constructor instead, to initialize all SymbolicFeatures with
     * the canonical String value from the dictionary. */
    public SymbolicFeature(String value) {
        this.value = (value == null) ? "" : value;
    }

    /**
     * Creates SymbolicFeature whose value is the dictionary entry
     * stored under a given numeric code in the SymbolicFeatureType object.
     *
     * @param valueCode numeric code of the value, an index into the dictionary of <code>type</code>
     * @param type an instance of SymbolicFeatureType; it must have a dictionary
     * @throws DataException as declared by <code>type.asSymbolicFeatureType()</code>;
     *         presumably a {@link DataCastException} when <code>type</code>
     *         is not a SymbolicFeatureType (to be confirmed against DataType)
     */
    public SymbolicFeature(int valueCode, DataType type) throws DataException {
        this(type.asSymbolicFeatureType().get(valueCode));
    }

    /**
     * Creates SymbolicFeature with a 'canonical' String as a symbolic value,
     * taken from the SymbolicFeatureType object: the given value is first
     * translated to its numeric code and then back to the dictionary entry.
     *
     * @param value symbolic value; must be present in the dictionary of <code>type</code>
     * @param type an instance of SymbolicFeatureType
     * @throws DataException if <code>value</code> cannot be found in the dictionary
     *         (see {@link SymbolicFeatureType#codeOf(String)}), or as declared
     *         by <code>type.asSymbolicFeatureType()</code>
     */
    public SymbolicFeature(String value, DataType type) throws DataException {
        this(type.asSymbolicFeatureType().codeOf(value), type);
    }

    /** @return the value enclosed in single quotes */
    @Override
    public String toString() {
        return "'" + value + "'";
    }

    /**
     * Two SymbolicFeatures are equal if and only if their values are equal Strings.
     */
    @Override
    public boolean equals(Object obj) {
        // instanceof is false for null, so no separate null test is needed.
        if (!(obj instanceof SymbolicFeature)) {
            return false;
        }
        // 'value' is never null; String.equals() itself short-circuits on identity.
        return value.equals(((SymbolicFeature) obj).value);
    }

    /** @return hash code of the underlying String, consistent with {@link #equals(Object)} */
    @Override
    public int hashCode() {
        return value.hashCode();
    }
}