package org.maltparser.core.feature;

import java.util.ArrayList;
import java.util.Stack;
import java.util.regex.Pattern;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.function.AddressFunction;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.function.Function;
import org.maltparser.core.feature.spec.SpecificationModel;
import org.maltparser.core.feature.spec.SpecificationSubModel;
import org.maltparser.core.feature.system.FeatureEngine;
import org.maltparser.core.helper.HashMap;

/**
 * A map from sub-model name to the {@link FeatureVector} built for that sub-model of a
 * {@link SpecificationModel}.
 *
 * <p>The model also keeps one cache of address functions and one cache of feature functions,
 * so that equal functions created while parsing feature specifications are shared, and so
 * that {@link #update()} can refresh every function exactly once.
 *
 * <p>When a data split column and structure are supplied, an additional "divide" feature
 * function is created and, for every sub-model, a copy of its feature vector without the
 * divide feature is stored under the key {@code "/" + subModelName}.
 *
 * @author Johan Hall
 */
public class FeatureModel extends HashMap<String, FeatureVector> {
    public static final long serialVersionUID = 3256444702936019250L;

    /** Separators of a feature specification: parentheses, square brackets and commas. */
    private static final Pattern splitPattern = Pattern.compile("\\(|\\)|\\[|\\]|,");

    private final SpecificationModel specModel;
    private final ArrayList<AddressFunction> addressFunctionCache;
    private final ArrayList<FeatureFunction> featureFunctionCache;
    private final FeatureFunction divideFeatureFunction;
    private final FeatureRegistry registry;
    private final FeatureEngine featureEngine;
    private final FeatureVector mainFeatureVector;
    private final ArrayList<Integer> divideFeatureIndexVector;

    /**
     * Builds one feature vector per sub-model of the specification model and, optionally, the
     * divide feature function together with the divide-free copies of the feature vectors.
     *
     * <p>The first sub-model encountered becomes the main feature vector. It is also stored in
     * the map under its own name unless that name is {@code "MAIN"}. Every later sub-model is
     * stored in the map under its name.
     *
     * @param _specModel the specification model whose sub-models are turned into feature vectors
     * @param _registry the registry handed to the feature engine when functions are created
     * @param _engine the engine used to instantiate functions from their names
     * @param dataSplitColumn the column used for the divide feature; no divide feature is created
     *        when this is {@code null} or empty
     * @param dataSplitStructure the data structure used for the divide feature; no divide feature
     *        is created when this is {@code null} or empty
     * @throws MaltChainedException if a feature vector or the divide feature cannot be created
     */
    public FeatureModel(SpecificationModel _specModel, FeatureRegistry _registry, FeatureEngine _engine, String dataSplitColumn, String dataSplitStructure) throws MaltChainedException {
        this.specModel = _specModel;
        this.registry = _registry;
        this.featureEngine = _engine;
        this.addressFunctionCache = new ArrayList<AddressFunction>();
        this.featureFunctionCache = new ArrayList<FeatureFunction>();

        FeatureVector tmpMainFeatureVector = null;
        for (SpecificationSubModel subModel : specModel) {
            FeatureVector fv = new FeatureVector(this, subModel);
            if (tmpMainFeatureVector == null) {
                // The first sub-model is the main one; only a sub-model literally named "MAIN"
                // is kept out of the map.
                tmpMainFeatureVector = fv;
                if (!subModel.getSubModelName().equals("MAIN")) {
                    put(subModel.getSubModelName(), fv);
                }
            } else {
                put(subModel.getSubModelName(), fv);
            }
        }
        this.mainFeatureVector = tmpMainFeatureVector;

        if (dataSplitColumn != null && dataSplitColumn.length() > 0 && dataSplitStructure != null && dataSplitStructure.length() > 0) {
            final StringBuilder sb = new StringBuilder();
            sb.append("InputColumn(");
            sb.append(dataSplitColumn);
            sb.append(", ");
            sb.append(dataSplitStructure);
            sb.append(')');
            this.divideFeatureFunction = identifyFeature(sb.toString());
            this.divideFeatureIndexVector = new ArrayList<Integer>();

            // Record every position of the divide feature inside the main feature vector.
            for (int i = 0; i < mainFeatureVector.size(); i++) {
                if (mainFeatureVector.get(i).equals(divideFeatureFunction)) {
                    divideFeatureIndexVector.add(i);
                }
            }

            for (SpecificationSubModel subModel : specModel) {
                FeatureVector featureVector = get(subModel.getSubModelName());
                if (featureVector == null) {
                    featureVector = mainFeatureVector;
                }
                String divideKeyName = "/" + subModel.getSubModelName();

                FeatureVector divideFeatureVector = (FeatureVector) featureVector.clone();
                // Look the element up in the untouched source vector: the copy shrinks on every
                // removal, so reading from the copy would shift all later indices by one.
                // NOTE(review): the indices were computed against the main feature vector but are
                // also applied to the other sub-model vectors - confirm that is intended.
                for (Integer i : divideFeatureIndexVector) {
                    divideFeatureVector.remove(featureVector.get(i));
                }
                put(divideKeyName, divideFeatureVector);
            }
        } else {
            this.divideFeatureFunction = null;
            this.divideFeatureIndexVector = null;
        }
    }

    /**
     * @return the specification model this feature model was built from
     */
    public SpecificationModel getSpecModel() {
        return specModel;
    }

    /**
     * @return the registry passed to the feature engine when functions are created
     */
    public FeatureRegistry getRegistry() {
        return registry;
    }

    /**
     * @return the engine used to instantiate functions
     */
    public FeatureEngine getFeatureEngine() {
        return featureEngine;
    }

    /**
     * @return the feature vector of the first sub-model, or {@code null} if the specification
     *         model had no sub-models
     */
    public FeatureVector getMainFeatureVector() {
        return mainFeatureVector;
    }

    /**
     * Looks up a feature vector by the key it is stored under.
     *
     * @param subModelName the map key
     * @return the stored feature vector, or whatever the map returns for a missing key
     */
    public FeatureVector getFeatureVector(String subModelName) {
        return get(subModelName);
    }

    /**
     * Looks up a feature vector, preferring the key {@code decisionSymbol + "." + subModelName}
     * (or just {@code subModelName} when the decision symbol is empty), then the plain
     * sub-model name, and finally falling back to the main feature vector.
     *
     * @param decisionSymbol the prefix of the qualified key; must not be {@code null}
     * @param subModelName the sub-model name
     * @return the most specific feature vector available
     */
    public FeatureVector getFeatureVector(String decisionSymbol, String subModelName) {
        final StringBuilder sb = new StringBuilder();
        if (decisionSymbol.length() > 0) {
            sb.append(decisionSymbol);
            sb.append('.');
        }
        sb.append(subModelName);
        final String qualifiedName = sb.toString();
        if (containsKey(qualifiedName)) {
            return get(qualifiedName);
        } else if (containsKey(subModelName)) {
            return get(subModelName);
        }
        return mainFeatureVector;
    }

    /**
     * @return the divide feature function, or {@code null} if no data split was configured
     */
    public FeatureFunction getDivideFeatureFunction() {
        return divideFeatureFunction;
    }

    /**
     * @return {@code true} if a divide feature function was created
     */
    public boolean hasDivideFeatureFunction() {
        return divideFeatureFunction != null;
    }

    /**
     * @return the positions of the divide feature inside the main feature vector, or
     *         {@code null} if no data split was configured; the internal list is returned as is
     */
    public ArrayList<Integer> getDivideFeatureIndexVector() {
        return divideFeatureIndexVector;
    }

    /**
     * @return {@code true} if the divide feature index vector was created
     */
    public boolean hasDivideFeatureIndexVector() {
        return divideFeatureIndexVector != null;
    }

    /**
     * Updates every cached address function and then every cached feature function.
     *
     * @throws MaltChainedException if any function fails to update
     */
    public void update() throws MaltChainedException {
        for (int i = 0, n = addressFunctionCache.size(); i < n; i++) {
            addressFunctionCache.get(i).update();
        }

        for (int i = 0, n = featureFunctionCache.size(); i < n; i++) {
            featureFunctionCache.get(i).update();
        }
    }

    /**
     * Updates every cached address function with the given arguments and then every cached
     * feature function (feature functions are updated without arguments).
     *
     * @param arguments the arguments forwarded to each address function
     * @throws MaltChainedException if any function fails to update
     */
    public void update(Object[] arguments) throws MaltChainedException {
        for (int i = 0, n = addressFunctionCache.size(); i < n; i++) {
            addressFunctionCache.get(i).update(arguments);
        }

        for (int i = 0, n = featureFunctionCache.size(); i < n; i++) {
            featureFunctionCache.get(i).update();
        }
    }

    /**
     * Parses a feature specification such as {@code InputColumn(FORM, Stack[0])} into a feature
     * function.
     *
     * <p>The specification is split on parentheses, square brackets and commas. The trimmed,
     * non-empty tokens are pushed in reverse order so that the first token ends up on top of the
     * stack, and the stack is then resolved by {@link #identifyFeatureFunction(Stack)}.
     *
     * @param spec the textual feature specification
     * @return the resulting feature function (possibly a cached, equal instance)
     * @throws MaltChainedException if the specification contains no tokens, or does not resolve
     *         to exactly one feature function that is not an address function
     */
    public FeatureFunction identifyFeature(String spec) throws MaltChainedException {
        String[] items = splitPattern.split(spec);
        Stack<Object> objects = new Stack<Object>();
        for (int i = items.length - 1; i >= 0; i--) {
            String item = items[i].trim();
            if (item.length() != 0) {
                objects.push(item);
            }
        }
        // A specification without any token would otherwise fail with an unchecked
        // EmptyStackException in identifyFeatureFunction.
        if (objects.isEmpty()) {
            throw new FeatureException("The feature specification '"+spec+"' were not recognized properly. ");
        }
        identifyFeatureFunction(objects);
        if (objects.size() != 1 || !(objects.peek() instanceof FeatureFunction) || (objects.peek() instanceof AddressFunction)) {
            throw new FeatureException("The feature specification '"+spec+"' were not recognized properly. ");
        }
        return (FeatureFunction)objects.pop();
    }

    /**
     * Recursively resolves the tokens on the stack into functions.
     *
     * <p>If the top token names a function known to the feature engine, the rest of the stack is
     * resolved first and the function is then initialized from it. Otherwise the top token is
     * treated as a plain argument: it is set aside while the rest of the stack is resolved and
     * then pushed back.
     *
     * @param objects the non-empty stack of tokens and already resolved functions
     * @throws MaltChainedException if a function cannot be created or initialized
     */
    protected void identifyFeatureFunction(Stack<Object> objects) throws MaltChainedException {
        Function function = featureEngine.newFunction(objects.peek().toString(), registry);
        if (function != null) {
            objects.pop();
            if (!objects.isEmpty()) {
                identifyFeatureFunction(objects);
            }
            initializeFunction(function, objects);
        } else {
            // The stack is known to be non-empty here because peek() above succeeded.
            Object argument = objects.pop();
            if (!objects.isEmpty()) {
                identifyFeatureFunction(objects);
            }
            objects.push(argument);
        }
    }

    /**
     * Initializes a function with arguments popped from the stack and pushes the function back.
     *
     * <p>One argument is consumed per declared parameter type. String tokens are converted when
     * the parameter type is {@code Integer}, {@code Double} or {@code Boolean}. After
     * initialization, an equal function already present in the matching cache replaces the new
     * instance; otherwise the new instance is added to the cache.
     *
     * @param function the freshly created function to initialize
     * @param objects the stack holding the function's arguments on top
     * @throws MaltChainedException if an argument is missing, has the wrong type or cannot be
     *         converted, or if the function rejects the arguments
     */
    protected void initializeFunction(Function function, Stack<Object> objects) throws MaltChainedException {
        Class<?>[] paramTypes = function.getParameterTypes();
        Object[] arguments = new Object[paramTypes.length];
        for (int i = 0; i < paramTypes.length; i++) {
            // Too few arguments would otherwise surface as an unchecked EmptyStackException.
            if (objects.isEmpty()) {
                throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized, expect "+paramTypes.length+" argument(s) but only "+i+" were supplied. ");
            }
            if (paramTypes[i] == java.lang.Integer.class) {
                if (objects.peek() instanceof String) {
                    String object = (String)objects.pop();
                    try {
                        objects.push(Integer.parseInt(object));
                    } catch (NumberFormatException e) {
                        throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+object+"'" + ", expect an integer value. ", e);
                    }
                } else {
                    throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect an integer value. ");
                }
            } else if (paramTypes[i] == java.lang.Double.class) {
                if (objects.peek() instanceof String) {
                    String object = (String)objects.pop();
                    try {
                        objects.push(Double.parseDouble(object));
                    } catch (NumberFormatException e) {
                        throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+object+"'" + ", expect a numeric value. ", e);
                    }
                } else {
                    throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect a numeric value. ");
                }
            } else if (paramTypes[i] == java.lang.Boolean.class) {
                if (objects.peek() instanceof String) {
                    objects.push(Boolean.parseBoolean(((String)objects.pop())));
                } else {
                    throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect a boolean value. ");
                }
            }
            if (!paramTypes[i].isInstance(objects.peek())) {
                throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'");
            }
            arguments[i] = objects.pop();
        }
        function.initialize(arguments);

        // Share equal functions: reuse the cached instance if there is one, else cache this one.
        if (function instanceof AddressFunction) {
            int index = addressFunctionCache.indexOf(function);
            if (index != -1) {
                function = addressFunctionCache.get(index);
            } else {
                addressFunctionCache.add((AddressFunction)function);
            }
        } else if (function instanceof FeatureFunction) {
            int index = featureFunctionCache.indexOf(function);
            if (index != -1) {
                function = featureFunctionCache.get(index);
            } else {
                featureFunctionCache.add((FeatureFunction)function);
            }
        }
        objects.push(function);
    }

    /**
     * @return the string representation of the underlying specification model
     */
    @Override
    public String toString() {
        return specModel.toString();
    }
}