001package org.maltparser.core.syntaxgraph.feature; 002 003import java.util.LinkedHashMap; 004import java.util.Map; 005import java.util.regex.Pattern; 006 007import org.maltparser.core.exception.MaltChainedException; 008import org.maltparser.core.feature.function.AddressFunction; 009import org.maltparser.core.feature.function.FeatureFunction; 010import org.maltparser.core.feature.value.AddressValue; 011import org.maltparser.core.feature.value.FeatureValue; 012import org.maltparser.core.feature.value.SingleFeatureValue; 013import org.maltparser.core.io.dataformat.ColumnDescription; 014import org.maltparser.core.symbol.SymbolTable; 015import org.maltparser.core.symbol.SymbolTableHandler; 016import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 017import org.maltparser.core.syntaxgraph.SyntaxGraphException; 018import org.maltparser.core.syntaxgraph.node.DependencyNode; 019 020public final class NumOfFeature implements FeatureFunction { 021 public final static Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class, 022 java.lang.String.class, 023 java.lang.String.class}; 024 private final static Pattern splitPattern = Pattern.compile("\\|"); 025 public enum NumOfRelation { 026 LDEPS, RDEPS, DEPS 027 }; 028 private AddressFunction addressFunction; 029 private final SymbolTableHandler tableHandler; 030 private SymbolTable table; 031 private final SingleFeatureValue featureValue; 032 private NumOfRelation numOfRelation; 033 private String numOfRelationName; 034 private String normalizationString; 035 private final Map<Integer,String> normalization; 036 037 public NumOfFeature(SymbolTableHandler tableHandler) throws MaltChainedException { 038 this.tableHandler = tableHandler; 039 this.featureValue = new SingleFeatureValue(this); 040 this.normalization = new LinkedHashMap<Integer,String>(); 041 } 042 043 /** 044 * Initialize the distance feature function 045 * 046 * @param arguments an array of arguments with the type returned by getParameterTypes() 047 * @throws MaltChainedException 048 */ 049 public void initialize(Object[] arguments) throws MaltChainedException { 050 if (arguments.length != 3) { 051 throw new SyntaxGraphException("Could not initialize NumOfFeature: number of arguments are not correct. "); 052 } 053 // Checks that the two arguments are address functions 054 if (!(arguments[0] instanceof AddressFunction)) { 055 throw new SyntaxGraphException("Could not initialize NumOfFeature: the first argument is not an address function. "); 056 } 057 if (!(arguments[1] instanceof java.lang.String)) { 058 throw new SyntaxGraphException("Could not initialize NumOfFeature: the second argument (relation) is not a string. "); 059 } 060 if (!(arguments[2] instanceof java.lang.String)) { 061 throw new SyntaxGraphException("Could not initialize NumOfFeature: the third argument (normalization) is not a string. "); 062 } 063 setAddressFunction((AddressFunction)arguments[0]); 064 setNumOfRelation((String)arguments[1]); 065 normalizationString = (String)arguments[2]; 066 // Creates a symbol table called "NUMOF" using one null value 067 setSymbolTable(tableHandler.addSymbolTable("NUMOF"+normalizationString, ColumnDescription.INPUT, "one")); 068 069 String[] items = splitPattern.split(normalizationString); 070 071 if (items.length <= 0 || !items[0].equals("0")) { 072 throw new SyntaxGraphException("Could not initialize NumOfFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0."); 073 } 074 int tmp = -1; 075 for (int i = 0; i < items.length; i++) { 076 int v; 077 try { 078 v = Integer.parseInt(items[i]); 079 } catch (NumberFormatException e) { 080 throw new SyntaxGraphException("Could not initialize NumOfFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e); 081 } 082 normalization.put(v, ">="+v); 083 table.addSymbol(">="+v); 084 if (tmp != -1 && tmp >= v) { 085 throw new SyntaxGraphException("Could not initialize NumOfFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |"); 086 } 087 tmp = v; 088 } 089 } 090 091 /** 092 * Returns an array of class types used by the feature extraction system to invoke initialize with 093 * correct arguments. 094 * 095 * @return an array of class types 096 */ 097 public Class<?>[] getParameterTypes() { 098 return paramTypes; 099 } 100 101 /** 102 * Returns the string representation of the integer <code>code</code> according to the numof feature function. 103 * 104 * @param code the integer representation of the symbol 105 * @return the string representation of the integer <code>code</code> according to the numof feature function. 106 * @throws MaltChainedException 107 */ 108 public String getSymbol(int code) throws MaltChainedException { 109 return table.getSymbolCodeToString(code); 110 } 111 112 /** 113 * Returns the integer representation of the string <code>symbol</code> according to the numof feature function. 114 * 115 * @param symbol the string representation of the symbol 116 * @return the integer representation of the string <code>symbol</code> according to the numof feature function. 117 * @throws MaltChainedException 118 */ 119 public int getCode(String symbol) throws MaltChainedException { 120 return table.getSymbolStringToCode(symbol); 121 } 122 123 /** 124 * Cause the feature function to update the feature value. 125 * 126 * @throws MaltChainedException 127 */ 128 public void update() throws MaltChainedException { 129 // Retrieve the address value 130 final AddressValue arg1 = addressFunction.getAddressValue(); 131 // if arg1 or arg2 is null, then set a NO_NODE null value as feature value 132 if (arg1.getAddress() == null ) { 133 featureValue.setIndexCode(table.getNullValueCode(NullValueId.NO_NODE)); 134 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE)); 135 featureValue.setNullValue(true); 136 } else { 137 // Unfortunately this method takes a lot of time arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class); 138 // Cast the address arguments to dependency nodes 139 final DependencyNode node = (DependencyNode)arg1.getAddress(); 140 int numof = 0; 141 if (numOfRelation == NumOfRelation.DEPS) { 142 numof = node.getLeftDependentCount() + node.getRightDependentCount(); 143 } else if (numOfRelation == NumOfRelation.LDEPS) { 144 numof = node.getLeftDependentCount(); 145 } else if (numOfRelation == NumOfRelation.RDEPS) { 146 numof = node.getRightDependentCount(); 147 } 148 int lower = -1; 149 boolean f = false; 150 for (Integer upper : normalization.keySet()) { 151 if (numof >= lower && numof < upper) { 152 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower))); 153 featureValue.setSymbol(normalization.get(lower)); 154 f = true; 155 break; 156 } 157 lower = upper; 158 } 159 if (f == false) { 160 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower))); 161 featureValue.setSymbol(normalization.get(lower)); 162 } 163 // Tells the feature value that the feature is known and is not a null value 164 featureValue.setNullValue(false); 165 } 166 featureValue.setValue(1); 167// featureValue.setKnown(true); 168 } 169 170 public void setNumOfRelation(String numOfRelationName) { 171 this.numOfRelationName = numOfRelationName; 172 numOfRelation = NumOfRelation.valueOf(numOfRelationName.toUpperCase()); 173 } 174 175 public NumOfRelation getNumOfRelation() { 176 return numOfRelation; 177 } 178 179 /** 180 * Returns the feature value 181 * 182 * @return the feature value 183 */ 184 public FeatureValue getFeatureValue() { 185 return featureValue; 186 } 187 188 /** 189 * Returns the symbol table used by the numof feature function 190 * 191 * @return the symbol table used by the numof feature function 192 */ 193 public SymbolTable getSymbolTable() { 194 return table; 195 } 196 197 /** 198 * Returns the address function 199 * 200 * @return the address function 201 */ 202 public AddressFunction getAddressFunction() { 203 return addressFunction; 204 } 205 206 207 /** 208 * Sets the address function 209 * 210 * @param addressFunction a address function 211 */ 212 public void setAddressFunction(AddressFunction addressFunction) { 213 this.addressFunction = addressFunction; 214 } 215 216 /** 217 * Sets the symbol table used by the numof feature function 218 * 219 * @param table 220 */ 221 public void setSymbolTable(SymbolTable table) { 222 this.table = table; 223 } 224 225 public int getType() { 226 return ColumnDescription.STRING; 227 } 228 229 public String getMapIdentifier() { 230 return getSymbolTable().getName(); 231 } 232 233 public boolean equals(Object obj) { 234 if (this == obj) 235 return true; 236 if (obj == null) 237 return false; 238 if (getClass() != obj.getClass()) 239 return false; 240 return obj.toString().equals(this.toString()); 241 } 242 243 public int hashCode() { 244 return 217 + (null == toString() ? 0 : toString().hashCode()); 245 } 246 247 public String toString() { 248 final StringBuilder sb = new StringBuilder(); 249 sb.append("NumOf("); 250 sb.append(addressFunction.toString()); 251 sb.append(", "); 252 sb.append(numOfRelationName); 253 sb.append(", "); 254 sb.append(normalizationString); 255 sb.append(')'); 256 return sb.toString(); 257 } 258}