001package org.maltparser.core.symbol.trie;
002
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.nullvalue.InputNullValues;
import org.maltparser.core.symbol.nullvalue.NullValues;
import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
import org.maltparser.core.symbol.nullvalue.OutputNullValues;
018/**
019
020@author Johan Hall
021@since 1.0
022*/
023public class TrieSymbolTable implements SymbolTable {
024        private final String name;
025        private final Trie trie;
026        private final SortedMap<Integer, TrieNode> codeTable;
027        private int columnCategory;
028        private final NullValues nullValues;
029        private int valueCounter;
030    /** Cache the hash code for the symbol table */
031    private int cachedHash;
032    
033    
034        public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy) throws MaltChainedException { 
035                this.name = name;
036                this.trie = trie;
037                this.columnCategory = columnCategory;
038                
039                codeTable = new TreeMap<Integer, TrieNode>();
040                if (columnCategory == ColumnDescription.INPUT) {
041                        nullValues = new InputNullValues(nullValueStrategy, this);
042                } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
043                        nullValues = new OutputNullValues(nullValueStrategy, this);
044                } else {
045                        nullValues = new InputNullValues(nullValueStrategy, this);
046                }
047                valueCounter = nullValues.getNextCode();
048        }
049        
050        public TrieSymbolTable(String name, Trie trie) { 
051                this.name = name;
052                this.trie = trie;
053                codeTable = new TreeMap<Integer, TrieNode>();
054                nullValues = new InputNullValues("one", this);
055                valueCounter = 1;
056        }
057        
058        public int addSymbol(String symbol) throws MaltChainedException {
059                if (nullValues == null || !nullValues.isNullValue(symbol)) {
060                        if (symbol == null || symbol.length() == 0) {
061                                throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
062                        }
063                        
064                        final TrieNode node = trie.addValue(symbol, this, -1);
065                        final int code = node.getEntry(this); 
066                        if (!codeTable.containsKey(code)) {
067                                codeTable.put(code, node);
068                        }
069                        return code;
070                } else {
071                        return nullValues.symbolToCode(symbol);
072                }
073        }
074        
075//      public int addSymbol(StringBuilder symbol) throws MaltChainedException {
076//              if (nullValues == null || !nullValues.isNullValue(symbol)) {
077//                      if (symbol == null || symbol.length() == 0) {
078//                              throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
079//                      }
080//                      
081//                      final TrieNode node = trie.addValue(symbol, this, -1);
082//                      final int code = node.getEntry(this);
083//                      if (!codeTable.containsKey(code)) {
084//                              codeTable.put(code, node);
085//                      }
086//                      return code;
087//              } else {
088//                      return nullValues.symbolToCode(symbol);
089//              }
090//      }
091        
092        public String getSymbolCodeToString(int code) throws MaltChainedException {
093                if (code >= 0) {
094                        if (nullValues == null || !nullValues.isNullValue(code)) {
095                                TrieNode node = codeTable.get(code);
096                                if (node != null) {
097                                        return trie.getValue(node, this);
098                                } else {
099                                        return null;
100                                }
101                        } else {
102                                return nullValues.codeToSymbol(code);
103                        }
104                } else {
105                        throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. ");
106                }
107        }
108        
109        public int getSymbolStringToCode(String symbol) throws MaltChainedException {
110                if (symbol != null) {
111                        if (nullValues == null || !nullValues.isNullValue(symbol)) {
112                                final Integer entry = trie.getEntry(symbol, this);
113                                if (entry != null) {
114                                        return entry.intValue(); 
115                                } else {
116                                        return -1;
117                                }
118                        } else {
119                                return nullValues.symbolToCode(symbol);
120                        }
121                } else {
122                        throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
123                }
124        }
125
126        public void clearTmpStorage() {
127
128        }
129        
130        public String getNullValueStrategy() {
131                if (nullValues == null) {
132                        return null;
133                }
134                return nullValues.getNullValueStrategy();
135        }
136        
137        
138        public int getColumnCategory() {
139                return columnCategory;
140        }
141        
142        public String printSymbolTable() throws MaltChainedException {
143                StringBuilder sb = new StringBuilder();
144                for (Integer code : codeTable.keySet()) {
145                        sb.append(code+"\t"+trie.getValue(codeTable.get(code), this)+"\n");
146                }
147                return sb.toString();
148        }
149        
150        public void saveHeader(BufferedWriter out) throws MaltChainedException  {
151                try {
152                        out.append('\t');
153                        out.append(getName());
154                        out.append('\t');
155                        out.append(Integer.toString(getColumnCategory()));
156                        out.append('\t');
157                        out.append(getNullValueStrategy());
158                        out.append('\n');
159                } catch (IOException e) {
160                        throw new SymbolException("Could not save the symbol table. ", e);
161                }
162        }
163        
164        public int size() {
165                return codeTable.size();
166        }
167        
168        
169        public void save(BufferedWriter out) throws MaltChainedException  {
170                try {
171                        out.write(name);
172                        out.write('\n');
173                        for (Integer code : codeTable.keySet()) {
174                                out.write(code+"");
175                                out.write('\t');
176                                out.write(trie.getValue(codeTable.get(code), this));
177                                out.write('\n');
178                        }
179                        out.write('\n');
180                } catch (IOException e) {
181                        throw new SymbolException("Could not save the symbol table. ", e);
182                }
183        }
184        
185        public void load(BufferedReader in) throws MaltChainedException {
186                int max = 0;
187                int index = 0;
188                String fileLine;
189                try {
190                        while ((fileLine = in.readLine()) != null) {
191                                if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) {
192                                        setValueCounter(max+1);
193                                        break;
194                                }
195                                int code = Integer.parseInt(fileLine.substring(0,index));
196                                final String str = fileLine.substring(index+1);
197                                final TrieNode node = trie.addValue(str, this, code);
198                                codeTable.put(node.getEntry(this), node); 
199                                if (max < code) {
200                                        max = code;
201                                }
202                        }
203                } catch (NumberFormatException e) {
204                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e);
205                } catch (IOException e) {
206                        throw new SymbolException("Could not load the symbol table. ", e);
207                }
208        }
209        
210        public String getName() {
211                return name;
212        }
213
214        public int getValueCounter() {
215                return valueCounter;
216        }
217
218        private void setValueCounter(int valueCounter) {
219                this.valueCounter = valueCounter;
220        }
221        
222        protected void updateValueCounter(int code) {
223                if (code > valueCounter) {
224                        valueCounter = code;
225                }
226        }
227        
228        protected int increaseValueCounter() {
229                return valueCounter++;
230        }
231        
232        public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException {
233                if (nullValues == null) {
234                        throw new SymbolException("The symbol table does not have any null-values. ");
235                }
236                return nullValues.nullvalueToCode(nullValueIdentifier);
237        }
238        
239        public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException {
240                if (nullValues == null) {
241                        throw new SymbolException("The symbol table does not have any null-values. ");
242                }
243                return nullValues.nullvalueToSymbol(nullValueIdentifier);
244        }
245        
246        public boolean isNullValue(String symbol) throws MaltChainedException {
247                if (nullValues != null) {
248                        return nullValues.isNullValue(symbol);
249                } 
250                return false;
251        }
252        
253        public boolean isNullValue(int code) throws MaltChainedException {
254                if (nullValues != null) {
255                        return nullValues.isNullValue(code);
256                } 
257                return false;
258        }
259        
260//      public void copy(SymbolTable fromTable) throws MaltChainedException {
261//              final SortedMap<Integer, TrieNode> fromCodeTable =  ((TrieSymbolTable)fromTable).getCodeTable();
262//              int max = getValueCounter()-1;
263//              for (Integer code : fromCodeTable.keySet()) {
264//                      final String str = trie.getValue(fromCodeTable.get(code), this);
265//                      final TrieNode node = trie.addValue(str, this, code);
266//                      codeTable.put(node.getEntry(this), node); //.getCode(), node);
267//                      if (max < code) {
268//                              max = code;
269//                      }
270//              }
271//              setValueCounter(max+1);
272//      }
273
274        public SortedMap<Integer, TrieNode> getCodeTable() {
275                return codeTable;
276        }
277        
278        public Set<Integer> getCodes() {
279                return codeTable.keySet();
280        }
281        
282        protected Trie getTrie() {
283                return trie;
284        }
285        
286        public boolean equals(Object obj) {
287                if (this == obj)
288                        return true;
289                if (obj == null)
290                        return false;
291                if (getClass() != obj.getClass())
292                        return false;
293                final TrieSymbolTable other = (TrieSymbolTable)obj;
294                return ((name == null) ? other.name == null : name.equals(other.name));
295        }
296
297        public int hashCode() {
298                if (cachedHash == 0) {
299                        cachedHash = 217 + (null == name ? 0 : name.hashCode());
300                }
301                return cachedHash;
302        }
303        
304        public String toString() {
305                final StringBuilder sb = new StringBuilder();
306                sb.append(name);
307                sb.append(' ');
308                sb.append(valueCounter);
309                return sb.toString();
310        }
311}