001package org.maltparser.core.symbol.hash;
002
003import java.io.BufferedReader;
004import java.io.BufferedWriter;
005import java.io.FileInputStream;
006import java.io.FileNotFoundException;
007import java.io.FileOutputStream;
008import java.io.IOException;
009import java.io.InputStreamReader;
010import java.io.OutputStreamWriter;
011import java.io.UnsupportedEncodingException;
012import java.util.Map;
013import java.util.Set;
014import java.util.regex.Pattern;
015import java.util.regex.PatternSyntaxException;
016
017import org.maltparser.core.exception.MaltChainedException;
018import org.maltparser.core.helper.HashMap;
019import org.maltparser.core.symbol.SymbolException;
020import org.maltparser.core.symbol.SymbolTable;
021import org.maltparser.core.symbol.SymbolTableHandler;
022
023
024public class HashSymbolTableHandler implements SymbolTableHandler {
025        private final Map<String,  HashSymbolTable> symbolTables;
026        
027        public HashSymbolTableHandler() { 
028                this.symbolTables = new HashMap<String, HashSymbolTable>();
029        }
030        
031        public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
032                HashSymbolTable symbolTable = symbolTables.get(tableName);
033                if (symbolTable == null) {
034                        symbolTable = new HashSymbolTable(tableName);
035                        symbolTables.put(tableName, symbolTable);
036                }
037                return symbolTable;
038        }
039        
040        public SymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy) throws MaltChainedException {
041                HashSymbolTable symbolTable = symbolTables.get(tableName);
042                if (symbolTable == null) {
043                        symbolTable = new HashSymbolTable(tableName, columnCategory, nullValueStrategy);
044                        symbolTables.put(tableName, symbolTable);
045                }
046                return symbolTable;
047        }
048        
049        public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
050                HashSymbolTable symbolTable = symbolTables.get(tableName);
051                if (symbolTable == null) {
052                        HashSymbolTable hashParentTable = (HashSymbolTable)parentTable;
053                        symbolTable = new HashSymbolTable(tableName, hashParentTable.getColumnCategory(), hashParentTable.getNullValueStrategy());
054                        symbolTables.put(tableName, symbolTable);
055                }
056                return symbolTable;
057        }
058        
059        public SymbolTable getSymbolTable(String tableName) {
060                return symbolTables.get(tableName);
061        }
062        
063        public Set<String> getSymbolTableNames() {
064                return symbolTables.keySet();
065        }
066        
067        public void cleanUp() {}
068        
069        public void save(OutputStreamWriter osw) throws MaltChainedException  {
070                try {
071                        BufferedWriter bout = new BufferedWriter(osw);
072                        for (HashSymbolTable table : symbolTables.values()) {
073                                table.saveHeader(bout);
074                        }
075                        bout.write('\n');
076                        for (HashSymbolTable table : symbolTables.values()) {
077                                table.save(bout);
078                        }
079                        bout.close();
080                } catch (IOException e) {
081                        throw new SymbolException("Could not save the symbol tables. ", e);
082                }       
083        }
084        
085        public void save(String fileName, String charSet) throws MaltChainedException  {
086                try {
087                        save(new OutputStreamWriter(new FileOutputStream(fileName), charSet));
088                } catch (FileNotFoundException e) {
089                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be created. ", e);
090                } catch (UnsupportedEncodingException e) {
091                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
092                }
093        }
094        
095        public void loadHeader(BufferedReader bin) throws MaltChainedException {
096                String fileLine = "";
097                Pattern tabPattern = Pattern.compile("\t");
098                try {
099                        while ((fileLine = bin.readLine()) != null) {
100                                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
101                                        break;
102                                }
103                                String items[];
104                                try {
105                                        items = tabPattern.split(fileLine.substring(1));
106                                } catch (PatternSyntaxException e) {
107                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' could not split into atomic parts. ", e);
108                                }
109                                if (items.length != 3) {
110                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' must contain four columns. ");
111                                }
112                                addSymbolTable(items[0], Integer.parseInt(items[1]), items[2]);
113                        }
114                } catch (NumberFormatException e) {
115                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
116                } catch (IOException e) {
117                        throw new SymbolException("Could not load the symbol table. ", e);
118                }
119        }
120        
121        public void load(InputStreamReader isr) throws MaltChainedException  {
122                try {
123                        BufferedReader bin = new BufferedReader(isr);
124                        String fileLine;
125                        SymbolTable table = null;
126                        bin.mark(2);
127                        if (bin.read() == '\t') {
128                                bin.reset();
129                                loadHeader(bin);
130                        } else {
131                                bin.reset();
132                        }
133                        while ((fileLine = bin.readLine()) != null) {
134                                if (fileLine.length() > 0) {
135                                        table = addSymbolTable(fileLine);
136                                        table.load(bin);
137                                }
138                        }
139                        bin.close();
140                } catch (IOException e) {
141                        throw new SymbolException("Could not load the symbol tables. ", e);
142                }                       
143        }
144        
145        public void load(String fileName, String charSet) throws MaltChainedException  {
146                try {
147                        load(new InputStreamReader(new FileInputStream(fileName), charSet));
148                } catch (FileNotFoundException e) {
149                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
150                } catch (UnsupportedEncodingException e) {
151                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
152                }               
153        }
154        
155        public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy) throws MaltChainedException {
156                try {
157                        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
158                        String fileLine;
159                        SymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy);
160
161                        while ((fileLine = br.readLine()) != null) {
162                                table.addSymbol(fileLine.trim());
163                        }
164                        br.close();
165                        return table;
166                } catch (FileNotFoundException e) {
167                        throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
168                } catch (UnsupportedEncodingException e) {
169                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
170                } catch (IOException e) {
171                        throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
172                }
173        }
174        
175        public String printSymbolTables() throws MaltChainedException  {
176                StringBuilder sb = new StringBuilder();
177                for (HashSymbolTable table : symbolTables.values()) {
178                        sb.append(table.printSymbolTable());
179                }
180                return sb.toString();
181        }
182}