![]() |
The Java Developers Almanac 1.4 — Order this book from Amazon. |
e442. Implementing a FilterReader to Filter Lines Based on a Regular Expression

A common use of regular expressions is to find all lines that match a pattern, similar to the `grep` Unix command. This example implements a
FilterReader that will filter an input stream based on a pattern.
/**
 * A {@link FilterReader} that passes through only those lines of the wrapped
 * reader that contain a match for a regular expression, similar to the
 * {@code grep} command. Lines that do not match are dropped.
 *
 * <p>A line is kept when the pattern is found anywhere in it
 * ({@code Matcher.find()}); the pattern does not have to match the whole line.
 * Line terminators are converted to a single newline character, and a newline
 * is emitted after every matching line, including the last one even when the
 * input did not end with a terminator.
 *
 * <p>This class performs no synchronization of its own.
 *
 * <p>Usage:
 * <pre>
 *     BufferedReader rd = null;
 *     try {
 *         // Create the FilterReader
 *         String filename = "infile.txt";
 *         String pattern = "pattern";
 *         rd = new BufferedReader(new FileReader(filename));
 *         rd = new BufferedReader(new RegexReader(rd, pattern));
 *
 *         // Retrieve all lines that match pattern
 *         String line = null;
 *         while ((line = rd.readLine()) != null) {
 *             // line matches the pattern
 *         }
 *     } catch (IOException e) {
 *         // report or rethrow; do not silently ignore
 *     } finally {
 *         if (rd != null) {
 *             try {
 *                 rd.close();
 *             } catch (IOException ignored) {
 *                 // nothing useful can be done if close fails
 *             }
 *         }
 *     }
 * </pre>
 */
public class RegexReader extends FilterReader {
    // This variable holds the current line.
    // If null and emitNewline is false, a new line must be fetched.
    String curLine;

    // This is the index of the first unread character in curLine.
    // As soon as curLineIx == curLine.length(), curLine is set to null.
    int curLineIx;

    // If true, the newline at the end of curLine has not been returned yet.
    // Tracking it separately avoids allocating a copy of every fetched line
    // just to append the terminator.
    boolean emitNewline;

    // Matcher used to test every line; reset against each candidate line.
    Matcher matcher;

    /**
     * Creates a filtering reader.
     *
     * @param in         source of lines; must be a BufferedReader because
     *                   lines are fetched with {@code readLine()}
     * @param patternStr regular expression a line must contain to be kept
     * @throws java.util.regex.PatternSyntaxException if patternStr is invalid
     */
    public RegexReader(BufferedReader in, String patternStr) {
        super(in);
        matcher = Pattern.compile(patternStr).matcher("");
    }

    /**
     * Reads a single filtered character.
     *
     * <p>FilterReader's own implementation forwards to the wrapped reader and
     * would therefore return unfiltered characters, so it is routed through
     * {@link #read(char[], int, int)} here.
     *
     * @return the character read, or -1 at end of stream
     * @throws IOException if the underlying reader fails
     */
    public int read() throws IOException {
        char[] single = new char[1];
        int count;
        do {
            count = read(single, 0, 1);
        } while (count == 0);
        return count == -1 ? -1 : single[0];
    }

    /**
     * Fills cbuf with characters from matching lines.
     *
     * <p>At most one line (plus its newline) is delivered per call. A len that
     * runs past the end of cbuf is tolerated and clamped to the space that is
     * actually available.
     *
     * @param cbuf destination buffer
     * @param off  index in cbuf at which to start storing characters
     * @param len  maximum number of characters to read
     * @return number of characters stored, 0 if no room was offered,
     *         or -1 at end of stream
     * @throws IndexOutOfBoundsException if off or len is negative, or off is
     *                                   beyond the end of cbuf
     * @throws IOException               if the underlying reader fails
     */
    public int read(char cbuf[], int off, int len) throws IOException {
        if (off < 0 || len < 0 || off > cbuf.length) {
            throw new IndexOutOfBoundsException(
                "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        }
        if (len == 0) {
            // Client did not ask for any characters; do not touch the source.
            return 0;
        }
        int room = Math.min(len, cbuf.length - off);

        // Fetch new line if necessary
        if (curLine == null && !emitNewline) {
            getNextLine();
        }
        if (curLine == null && !emitNewline) {
            // No more characters left in input reader
            return -1;
        }

        int count = 0;
        if (curLine != null) {
            // Copy as much of the current line as fits
            count = Math.min(room, curLine.length() - curLineIx);
            curLine.getChars(curLineIx, curLineIx + count, cbuf, off);
            curLineIx += count;
            if (curLineIx == curLine.length()) {
                // No more characters in curLine; only its newline is pending
                curLine = null;
            }
        }

        // Emit the pending newline if the line is finished and there is room
        if (curLine == null && emitNewline && count < room) {
            cbuf[off + count] = '\n';
            emitNewline = false;
            count++;
        }
        return count;
    }

    /**
     * Skips characters of the filtered stream.
     *
     * <p>FilterReader's own implementation skips in the wrapped reader, which
     * would bypass both the filter and the partially consumed current line.
     *
     * @param n number of filtered characters to skip
     * @return number of characters actually skipped
     * @throws IllegalArgumentException if n is negative
     * @throws IOException              if the underlying reader fails
     */
    public long skip(long n) throws IOException {
        if (n < 0L) {
            throw new IllegalArgumentException("skip value is negative");
        }
        char[] scratch = new char[(int) Math.min(n, 1024L)];
        long remaining = n;
        while (remaining > 0L) {
            int got = read(scratch, 0, (int) Math.min(remaining, (long) scratch.length));
            if (got == -1) {
                break;
            }
            remaining -= got;
        }
        return n - remaining;
    }

    // Get next matching line. On return curLine is either the next line that
    // contains a match (with curLineIx and emitNewline primed) or null when
    // the underlying reader is exhausted.
    private void getNextLine() throws IOException {
        BufferedReader source = (BufferedReader) in;
        String candidate;
        while ((candidate = source.readLine()) != null) {
            if (matcher.reset(candidate).find()) {
                curLine = candidate;
                curLineIx = 0;
                emitNewline = true;
                return;
            }
        }
        curLine = null;
    }

    /**
     * Reports whether buffered output is pending or the wrapped reader is ready.
     *
     * <p>Note: when only the wrapped reader is ready, the next read may still
     * have to consume further input before it finds a matching line.
     */
    public boolean ready() throws IOException {
        return curLine != null || emitNewline || in.ready();
    }

    /** Marking is not supported because the filter state cannot be rewound. */
    public boolean markSupported() {
        return false;
    }

    /**
     * Always fails, consistent with {@link #markSupported()}.
     *
     * @throws IOException always
     */
    public void mark(int readAheadLimit) throws IOException {
        throw new IOException("mark() not supported");
    }

    /**
     * Always fails, consistent with {@link #markSupported()}.
     *
     * @throws IOException always
     */
    public void reset() throws IOException {
        throw new IOException("reset() not supported");
    }
}
e443. Matching Line Boundaries in a Regular Expression e444. Matching Across Line Boundaries in a Regular Expression e445. Reading Lines from a String Using a Regular Expression e446. Removing Line Termination Characters from a String
© 2002 Addison-Wesley. |