View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.StringReader;
7   import java.util.*;
8   
9   import net.sourceforge.pmd.lang.LanguageVersion;
10  import net.sourceforge.pmd.lang.LanguageVersionHandler;
11  import net.sourceforge.pmd.lang.TokenManager;
12  import net.sourceforge.pmd.lang.java.ast.JavaParserConstants;
13  import net.sourceforge.pmd.lang.java.ast.Token;
14  
15  public class JavaTokenizer implements Tokenizer {
16  
17      public static final String IGNORE_LITERALS = "ignore_literals";
18      public static final String IGNORE_IDENTIFIERS = "ignore_identifiers";
19      public static final String IGNORE_ANNOTATIONS = "ignore_annotations";
20      public static final String CPD_START = "\"CPD-START\"";
21      public static final String CPD_END = "\"CPD-END\"";
22  
23      private boolean ignoreAnnotations;
24      private boolean ignoreLiterals;
25      private boolean ignoreIdentifiers;
26      List<Discarder> discarders = new ArrayList<Discarder>();
27  
28  
29      public void setProperties(Properties properties) {
30          ignoreAnnotations = Boolean.parseBoolean(properties.getProperty(IGNORE_ANNOTATIONS, "false"));
31          ignoreLiterals = Boolean.parseBoolean(properties.getProperty(IGNORE_LITERALS, "false"));
32          ignoreIdentifiers = Boolean.parseBoolean(properties.getProperty(IGNORE_IDENTIFIERS, "false"));
33      }
34  
35      public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
36          StringBuilder stringBuilder = sourceCode.getCodeBuffer();
37  
38          // Note that Java version is irrelevant for tokenizing
39          LanguageVersionHandler languageVersionHandler = LanguageVersion.JAVA_14.getLanguageVersionHandler();
40          String fileName = sourceCode.getFileName();
41          TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions()).getTokenManager(
42                  fileName, new StringReader(stringBuilder.toString()));
43          Token currentToken = (Token) tokenMgr.getNextToken();
44  
45          initDiscarders();
46  
47          while (currentToken.image.length() > 0) {
48              for (Discarder discarder : discarders) {
49                  discarder.add(currentToken);
50              }
51  
52              if (inDiscardingState()) {
53                  currentToken = (Token) tokenMgr.getNextToken();
54                  continue;
55              }
56  
57              //skip semicolons
58              if (currentToken.kind != JavaParserConstants.SEMICOLON) {
59                  processToken(tokenEntries, fileName, currentToken);
60              }
61              currentToken = (Token) tokenMgr.getNextToken();
62          }
63          tokenEntries.add(TokenEntry.getEOF());
64      }
65  
66      private void processToken(Tokens tokenEntries, String fileName, Token currentToken) {
67          String image = currentToken.image;
68          if (ignoreLiterals
69                  && (currentToken.kind == JavaParserConstants.STRING_LITERAL
70                  || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
71                  || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
72              image = String.valueOf(currentToken.kind);
73          }
74          if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
75              image = String.valueOf(currentToken.kind);
76          }
77          tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
78      }
79  
80      private void initDiscarders() {
81          if (ignoreAnnotations)
82              discarders.add(new AnnotationStateDiscarder());
83          discarders.add(new SuppressCPDDiscarder());
84          discarders.add(new KeyWordToSemiColonStateDiscarder(JavaParserConstants.IMPORT));
85          discarders.add(new KeyWordToSemiColonStateDiscarder(JavaParserConstants.PACKAGE));
86      }
87  
88      private boolean inDiscardingState() {
89          boolean discarding = false;
90          for (Discarder discarder : discarders) {
91              if (discarder.isDiscarding())
92                  discarding = true;
93          }
94          return discarding;
95      }
96  
97      public void setIgnoreLiterals(boolean ignore) {
98          this.ignoreLiterals = ignore;
99      }
100 
101     public void setIgnoreIdentifiers(boolean ignore) {
102         this.ignoreIdentifiers = ignore;
103     }
104 
105     public void setIgnoreAnnotations(boolean ignoreAnnotations) {
106         this.ignoreAnnotations = ignoreAnnotations;
107     }
108 
109     static public interface Discarder {
110         public void add(Token token);
111 
112         public boolean isDiscarding();
113     }
114 
115     static public class AnnotationStateDiscarder implements Discarder {
116 
117         Stack<Token> tokenStack = new Stack<Token>();
118 
119         public void add(Token token) {
120             if (isDiscarding() && tokenStack.size() == 2 && token.kind != JavaParserConstants.LPAREN) {
121                 tokenStack.clear();
122             }
123 
124             if (token.kind == JavaParserConstants.AT && !isDiscarding()) {
125                 tokenStack.push(token);
126                 return;
127             }
128             if (token.kind == JavaParserConstants.RPAREN && isDiscarding()) {
129                 Token popped = null;
130                 while ((popped = tokenStack.pop()).kind != JavaParserConstants.LPAREN) ;
131                 return;
132 
133             } else {
134                 if (isDiscarding())
135                     tokenStack.push(token);
136             }
137         }
138 
139         public boolean isDiscarding() {
140             return !tokenStack.isEmpty();
141         }
142 
143     }
144 
145     static public class KeyWordToSemiColonStateDiscarder implements Discarder {
146 
147         private final int keyword;
148         Stack<Token> tokenStack = new Stack<Token>();
149 
150         public KeyWordToSemiColonStateDiscarder(int keyword) {
151             this.keyword = keyword;
152         }
153 
154         public void add(Token token) {
155             if (token.kind == keyword)
156                 tokenStack.add(token);
157             if (token.kind == JavaParserConstants.SEMICOLON && isDiscarding())
158                 tokenStack.clear();
159         }
160 
161         public boolean isDiscarding() {
162             return !tokenStack.isEmpty();
163         }
164 
165     }
166 
167     static public class SuppressCPDDiscarder implements Discarder {
168         AnnotationStateDiscarder asm = new AnnotationStateDiscarder();
169         Boolean discarding = false;
170 
171         public void add(Token token) {
172             asm.add(token);
173             //if processing an annotation, look for a CPD-START or CPD-END
174             if (asm.isDiscarding()) {
175                 if (CPD_START.equals(token.image))
176                     discarding = true;
177                 if (CPD_END.equals(token.image) && discarding)
178                     discarding = false;
179             }
180         }
181 
182         public boolean isDiscarding() {
183             return discarding;
184         }
185 
186     }
187 
188 }