1
2
3
4 package net.sourceforge.pmd.cpd;
5
6 import java.io.File;
7 import java.io.FileNotFoundException;
8 import java.io.IOException;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Properties;
14 import java.util.Set;
15 import java.util.TreeMap;
16
17 import net.sourceforge.pmd.util.FileFinder;
18
19 public class CPD {
20
21 private static final int MISSING_FILES = 1;
22 private static final int MISSING_ARGS = 2;
23 private static final int DUPLICATE_CODE_FOUND = 4;
24
25 private CPDConfiguration configuration;
26
27 private Map<String, SourceCode> source = new TreeMap<String, SourceCode>();
28 private CPDListener listener = new CPDNullListener();
29 private Tokens tokens = new Tokens();
30 private MatchAlgorithm matchAlgorithm;
31
32 public CPD(CPDConfiguration theConfiguration) {
33 configuration = theConfiguration;
34 }
35
36 public void setCpdListener(CPDListener cpdListener) {
37 this.listener = cpdListener;
38 }
39
40 public void go() {
41 TokenEntry.clearImages();
42 matchAlgorithm = new MatchAlgorithm(
43 source, tokens,
44 configuration.minimumTileSize(),
45 listener
46 );
47 matchAlgorithm.findMatches();
48 }
49
50 public Iterator<Match> getMatches() {
51 return matchAlgorithm.matches();
52 }
53
54 public void add(File file) throws IOException {
55 add(1, file);
56 }
57
58 public void addAllInDirectory(String dir) throws IOException {
59 addDirectory(dir, false);
60 }
61
62 public void addRecursively(String dir) throws IOException {
63 addDirectory(dir, true);
64 }
65
66 public void add(List<File> files) throws IOException {
67 for (File f: files) {
68 add(files.size(), f);
69 }
70 }
71
72 private void addDirectory(String dir, boolean recurse) throws IOException {
73 if (!(new File(dir)).exists()) {
74 throw new FileNotFoundException("Couldn't find directory " + dir);
75 }
76 FileFinder finder = new FileFinder();
77
78 add(finder.findFilesFrom(dir, configuration.filenameFilter(), recurse));
79 }
80
81 private Set<String> current = new HashSet<String>();
82
83 private void add(int fileCount, File file) throws IOException {
84
85 if (configuration.skipDuplicates()) {
86
87 String signature = file.getName() + '_' + file.length();
88 if (current.contains(signature)) {
89 System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
90 return;
91 }
92 current.add(signature);
93 }
94
95 if (!file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
96 System.err.println("Skipping " + file + " since it appears to be a symlink");
97 return;
98 }
99
100 listener.addedFile(fileCount, file);
101 SourceCode sourceCode = configuration.sourceCodeFor(file);
102 configuration.tokenizer().tokenize(sourceCode, tokens);
103 source.put(sourceCode.getFileName(), sourceCode);
104 }
105
106 private static void setSystemProperties(String[] args) {
107 boolean ignoreLiterals = CPDConfiguration.findBooleanSwitch(args, "--ignore-literals");
108 boolean ignoreIdentifiers = CPDConfiguration.findBooleanSwitch(args, "--ignore-identifiers");
109 boolean ignoreAnnotations = CPDConfiguration.findBooleanSwitch(args, "--ignore-annotations");
110 Properties properties = System.getProperties();
111 if (ignoreLiterals) {
112 properties.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
113 }
114 if (ignoreIdentifiers) {
115 properties.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
116 }
117 if (ignoreAnnotations) {
118 properties.setProperty(JavaTokenizer.IGNORE_ANNOTATIONS, "true");
119 }
120 System.setProperties(properties);
121 }
122
123 public static void main(String[] args) {
124 if (args.length == 0) {
125 showUsage();
126 System.exit(MISSING_ARGS);
127 }
128
129 try {
130 CPDConfiguration config = new CPDConfiguration(args);
131
132
133
134 setSystemProperties(args);
135
136 CPD cpd = new CPD(config);
137
138
139 boolean missingFiles = true;
140 for (int position = 0; position < args.length; position++) {
141 if (args[position].equals("--files")) {
142 cpd.addRecursively(args[position + 1]);
143 if ( missingFiles ) {
144 missingFiles = false;
145 }
146 }
147 }
148
149 if ( missingFiles ) {
150 System.out.println("No " + "--files" + " value passed in");
151 showUsage();
152 System.exit(MISSING_FILES);
153 }
154
155 cpd.go();
156 if (cpd.getMatches().hasNext()) {
157 System.out.println(config.renderer().render(cpd.getMatches()));
158 System.exit(DUPLICATE_CODE_FOUND);
159 }
160 } catch (Exception e) {
161 e.printStackTrace();
162 }
163 }
164
165 public static void showUsage() {
166 System.out.println("Usage:");
167 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
168 System.out.println("i.e: ");
169 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
170 System.out.println("or: ");
171 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
172 System.out.println("or: ");
173 System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
174 }
175
176 }