1 ////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code for adherence to a set of rules.
3 // Copyright (C) 2001-2015 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ////////////////////////////////////////////////////////////////////////////////
19 package com.puppycrawl.tools.checkstyle.api;
20
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.FileInputStream;
24 import java.io.IOException;
25 import java.io.InputStreamReader;
26 import java.io.Reader;
27 import java.io.StringReader;
28 import java.io.UnsupportedEncodingException;
29 import java.nio.ByteBuffer;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetDecoder;
32 import java.nio.charset.CodingErrorAction;
33 import java.nio.charset.UnsupportedCharsetException;
34 import java.util.AbstractList;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.ConcurrentModificationException;
38 import java.util.List;
39 import java.util.regex.Matcher;
40 import java.util.regex.Pattern;
41
42 /**
43 * Represents the text contents of a file of arbitrary plain text type.
44 * <p>
45 * This class will be passed to instances of class FileSetCheck by
46 * Checker. It implements a string list to ensure backwards
47 * compatibility, but can be extended in the future to allow more
48 * flexible, more powerful or more efficient handling of certain
49 * situations.
50 * </p>
51 *
52 * @author Martin von Gagern
53 */
54 public final class FileText extends AbstractList<String>
55 {
56
57 /**
58 * The number of characters to read in one go.
59 */
60 private static final int READ_BUFFER_SIZE = 1024;
61
62 /**
63 * Regular expression pattern matching all line terminators.
64 */
65 private static final Pattern LINE_TERMINATOR =
66 Utils.getPattern("\\n|\\r\\n?");
67
68 // For now, we always keep both full text and lines array.
69 // In the long run, however, the one passed at initialization might be
70 // enough, while the other could be lazily created when requested.
71 // This would save memory but cost CPU cycles.
72
73 /**
74 * The name of the file.
75 * <code>null</code> if no file name is available for whatever reason.
76 */
77 private final File file;
78
79 /**
80 * The charset used to read the file.
81 * <code>null</code> if the file was reconstructed from a list of lines.
82 */
83 private final Charset charset;
84
85 /**
86 * The full text contents of the file.
87 */
88 private final String fullText;
89
90 /**
91 * The lines of the file, without terminators.
92 */
93 private final String[] lines;
94
95 /**
96 * The first position of each line within the full text.
97 */
98 private int[] lineBreaks;
99
100 /**
101 * Creates a new file text representation.
102 *
103 * The file will be read using the specified encoding, replacing
104 * malformed input and unmappable characters with the default
105 * replacement character.
106 *
107 * @param file the name of the file
108 * @param charsetName the encoding to use when reading the file
109 * @throws NullPointerException if the text is null
110 * @throws IOException if the file could not be read
111 */
112 public FileText(File file, String charsetName) throws IOException
113 {
114 this.file = file;
115
116 // We use our own decoder, to be sure we have complete control
117 // about replacements.
118 final CharsetDecoder decoder;
119 try {
120 charset = Charset.forName(charsetName);
121 decoder = charset.newDecoder();
122 decoder.onMalformedInput(CodingErrorAction.REPLACE);
123 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
124 }
125 catch (final UnsupportedCharsetException ex) {
126 final String message = "Unsupported charset: " + charsetName;
127 final UnsupportedEncodingException ex2;
128 ex2 = new UnsupportedEncodingException(message);
129 ex2.initCause(ex);
130 throw ex2;
131 }
132
133 final char[] chars = new char[READ_BUFFER_SIZE];
134 final StringBuilder buf = new StringBuilder();
135 final FileInputStream stream = new FileInputStream(file);
136 final Reader reader = new InputStreamReader(stream, decoder);
137 try {
138 while (true) {
139 final int len = reader.read(chars);
140 if (len < 0) {
141 break;
142 }
143 buf.append(chars, 0, len);
144 }
145 }
146 finally {
147 Utils.closeQuietly(reader);
148 }
149 // buf.trimToSize(); // could be used instead of toString().
150 fullText = buf.toString();
151
152 // Use the BufferedReader to break down the lines as this
153 // is about 30% faster than using the
154 // LINE_TERMINATOR.split(fullText, -1) method
155 final ArrayList<String> lines = new ArrayList<>();
156 final BufferedReader br =
157 new BufferedReader(new StringReader(fullText));
158 for (;;) {
159 final String l = br.readLine();
160 if (null == l) {
161 break;
162 }
163 lines.add(l);
164 }
165 this.lines = lines.toArray(new String[lines.size()]);
166 }
167
168 /**
169 * Compatibility constructor.
170 *
171 * This constructor reconstructs the text of the file by joining
172 * lines with linefeed characters. This process does not restore
173 * the original line terminators and should therefore be avoided.
174 *
175 * @param file the name of the file
176 * @param lines the lines of the text, without terminators
177 * @throws NullPointerException if the lines array is null
178 */
179 private FileText(File file, List<String> lines)
180 {
181 final StringBuilder buf = new StringBuilder();
182 for (final String line : lines) {
183 buf.append(line).append('\n');
184 }
185 buf.trimToSize();
186
187 this.file = file;
188 charset = null;
189 fullText = buf.toString();
190 this.lines = lines.toArray(new String[lines.size()]);
191 }
192
193 /**
194 * Compatibility conversion.
195 *
196 * This method can be used to convert the arguments passed to
197 * {@link FileSetCheck#process(File,List)} to a FileText
198 * object. If the list of lines already is a FileText, it is
199 * returned as is. Otherwise, a new FileText is constructed by
200 * joining the lines using line feed characters.
201 *
202 * @param file the name of the file
203 * @param lines the lines of the text, without terminators
204 * @return an object representing the denoted text file
205 */
206 public static FileText fromLines(File file, List<String> lines)
207 {
208 return lines instanceof FileText
209 ? (FileText) lines
210 : new FileText(file, lines);
211 }
212
213 /**
214 * Get the name of the file.
215 * @return an object containing the name of the file
216 */
217 public File getFile()
218 {
219 return file;
220 }
221
222 /**
223 * Get the character set which was used to read the file.
224 * Will be <code>null</code> for a file reconstructed from its lines.
225 * @return the charset used when the file was read
226 */
227 public Charset getCharset()
228 {
229 return charset;
230 }
231
232 /**
233 * Get the binary contents of the file.
234 * The returned object must not be modified.
235 * @return a buffer containing the bytes making up the file
236 * @throws IOException if the bytes could not be read from the file
237 */
238 public ByteBuffer getBytes() throws IOException
239 {
240 // We might decide to cache file bytes in the future.
241 if (file == null) {
242 return null;
243 }
244 if (file.length() > Integer.MAX_VALUE) {
245 throw new IOException("File too large.");
246 }
247 byte[] bytes = new byte[(int) file.length() + 1];
248 final FileInputStream stream = new FileInputStream(file);
249 try {
250 int fill = 0;
251 while (true) {
252 if (fill >= bytes.length) {
253 // shouldn't happen, but it might nevertheless
254 final byte[] newBytes = new byte[bytes.length * 2 + 1];
255 System.arraycopy(bytes, 0, newBytes, 0, fill);
256 bytes = newBytes;
257 }
258 final int len = stream.read(bytes, fill,
259 bytes.length - fill);
260 if (len == -1) {
261 break;
262 }
263 fill += len;
264 }
265 return ByteBuffer.wrap(bytes, 0, fill).asReadOnlyBuffer();
266 }
267 finally {
268 Utils.closeQuietly(stream);
269 }
270 }
271
272 /**
273 * Retrieve the full text of the file.
274 * @return the full text of the file
275 */
276 public CharSequence getFullText()
277 {
278 return fullText;
279 }
280
281 /**
282 * Returns an array of all lines.
283 * {@code text.toLinesArray()} is equivalent to
284 * {@code text.toArray(new String[text.size()])}.
285 * @return an array of all lines of the text
286 */
287 public String[] toLinesArray()
288 {
289 return lines.clone();
290 }
291
292 /**
293 * Find positions of line breaks in the full text.
294 * @return an array giving the first positions of each line.
295 */
296 private int[] lineBreaks()
297 {
298 if (lineBreaks == null) {
299 final int[] lineBreaks = new int[size() + 1];
300 lineBreaks[0] = 0;
301 int lineNo = 1;
302 final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
303 while (matcher.find()) {
304 lineBreaks[lineNo++] = matcher.end();
305 }
306 if (lineNo < lineBreaks.length) {
307 lineBreaks[lineNo++] = fullText.length();
308 }
309 if (lineNo != lineBreaks.length) {
310 throw new ConcurrentModificationException("Text changed.");
311 }
312 this.lineBreaks = lineBreaks;
313 }
314 return lineBreaks;
315 }
316
317 /**
318 * Determine line and column numbers in full text.
319 * @param pos the character position in the full text
320 * @return the line and column numbers of this character
321 */
322 public LineColumn lineColumn(int pos)
323 {
324 final int[] lineBreaks = lineBreaks();
325 int lineNo = Arrays.binarySearch(lineBreaks, pos);
326 if (lineNo < 0) {
327 // we have: lineNo = -(insertion point) - 1
328 // we want: lineNo = (insertion point) - 1
329 lineNo = -lineNo - 2;
330 }
331 final int startOfLine = lineBreaks[lineNo];
332 final int columnNo = pos - startOfLine;
333 // now we have lineNo and columnNo, both starting at zero.
334 return new LineColumn(lineNo + 1, columnNo);
335 }
336
337 /**
338 * Retrieves a line of the text by its number.
339 * The returned line will not contain a trailing terminator.
340 * @param lineNo the number of the line to get, starting at zero
341 * @return the line with the given number
342 */
343 @Override
344 public String get(final int lineNo)
345 {
346 return lines[lineNo];
347 }
348
349 /**
350 * Counts the lines of the text.
351 * @return the number of lines in the text
352 */
353 @Override
354 public int size()
355 {
356 return lines.length;
357 }
358
359 }