View Javadoc
1   ////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code for adherence to a set of rules.
3   // Copyright (C) 2001-2015 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ////////////////////////////////////////////////////////////////////////////////
19  package com.puppycrawl.tools.checkstyle.api;
20  
21  import java.io.BufferedReader;
22  import java.io.File;
23  import java.io.FileInputStream;
24  import java.io.IOException;
25  import java.io.InputStreamReader;
26  import java.io.Reader;
27  import java.io.StringReader;
28  import java.io.UnsupportedEncodingException;
29  import java.nio.ByteBuffer;
30  import java.nio.charset.Charset;
31  import java.nio.charset.CharsetDecoder;
32  import java.nio.charset.CodingErrorAction;
33  import java.nio.charset.UnsupportedCharsetException;
34  import java.util.AbstractList;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.ConcurrentModificationException;
38  import java.util.List;
39  import java.util.regex.Matcher;
40  import java.util.regex.Pattern;
41  
42  /**
43   * Represents the text contents of a file of arbitrary plain text type.
44   * <p>
45   * This class will be passed to instances of class FileSetCheck by
46   * Checker. It implements a string list to ensure backwards
47   * compatibility, but can be extended in the future to allow more
48   * flexible, more powerful or more efficient handling of certain
49   * situations.
50   * </p>
51   *
52   * @author Martin von Gagern
53   */
54  public final class FileText extends AbstractList<String>
55  {
56  
57      /**
58       * The number of characters to read in one go.
59       */
60      private static final int READ_BUFFER_SIZE = 1024;
61  
62      /**
63       * Regular expression pattern matching all line terminators.
64       */
65      private static final Pattern LINE_TERMINATOR =
66          Utils.getPattern("\\n|\\r\\n?");
67  
68      // For now, we always keep both full text and lines array.
69      // In the long run, however, the one passed at initialization might be
70      // enough, while the other could be lazily created when requested.
71      // This would save memory but cost CPU cycles.
72  
73      /**
74       * The name of the file.
75       * <code>null</code> if no file name is available for whatever reason.
76       */
77      private final File file;
78  
79      /**
80       * The charset used to read the file.
81       * <code>null</code> if the file was reconstructed from a list of lines.
82       */
83      private final Charset charset;
84  
85      /**
86       * The full text contents of the file.
87       */
88      private final String fullText;
89  
90      /**
91       * The lines of the file, without terminators.
92       */
93      private final String[] lines;
94  
95      /**
96       * The first position of each line within the full text.
97       */
98      private int[] lineBreaks;
99  
100     /**
101      * Creates a new file text representation.
102      *
103      * The file will be read using the specified encoding, replacing
104      * malformed input and unmappable characters with the default
105      * replacement character.
106      *
107      * @param file the name of the file
108      * @param charsetName the encoding to use when reading the file
109      * @throws NullPointerException if the text is null
110      * @throws IOException if the file could not be read
111      */
112     public FileText(File file, String charsetName) throws IOException
113     {
114         this.file = file;
115 
116         // We use our own decoder, to be sure we have complete control
117         // about replacements.
118         final CharsetDecoder decoder;
119         try {
120             charset = Charset.forName(charsetName);
121             decoder = charset.newDecoder();
122             decoder.onMalformedInput(CodingErrorAction.REPLACE);
123             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
124         }
125         catch (final UnsupportedCharsetException ex) {
126             final String message = "Unsupported charset: " + charsetName;
127             final UnsupportedEncodingException ex2;
128             ex2 = new UnsupportedEncodingException(message);
129             ex2.initCause(ex);
130             throw ex2;
131         }
132 
133         final char[] chars = new char[READ_BUFFER_SIZE];
134         final StringBuilder buf = new StringBuilder();
135         final FileInputStream stream = new FileInputStream(file);
136         final Reader reader = new InputStreamReader(stream, decoder);
137         try {
138             while (true) {
139                 final int len = reader.read(chars);
140                 if (len < 0) {
141                     break;
142                 }
143                 buf.append(chars, 0, len);
144             }
145         }
146         finally {
147             Utils.closeQuietly(reader);
148         }
149         // buf.trimToSize(); // could be used instead of toString().
150         fullText = buf.toString();
151 
152         // Use the BufferedReader to break down the lines as this
153         // is about 30% faster than using the
154         // LINE_TERMINATOR.split(fullText, -1) method
155         final ArrayList<String> lines = new ArrayList<>();
156         final BufferedReader br =
157             new BufferedReader(new StringReader(fullText));
158         for (;;) {
159             final String l = br.readLine();
160             if (null == l) {
161                 break;
162             }
163             lines.add(l);
164         }
165         this.lines = lines.toArray(new String[lines.size()]);
166     }
167 
168     /**
169      * Compatibility constructor.
170      *
171      * This constructor reconstructs the text of the file by joining
172      * lines with linefeed characters. This process does not restore
173      * the original line terminators and should therefore be avoided.
174      *
175      * @param file the name of the file
176      * @param lines the lines of the text, without terminators
177      * @throws NullPointerException if the lines array is null
178      */
179     private FileText(File file, List<String> lines)
180     {
181         final StringBuilder buf = new StringBuilder();
182         for (final String line : lines) {
183             buf.append(line).append('\n');
184         }
185         buf.trimToSize();
186 
187         this.file = file;
188         charset = null;
189         fullText = buf.toString();
190         this.lines = lines.toArray(new String[lines.size()]);
191     }
192 
193     /**
194      * Compatibility conversion.
195      *
196      * This method can be used to convert the arguments passed to
197      * {@link FileSetCheck#process(File,List)} to a FileText
198      * object. If the list of lines already is a FileText, it is
199      * returned as is. Otherwise, a new FileText is constructed by
200      * joining the lines using line feed characters.
201      *
202      * @param file the name of the file
203      * @param lines the lines of the text, without terminators
204      * @return an object representing the denoted text file
205      */
206     public static FileText fromLines(File file, List<String> lines)
207     {
208         return lines instanceof FileText
209             ? (FileText) lines
210             : new FileText(file, lines);
211     }
212 
213     /**
214      * Get the name of the file.
215      * @return an object containing the name of the file
216      */
217     public File getFile()
218     {
219         return file;
220     }
221 
222     /**
223      * Get the character set which was used to read the file.
224      * Will be <code>null</code> for a file reconstructed from its lines.
225      * @return the charset used when the file was read
226      */
227     public Charset getCharset()
228     {
229         return charset;
230     }
231 
232     /**
233      * Get the binary contents of the file.
234      * The returned object must not be modified.
235      * @return a buffer containing the bytes making up the file
236      * @throws IOException if the bytes could not be read from the file
237      */
238     public ByteBuffer getBytes() throws IOException
239     {
240         // We might decide to cache file bytes in the future.
241         if (file == null) {
242             return null;
243         }
244         if (file.length() > Integer.MAX_VALUE) {
245             throw new IOException("File too large.");
246         }
247         byte[] bytes = new byte[(int) file.length() + 1];
248         final FileInputStream stream = new FileInputStream(file);
249         try {
250             int fill = 0;
251             while (true) {
252                 if (fill >= bytes.length) {
253                     // shouldn't happen, but it might nevertheless
254                     final byte[] newBytes = new byte[bytes.length * 2 + 1];
255                     System.arraycopy(bytes, 0, newBytes, 0, fill);
256                     bytes = newBytes;
257                 }
258                 final int len = stream.read(bytes, fill,
259                                             bytes.length - fill);
260                 if (len == -1) {
261                     break;
262                 }
263                 fill += len;
264             }
265             return ByteBuffer.wrap(bytes, 0, fill).asReadOnlyBuffer();
266         }
267         finally {
268             Utils.closeQuietly(stream);
269         }
270     }
271 
272     /**
273      * Retrieve the full text of the file.
274      * @return the full text of the file
275      */
276     public CharSequence getFullText()
277     {
278         return fullText;
279     }
280 
281     /**
282      * Returns an array of all lines.
283      * {@code text.toLinesArray()} is equivalent to
284      * {@code text.toArray(new String[text.size()])}.
285      * @return an array of all lines of the text
286      */
287     public String[] toLinesArray()
288     {
289         return lines.clone();
290     }
291 
292     /**
293      * Find positions of line breaks in the full text.
294      * @return an array giving the first positions of each line.
295      */
296     private int[] lineBreaks()
297     {
298         if (lineBreaks == null) {
299             final int[] lineBreaks = new int[size() + 1];
300             lineBreaks[0] = 0;
301             int lineNo = 1;
302             final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
303             while (matcher.find()) {
304                 lineBreaks[lineNo++] = matcher.end();
305             }
306             if (lineNo < lineBreaks.length) {
307                 lineBreaks[lineNo++] = fullText.length();
308             }
309             if (lineNo != lineBreaks.length) {
310                 throw new ConcurrentModificationException("Text changed.");
311             }
312             this.lineBreaks = lineBreaks;
313         }
314         return lineBreaks;
315     }
316 
317     /**
318      * Determine line and column numbers in full text.
319      * @param pos the character position in the full text
320      * @return the line and column numbers of this character
321      */
322     public LineColumn lineColumn(int pos)
323     {
324         final int[] lineBreaks = lineBreaks();
325         int lineNo = Arrays.binarySearch(lineBreaks, pos);
326         if (lineNo < 0) {
327             // we have: lineNo = -(insertion point) - 1
328             // we want: lineNo =  (insertion point) - 1
329             lineNo = -lineNo - 2;
330         }
331         final int startOfLine = lineBreaks[lineNo];
332         final int columnNo = pos - startOfLine;
333         // now we have lineNo and columnNo, both starting at zero.
334         return new LineColumn(lineNo + 1, columnNo);
335     }
336 
337     /**
338      * Retrieves a line of the text by its number.
339      * The returned line will not contain a trailing terminator.
340      * @param lineNo the number of the line to get, starting at zero
341      * @return the line with the given number
342      */
343     @Override
344     public String get(final int lineNo)
345     {
346         return lines[lineNo];
347     }
348 
349     /**
350      * Counts the lines of the text.
351      * @return the number of lines in the text
352      */
353     @Override
354     public int size()
355     {
356         return lines.length;
357     }
358 
359 }