001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2014  Oliver Burn
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019package com.puppycrawl.tools.checkstyle.api;
020
021import java.io.BufferedReader;
022import java.io.File;
023import java.io.FileInputStream;
024import java.io.IOException;
025import java.io.InputStreamReader;
026import java.io.Reader;
027import java.io.StringReader;
028import java.io.UnsupportedEncodingException;
029import java.nio.ByteBuffer;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.CodingErrorAction;
033import java.nio.charset.UnsupportedCharsetException;
034import java.util.AbstractList;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.ConcurrentModificationException;
038import java.util.List;
039import java.util.regex.Matcher;
040import java.util.regex.Pattern;
041
042/**
043 * Represents the text contents of a file of arbitrary plain text type.
044 * <p>
045 * This class will be passed to instances of class FileSetCheck by
046 * Checker. It implements a string list to ensure backwards
047 * compatibility, but can be extended in the future to allow more
048 * flexible, more powerful or more efficient handling of certain
049 * situations.
050 * </p>
051 *
052 * @author Martin von Gagern
053 */
054public final class FileText extends AbstractList<String>
055{
056
057    /**
058     * The number of characters to read in one go.
059     */
060    private static final int READ_BUFFER_SIZE = 1024;
061
062    /**
063     * Regular expression pattern matching all line terminators.
064     */
065    private static final Pattern LINE_TERMINATOR =
066        Utils.getPattern("\\n|\\r\\n?");
067
068    // For now, we always keep both full text and lines array.
069    // In the long run, however, the one passed at initialization might be
070    // enough, while the other could be lazily created when requested.
071    // This would save memory but cost CPU cycles.
072
073    /**
074     * The name of the file.
075     * <code>null</code> if no file name is available for whatever reason.
076     */
077    private final File file;
078
079    /**
080     * The charset used to read the file.
081     * <code>null</code> if the file was reconstructed from a list of lines.
082     */
083    private final Charset charset;
084
085    /**
086     * The full text contents of the file.
087     */
088    private final String fullText;
089
090    /**
091     * The lines of the file, without terminators.
092     */
093    private final String[] lines;
094
095    /**
096     * The first position of each line within the full text.
097     */
098    private int[] lineBreaks;
099
100    /**
101     * Creates a new file text representation.
102     *
103     * The file will be read using the specified encoding, replacing
104     * malformed input and unmappable characters with the default
105     * replacement character.
106     *
107     * @param file the name of the file
108     * @param charsetName the encoding to use when reading the file
109     * @throws NullPointerException if the text is null
110     * @throws IOException if the file could not be read
111     */
112    public FileText(File file, String charsetName) throws IOException
113    {
114        this.file = file;
115
116        // We use our own decoder, to be sure we have complete control
117        // about replacements.
118        final CharsetDecoder decoder;
119        try {
120            charset = Charset.forName(charsetName);
121            decoder = charset.newDecoder();
122            decoder.onMalformedInput(CodingErrorAction.REPLACE);
123            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
124        }
125        catch (final UnsupportedCharsetException ex) {
126            final String message = "Unsupported charset: " + charsetName;
127            final UnsupportedEncodingException ex2;
128            ex2 = new UnsupportedEncodingException(message);
129            ex2.initCause(ex);
130            throw ex2;
131        }
132
133        final char[] chars = new char[READ_BUFFER_SIZE];
134        final StringBuilder buf = new StringBuilder();
135        final FileInputStream stream = new FileInputStream(file);
136        final Reader reader = new InputStreamReader(stream, decoder);
137        try {
138            while (true) {
139                final int len = reader.read(chars);
140                if (len < 0) {
141                    break;
142                }
143                buf.append(chars, 0, len);
144            }
145        }
146        finally {
147            Utils.closeQuietly(reader);
148        }
149        // buf.trimToSize(); // could be used instead of toString().
150        fullText = buf.toString();
151
152        // Use the BufferedReader to break down the lines as this
153        // is about 30% faster than using the
154        // LINE_TERMINATOR.split(fullText, -1) method
155        final ArrayList<String> lines = new ArrayList<String>();
156        final BufferedReader br =
157            new BufferedReader(new StringReader(fullText));
158        for (;;) {
159            final String l = br.readLine();
160            if (null == l) {
161                break;
162            }
163            lines.add(l);
164        }
165        this.lines = lines.toArray(new String[lines.size()]);
166    }
167
168    /**
169     * Compatibility constructor.
170     *
171     * This constructor reconstructs the text of the file by joining
172     * lines with linefeed characters. This process does not restore
173     * the original line terminators and should therefore be avoided.
174     *
175     * @param file the name of the file
176     * @param lines the lines of the text, without terminators
177     * @throws NullPointerException if the lines array is null
178     */
179    private FileText(File file, List<String> lines)
180    {
181        final StringBuilder buf = new StringBuilder();
182        for (final String line : lines) {
183            buf.append(line).append('\n');
184        }
185        buf.trimToSize();
186
187        this.file = file;
188        charset = null;
189        fullText = buf.toString();
190        this.lines = lines.toArray(new String[lines.size()]);
191    }
192
193    /**
194     * Compatibility conversion.
195     *
196     * This method can be used to convert the arguments passed to
197     * {@link FileSetCheck#process(File,List)} to a FileText
198     * object. If the list of lines already is a FileText, it is
199     * returned as is. Otherwise, a new FileText is constructed by
200     * joining the lines using line feed characters.
201     *
202     * @param file the name of the file
203     * @param lines the lines of the text, without terminators
204     * @return an object representing the denoted text file
205     */
206    public static FileText fromLines(File file, List<String> lines)
207    {
208        return (lines instanceof FileText)
209            ? (FileText) lines
210            : new FileText(file, lines);
211    }
212
213    /**
214     * Get the name of the file.
215     * @return an object containing the name of the file
216     */
217    public File getFile()
218    {
219        return file;
220    }
221
222    /**
223     * Get the character set which was used to read the file.
224     * Will be <code>null</code> for a file reconstructed from its lines.
225     * @return the charset used when the file was read
226     */
227    public Charset getCharset()
228    {
229        return charset;
230    }
231
232    /**
233     * Get the binary contents of the file.
234     * The returned object must not be modified.
235     * @return a buffer containing the bytes making up the file
236     * @throws IOException if the bytes could not be read from the file
237     */
238    public ByteBuffer getBytes() throws IOException
239    {
240        // We might decide to cache file bytes in the future.
241        if (file == null) {
242            return null;
243        }
244        if (file.length() > Integer.MAX_VALUE) {
245            throw new IOException("File too large.");
246        }
247        byte[] bytes = new byte[(int) file.length() + 1];
248        final FileInputStream stream = new FileInputStream(file);
249        try {
250            int fill = 0;
251            while (true) {
252                if (fill >= bytes.length) {
253                    // shouldn't happen, but it might nevertheless
254                    final byte[] newBytes = new byte[bytes.length * 2 + 1];
255                    System.arraycopy(bytes, 0, newBytes, 0, fill);
256                    bytes = newBytes;
257                }
258                final int len = stream.read(bytes, fill,
259                                            bytes.length - fill);
260                if (len == -1) {
261                    break;
262                }
263                fill += len;
264            }
265            return ByteBuffer.wrap(bytes, 0, fill).asReadOnlyBuffer();
266        }
267        finally {
268            Utils.closeQuietly(stream);
269        }
270    }
271
272    /**
273     * Retrieve the full text of the file.
274     * @return the full text of the file
275     */
276    public CharSequence getFullText()
277    {
278        return fullText;
279    }
280
281    /**
282     * Returns an array of all lines.
283     * {@code text.toLinesArray()} is equivalent to
284     * {@code text.toArray(new String[text.size()])}.
285     * @return an array of all lines of the text
286     */
287    public String[] toLinesArray()
288    {
289        return lines.clone();
290    }
291
292    /**
293     * Find positions of line breaks in the full text.
294     * @return an array giving the first positions of each line.
295     */
296    private int[] lineBreaks()
297    {
298        if (lineBreaks == null) {
299            final int[] lineBreaks = new int[size() + 1];
300            lineBreaks[0] = 0;
301            int lineNo = 1;
302            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
303            while (matcher.find()) {
304                lineBreaks[lineNo++] = matcher.end();
305            }
306            if (lineNo < lineBreaks.length) {
307                lineBreaks[lineNo++] = fullText.length();
308            }
309            if (lineNo != lineBreaks.length) {
310                throw new ConcurrentModificationException("Text changed.");
311            }
312            this.lineBreaks = lineBreaks;
313        }
314        return lineBreaks;
315    }
316
317    /**
318     * Determine line and column numbers in full text.
319     * @param pos the character position in the full text
320     * @return the line and column numbers of this character
321     */
322    public LineColumn lineColumn(int pos)
323    {
324        final int[] lineBreaks = lineBreaks();
325        int lineNo = Arrays.binarySearch(lineBreaks, pos);
326        if (lineNo < 0) {
327            // we have: lineNo = -(insertion point) - 1
328            // we want: lineNo =  (insertion point) - 1
329            lineNo = -lineNo - 2;
330        }
331        final int startOfLine = lineBreaks[lineNo];
332        final int columnNo = pos - startOfLine;
333        // now we have lineNo and columnNo, both starting at zero.
334        return new LineColumn(lineNo + 1, columnNo);
335    }
336
337    /**
338     * Retrieves a line of the text by its number.
339     * The returned line will not contain a trailing terminator.
340     * @param lineNo the number of the line to get, starting at zero
341     * @return the line with the given number
342     */
343    @Override
344    public String get(final int lineNo)
345    {
346        return lines[lineNo];
347    }
348
349    /**
350     * Counts the lines of the text.
351     * @return the number of lines in the text
352     */
353    @Override
354    public int size()
355    {
356        return lines.length;
357    }
358
359}