////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2014 Oliver Burn
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////////////////////////////////////////////////////
package com.puppycrawl.tools.checkstyle.api;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.UnsupportedCharsetException;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.ConcurrentModificationException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Represents the text contents of a file of arbitrary plain text type.
 * <p>
 * This class will be passed to instances of class FileSetCheck by
 * Checker. It implements a string list to ensure backwards
 * compatibility, but can be extended in the future to allow more
 * flexible, more powerful or more efficient handling of certain
 * situations.
 * </p>
 * <p>
 * Viewed as a list, the elements are the lines of the text without
 * their terminators. No mutating list operation is overridden here.
 * </p>
 *
 * @author Martin von Gagern
 */
public final class FileText extends AbstractList<String>
{

    /**
     * The number of characters to read in one go.
     */
    private static final int READ_BUFFER_SIZE = 1024;

    /**
     * Regular expression pattern matching all line terminators:
     * a line feed, or a carriage return optionally followed by a line feed.
     */
    private static final Pattern LINE_TERMINATOR =
        Utils.getPattern("\\n|\\r\\n?");

    // For now, we always keep both full text and lines array.
    // In the long run, however, the one passed at initialization might be
    // enough, while the other could be lazily created when requested.
    // This would save memory but cost CPU cycles.

    /**
     * The name of the file.
     * <code>null</code> if no file name is available for whatever reason.
     */
    private final File file;

    /**
     * The charset used to read the file.
     * <code>null</code> if the file was reconstructed from a list of lines.
     */
    private final Charset charset;

    /**
     * The full text contents of the file.
     */
    private final String fullText;

    /**
     * The lines of the file, without terminators.
     */
    private final String[] lines;

    /**
     * The first position of each line within the full text, followed by
     * one extra entry for the position after the last line.
     * <code>null</code> until first computed by {@link #lineBreaks()}.
     */
    private int[] lineBreaks;

    /**
     * Creates a new file text representation.
     *
     * The file will be read using the specified encoding, replacing
     * malformed input and unmappable characters with the default
     * replacement character.
     *
     * @param file the name of the file
     * @param charsetName the encoding to use when reading the file
     * @throws NullPointerException if the file is null
     * @throws IOException if the file could not be read; this includes an
     *     {@link UnsupportedEncodingException} when the named charset is
     *     not supported
     */
    public FileText(File file, String charsetName) throws IOException
    {
        this.file = file;

        // We use our own decoder, to be sure we have complete control
        // about replacements.
        final CharsetDecoder decoder;
        try {
            charset = Charset.forName(charsetName);
            decoder = charset.newDecoder();
            decoder.onMalformedInput(CodingErrorAction.REPLACE);
            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
        }
        catch (final UnsupportedCharsetException ex) {
            // Report an unknown charset as a checked I/O problem,
            // keeping the original exception as the cause.
            final String message = "Unsupported charset: " + charsetName;
            final UnsupportedEncodingException ex2;
            ex2 = new UnsupportedEncodingException(message);
            ex2.initCause(ex);
            throw ex2;
        }

        final char[] chars = new char[READ_BUFFER_SIZE];
        final StringBuilder buf = new StringBuilder();
        final FileInputStream stream = new FileInputStream(file);
        final Reader reader = new InputStreamReader(stream, decoder);
        try {
            while (true) {
                final int len = reader.read(chars);
                if (len < 0) {
                    break;
                }
                buf.append(chars, 0, len);
            }
        }
        finally {
            Utils.closeQuietly(reader);
        }
        fullText = buf.toString();

        // Use the BufferedReader to break down the lines as this
        // is about 30% faster than using the
        // LINE_TERMINATOR.split(fullText, -1) method
        final List<String> textLines = new ArrayList<String>();
        final BufferedReader br =
            new BufferedReader(new StringReader(fullText));
        for (;;) {
            final String l = br.readLine();
            if (null == l) {
                break;
            }
            textLines.add(l);
        }
        this.lines = textLines.toArray(new String[textLines.size()]);
    }

    /**
     * Compatibility constructor.
     *
     * This constructor reconstructs the text of the file by joining
     * lines with linefeed characters. This process does not restore
     * the original line terminators and should therefore be avoided.
     *
     * @param file the name of the file
     * @param lines the lines of the text, without terminators
     * @throws NullPointerException if the list of lines is null
     */
    private FileText(File file, List<String> lines)
    {
        final StringBuilder buf = new StringBuilder();
        for (final String line : lines) {
            buf.append(line).append('\n');
        }

        this.file = file;
        charset = null;
        fullText = buf.toString();
        this.lines = lines.toArray(new String[lines.size()]);
    }

    /**
     * Compatibility conversion.
     *
     * This method can be used to convert the arguments passed to
     * {@link FileSetCheck#process(File,List)} to a FileText
     * object. If the list of lines already is a FileText, it is
     * returned as is. Otherwise, a new FileText is constructed by
     * joining the lines using line feed characters.
     *
     * @param file the name of the file
     * @param lines the lines of the text, without terminators
     * @return an object representing the denoted text file
     */
    public static FileText fromLines(File file, List<String> lines)
    {
        return (lines instanceof FileText)
            ? (FileText) lines
            : new FileText(file, lines);
    }

    /**
     * Get the name of the file.
     * @return an object containing the name of the file
     */
    public File getFile()
    {
        return file;
    }

    /**
     * Get the character set which was used to read the file.
     * Will be <code>null</code> for a file reconstructed from its lines.
     * @return the charset used when the file was read
     */
    public Charset getCharset()
    {
        return charset;
    }

    /**
     * Get the binary contents of the file.
     * The bytes are read from the file on every call.
     * The returned object must not be modified.
     * @return a read-only buffer containing the bytes making up the file,
     *     or <code>null</code> if no file is associated with this text
     * @throws IOException if the bytes could not be read from the file,
     *     or if the file is too large to be held in a single array
     */
    public ByteBuffer getBytes() throws IOException
    {
        // We might decide to cache file bytes in the future.
        if (file == null) {
            return null;
        }
        // Query the length only once so that the size check and the
        // allocation below are guaranteed to agree.
        final long length = file.length();
        // One spare byte is allocated beyond the reported length, so the
        // length itself must stay strictly below Integer.MAX_VALUE;
        // otherwise the int addition below would overflow.
        if (length >= Integer.MAX_VALUE) {
            throw new IOException("File too large.");
        }
        byte[] bytes = new byte[(int) length + 1];
        final FileInputStream stream = new FileInputStream(file);
        try {
            int fill = 0;
            while (true) {
                if (fill >= bytes.length) {
                    // shouldn't happen, but it might nevertheless
                    // (e.g. the file grew after its length was queried)
                    if (bytes.length == Integer.MAX_VALUE) {
                        throw new IOException("File too large.");
                    }
                    // Compute the new size in long arithmetic so that
                    // doubling a large buffer cannot wrap around.
                    final long grown = 2L * bytes.length + 1;
                    final int newSize =
                        (int) Math.min(grown, Integer.MAX_VALUE);
                    final byte[] newBytes = new byte[newSize];
                    System.arraycopy(bytes, 0, newBytes, 0, fill);
                    bytes = newBytes;
                }
                final int len = stream.read(bytes, fill,
                                            bytes.length - fill);
                if (len == -1) {
                    break;
                }
                fill += len;
            }
            return ByteBuffer.wrap(bytes, 0, fill).asReadOnlyBuffer();
        }
        finally {
            Utils.closeQuietly(stream);
        }
    }

    /**
     * Retrieve the full text of the file.
     * @return the full text of the file
     */
    public CharSequence getFullText()
    {
        return fullText;
    }

    /**
     * Returns an array of all lines.
     * {@code text.toLinesArray()} is equivalent to
     * {@code text.toArray(new String[text.size()])}.
     * The returned array is a copy; modifying it does not affect this text.
     * @return an array of all lines of the text
     */
    public String[] toLinesArray()
    {
        return lines.clone();
    }

    /**
     * Find positions of line breaks in the full text.
     * The result is computed on first use and then kept.
     * @return an array giving the first positions of each line.
     * @throws ConcurrentModificationException if the number of line
     *     terminators found in the full text does not fit the number of
     *     lines
     */
    private int[] lineBreaks()
    {
        if (lineBreaks == null) {
            final int[] breaks = new int[size() + 1];
            breaks[0] = 0;
            int lineNo = 1;
            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
            while (matcher.find()) {
                if (lineNo >= breaks.length) {
                    // More terminators than lines, e.g. a line that itself
                    // contains a terminator. Report it as the consistency
                    // failure below rather than overrunning the array.
                    throw new ConcurrentModificationException(
                        "Text changed.");
                }
                breaks[lineNo++] = matcher.end();
            }
            if (lineNo < breaks.length) {
                // The last line has no terminator; it ends with the text.
                breaks[lineNo++] = fullText.length();
            }
            if (lineNo != breaks.length) {
                throw new ConcurrentModificationException("Text changed.");
            }
            lineBreaks = breaks;
        }
        return lineBreaks;
    }

    /**
     * Determine line and column numbers in full text.
     * The line number of the result starts at one, the column number
     * at zero.
     * @param pos the character position in the full text
     * @return the line and column numbers of this character
     * @throws ArrayIndexOutOfBoundsException if pos is negative
     */
    public LineColumn lineColumn(int pos)
    {
        final int[] breaks = lineBreaks();
        int lineNo = Arrays.binarySearch(breaks, pos);
        if (lineNo < 0) {
            // we have: lineNo = -(insertion point) - 1
            // we want: lineNo = (insertion point) - 1
            lineNo = -lineNo - 2;
        }
        final int startOfLine = breaks[lineNo];
        final int columnNo = pos - startOfLine;
        // now we have lineNo and columnNo, both starting at zero.
        return new LineColumn(lineNo + 1, columnNo);
    }

    /**
     * Retrieves a line of the text by its number.
     * The returned line will not contain a trailing terminator.
     * @param lineNo the number of the line to get, starting at zero
     * @return the line with the given number
     */
    @Override
    public String get(final int lineNo)
    {
        return lines[lineNo];
    }

    /**
     * Counts the lines of the text.
     * @return the number of lines in the text
     */
    @Override
    public int size()
    {
        return lines.length;
    }

}