001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.IOException;
020import java.io.OutputStream;
021import java.io.Writer;
022import java.nio.ByteBuffer;
023import java.nio.CharBuffer;
024import java.nio.charset.Charset;
025import java.nio.charset.CharsetDecoder;
026import java.nio.charset.CoderResult;
027import java.nio.charset.CodingErrorAction;
028
/**
 * {@link OutputStream} implementation that transforms a byte stream to a
 * character stream using a specified charset encoding and writes the resulting
 * stream to a {@link Writer}. The stream is transformed using a
 * {@link CharsetDecoder} object, guaranteeing that all charset
 * encodings supported by the JRE are handled correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer.
 * This implies that the data is written to the underlying {@link Writer} in chunks
 * that are no larger than the size of this buffer. By default, the buffer is
 * flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer}
 * in a {@link java.io.BufferedWriter}. {@link WriterOutputStream} can also
 * be instructed to flush the buffer after each write operation. In this case, all
 * available data is written immediately to the underlying {@link Writer}, implying that
 * the current position of the {@link Writer} is correlated to the current position
 * of the {@link WriterOutputStream}.
 * </p>
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link java.io.OutputStreamWriter};
 * in the following example, writing to {@code out2} would have the same result as writing to
 * {@code out} directly (provided that the byte sequence is legal with respect to the
 * charset encoding):
 * </p>
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = new WriterOutputStream(writer, cs);</pre>
 * <p>
 * {@link WriterOutputStream} implements the same transformation as {@link java.io.InputStreamReader},
 * except that the control flow is reversed: both classes transform a byte stream
 * into a character stream, but {@link java.io.InputStreamReader} pulls data from the underlying stream,
 * while {@link WriterOutputStream} pushes it to the underlying stream.
 * </p>
 * <p>
 * Note that while there are use cases where there is no alternative to using
 * this class, very often the need to use this class is an indication of a flaw
 * in the design of the code. This class is typically used in situations where an existing
 * API only accepts an {@link OutputStream} object, but where the stream is known to represent
 * character data that must be decoded for further use.
 * </p>
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
 * </p>
 *
 * @see org.apache.commons.io.input.ReaderInputStream
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {
    private static final int BUFFER_SIZE = 1024;

    /** Message used when the running JDK cannot decode UTF-16 incrementally. */
    private static final String BROKEN_UTF16_MESSAGE =
            "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    private final Writer writer;
    private final CharsetDecoder decoder;
    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small
     * as it is used only to transfer the received data to the
     * decoder. Between calls it is kept in "write mode" (ready for {@code put}).
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be
     * somewhat larger as we write from this buffer to the
     * underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Becomes {@code true} once {@link #close()} has signalled end-of-input to the decoder and
     * flushed it. A flushed decoder rejects further {@code decode} calls, so this flag lets a
     * repeated {@link #close()} skip that step.
     */
    private boolean decoderFinished;

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param decoder the charset decoder
     * @param bufferSize the size of the output buffer in number of characters; must be at least 1
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException if {@code bufferSize} is less than 1
     * @throws UnsupportedOperationException if UTF-16 is requested and the running JDK fails the
     *                                       incremental UTF-16 decoding self-test
     * @since 2.1
     */
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize,
                              final boolean writeImmediately) {
        checkIbmJdkWithBrokenUTF16(decoder.charset());
        // An empty output buffer can never accept a decoded character, which would make
        // the decode loop spin forever; reject it up front.
        if (bufferSize < 1) {
            throw new IllegalArgumentException("bufferSize must be at least 1: " + bufferSize);
        }
        this.writer = writer;
        this.decoder = decoder;
        this.writeImmediately = writeImmediately;
        decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream}. Malformed input and unmappable characters
     * are replaced with {@code "?"}.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     * @param bufferSize the size of the output buffer in number of characters; must be at least 1
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException if {@code bufferSize} is less than 1
     */
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer,
             charset.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith("?"),
             bufferSize,
             writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param charset the charset encoding
     */
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the output buffer in number of characters; must be at least 1
     * @param writeImmediately If {@code true} the output buffer will be flushed after each
     *                         write operation, i.e. all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the
     *                         output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException if {@code bufferSize} is less than 1
     */
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize,
                              final boolean writeImmediately) {
        this(writer, Charset.forName(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE}
     * characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or {@link #close()}
     * is called.
     *
     * @param writer the target {@link Writer}
     * @param charsetName the name of the charset encoding
     */
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} that uses the default character encoding and with a default output
     * buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when
     * {@link #flush()} or {@link #close()} is called.
     *
     * @param writer the target {@link Writer}
     * @deprecated 2.5 use {@link #WriterOutputStream(Writer, Charset)} instead
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @param off the start offset in the byte array
     * @param len the number of bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        // Feed the decoder in slices no larger than the free space of the input buffer.
        while (len > 0) {
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Write bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Write a single byte to the stream.
     *
     * @param b the byte to write
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] {(byte) b}, 0, 1);
    }

    /**
     * Flush the stream. Any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Close the stream. End-of-input is signalled to the decoder and the decoder is flushed,
     * then any remaining content accumulated in the output buffer
     * will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        if (!decoderFinished) {
            processInput(true);
            // The CharsetDecoder contract requires flush() after the final decode() so that
            // any state held back by the decoder reaches the output buffer.
            flushDecoder();
            decoderFinished = true;
        }
        flushOutput();
        writer.close();
    }

    /**
     * Decode the contents of the input ByteBuffer into a CharBuffer.
     *
     * @param endOfInput indicates end of input
     * @throws IOException if an I/O error occurs.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        try {
            while (true) {
                final CoderResult coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
                if (coderResult.isUnderflow()) {
                    break;
                }
                if (coderResult.isOverflow()) {
                    drainFullOutput();
                } else {
                    // Only reachable when the decoder reports (rather than replaces)
                    // malformed input or unmappable characters.
                    throw new IOException("Unexpected coder result");
                }
            }
        } finally {
            // Discard the bytes that have been read and return to write mode,
            // also on failure, so the buffer is never left flipped.
            decoderIn.compact();
        }
    }

    /**
     * Flush the decoder's internal state into the output buffer. Must only be called after
     * {@link #processInput(boolean)} has been invoked with {@code endOfInput == true}.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushDecoder() throws IOException {
        while (true) {
            final CoderResult coderResult = decoder.flush(decoderOut);
            if (coderResult.isUnderflow()) {
                return;
            }
            if (coderResult.isOverflow()) {
                drainFullOutput();
            } else {
                throw new IOException("Unexpected coder result");
            }
        }
    }

    /**
     * Make room in the output buffer after the decoder reported an overflow.
     *
     * @throws IOException if the output buffer is already empty (so it is too small for the
     *                     decoder to make progress) or if writing to the {@link Writer} fails.
     */
    private void drainFullOutput() throws IOException {
        if (decoderOut.position() == 0) {
            // Nothing to drain: retrying would overflow again and loop forever.
            throw new IOException("Output buffer of " + decoderOut.capacity()
                    + " character(s) is too small for the decoder to make progress");
        }
        flushOutput();
    }

    /**
     * Flush the output.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            decoderOut.clear();
        }
    }

    /**
     * Check if the JDK in use properly supports the given charset. Only UTF-16 is checked:
     * a short test string is encoded and then decoded one byte at a time.
     *
     * @param charset the charset to check the support for
     * @throws UnsupportedOperationException if the self-test fails
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!"UTF-16".equals(charset.name())) {
            return;
        }
        final String probe = "v\u00e9s";
        final byte[] bytes = probe.getBytes(charset);

        final CharsetDecoder probeDecoder = charset.newDecoder();
        final ByteBuffer in = ByteBuffer.allocate(16);
        final CharBuffer out = CharBuffer.allocate(probe.length());
        final int len = bytes.length;
        for (int i = 0; i < len; i++) {
            in.put(bytes[i]);
            in.flip();
            try {
                probeDecoder.decode(in, out, i == (len - 1));
            } catch (final IllegalArgumentException e) {
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            in.compact();
        }
        out.rewind();
        if (!probe.equals(out.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }
    }
}