Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8073700

XMLStreamWriter outputs Unicode extended characters (non-BMP) incorrectly

    Details

    • Type: Bug
    • Status: Closed
    • Priority: P4
    • Resolution: Duplicate
    • Affects Version/s: 8u31
    • Fix Version/s: None
    • Component/s: xml
    • Labels:

      Description

      FULL PRODUCT VERSION :
      java version "1.8.0_31"
      Java(TM) SE Runtime Environment (build 1.8.0_31-b13)
      Java HotSpot(TM) 64-Bit Server VM (build 25.31-b07, mixed mode)

      ADDITIONAL OS VERSION INFORMATION :
      Microsoft Windows [Version 6.1.7601]

      A DESCRIPTION OF THE PROBLEM :
      Outputting character data using XMLStreamWriter leads to incorrect results for Unicode characters outside the BMP, i.e. code points for which Character.charCount(int) != 1.

      STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
      Run the following example, which attempts to write the character U+10480 (𐒀) wrapped in an <el> element:

      import java.io.OutputStreamWriter;
      import java.nio.charset.StandardCharsets;

      import javax.xml.stream.XMLOutputFactory;
      import javax.xml.stream.XMLStreamException;
      import javax.xml.stream.XMLStreamWriter;

      /**
       * Reproducer for JDK-8073700: writes the supplementary character U+10480
       * through an {@link XMLStreamWriter} so the emitted character data can be
       * inspected on standard output.
       */
      public class XmlStreamWriterExtendedCharactersFail {
          /**
           * Prints the test string, then the XML document containing it.
           *
           * @param args ignored
           * @throws XMLStreamException if the stream writer reports an error
           */
          public static void main(String[] args) throws XMLStreamException {
              // U+10480 spelled as its surrogate pair so the literal is immune to
              // the encoding used to read this source file.
              String inlineStr = "inlineStr = \uD801\uDC80";
              // create string using StringBuilder to avoid Java file encoding confusion:
              String sbStr = new StringBuilder("sbStr = ").appendCodePoint(0x10480).toString();
              // The two strings carry different labels, so compare only the final
              // code point; comparing the whole strings can never succeed.
              assert sbStr.codePointBefore(sbStr.length())
                      == inlineStr.codePointBefore(inlineStr.length());
              System.out.println(sbStr);

              OutputStreamWriter outWriter = new OutputStreamWriter(System.out,
                      StandardCharsets.UTF_8.newEncoder());
              XMLStreamWriter writer = XMLOutputFactory.newFactory()
                      .createXMLStreamWriter(outWriter);
              writer.writeStartDocument("UTF-8", "1.1");
              writer.writeStartElement("el");
              writer.writeCharacters(sbStr);
              writer.writeEndElement();
              writer.writeEndDocument();
              // close() is not documented to flush, so push the output out explicitly.
              writer.flush();
              writer.close();
          }
      }

      EXPECTED VERSUS ACTUAL BEHAVIOR :
      EXPECTED -
      The following output:

      sbStr = 𐒀
      <?xml version="1.1" encoding="UTF-8"?><el>sbStr = 𐒀</el>
      ACTUAL -
      sbStr = 𐒀
      <?xml version="1.1" encoding="UTF-8"?><el>sbStr = &#xd801;&#xdc80;</el>

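      Note that &#xd801; and &#xdc80; are character references to the two UTF-16 surrogate code units of U+10480, not to the character itself. Surrogates are not legal XML characters, so parsing this output (e.g. with SAX) fails; the correct numeric reference would be &#x10480;.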

      REPRODUCIBILITY :
      This bug can be reproduced always.

      ---------- BEGIN SOURCE ----------
      import java.io.OutputStreamWriter;
      import java.nio.charset.StandardCharsets;

      import javax.xml.stream.XMLOutputFactory;
      import javax.xml.stream.XMLStreamException;
      import javax.xml.stream.XMLStreamWriter;

      /**
       * Reproducer for JDK-8073700: writes the supplementary character U+10480
       * through an {@link XMLStreamWriter} so the emitted character data can be
       * inspected on standard output.
       */
      public class XmlStreamWriterExtendedCharactersFail {
          /**
           * Prints the test string, then the XML document containing it.
           *
           * @param args ignored
           * @throws XMLStreamException if the stream writer reports an error
           */
          public static void main(String[] args) throws XMLStreamException {
              // U+10480 spelled as its surrogate pair so the literal is immune to
              // the encoding used to read this source file.
              String inlineStr = "inlineStr = \uD801\uDC80";
              // create string using StringBuilder to avoid Java file encoding confusion:
              String sbStr = new StringBuilder("sbStr = ").appendCodePoint(0x10480).toString();
              // The two strings carry different labels, so compare only the final
              // code point; comparing the whole strings can never succeed.
              assert sbStr.codePointBefore(sbStr.length())
                      == inlineStr.codePointBefore(inlineStr.length());
              System.out.println(sbStr);

              OutputStreamWriter outWriter = new OutputStreamWriter(System.out,
                      StandardCharsets.UTF_8.newEncoder());
              XMLStreamWriter writer = XMLOutputFactory.newFactory()
                      .createXMLStreamWriter(outWriter);
              writer.writeStartDocument("UTF-8", "1.1");
              writer.writeStartElement("el");
              writer.writeCharacters(sbStr);
              writer.writeEndElement();
              writer.writeEndDocument();
              // close() is not documented to flush, so push the output out explicitly.
              writer.flush();
              writer.close();
          }
      }
      ---------- END SOURCE ----------

      CUSTOMER SUBMITTED WORKAROUND :
      Workaround: wrap the XMLStreamWriter in a delegate that escapes character data itself (requires Apache Commons Lang 3):

      import java.io.IOException;
      import java.io.Writer;
      import java.nio.CharBuffer;

      import javax.xml.namespace.NamespaceContext;
      import javax.xml.stream.FactoryConfigurationError;
      import javax.xml.stream.XMLOutputFactory;
      import javax.xml.stream.XMLStreamException;
      import javax.xml.stream.XMLStreamWriter;

      import org.apache.commons.lang3.StringEscapeUtils;
      import org.apache.commons.lang3.text.translate.CharSequenceTranslator;

      public class UnicodeXMLStreamWriter implements XMLStreamWriter {
          private XMLStreamWriter xmlSW;
          private Writer writer;
          private CharSequenceTranslator characterEscapor = StringEscapeUtils.ESCAPE_XML11;

          public static UnicodeXMLStreamWriter newInstance(Writer writer) throws XMLStreamException, FactoryConfigurationError {
              return newInstance(writer, XMLOutputFactory.newFactory());
          }

          public static UnicodeXMLStreamWriter newInstance(Writer writer, XMLOutputFactory factory) throws XMLStreamException {
              XMLStreamWriter xmlSW = factory.createXMLStreamWriter(writer);
              return new UnicodeXMLStreamWriter(writer, xmlSW);
          }

          public UnicodeXMLStreamWriter(Writer writer, XMLStreamWriter xmlSW) {
              this.writer = writer;
              this.xmlSW = xmlSW;
          }

          public void writeCharacters(String text) throws XMLStreamException {
              // finish writing start element
              xmlSW.writeCharacters("");
              try {
                  characterEscapor.translate(text, writer);
              } catch (IOException e) {
                  throw new XMLStreamException(e);
              }
          }

          public void writeCharacters(char[] text, int start, int len)
                  throws XMLStreamException {
              // finish writing start element
              xmlSW.writeCharacters("");
              try {
                  characterEscapor.translate(CharBuffer.wrap(text, start, len), writer);
              } catch (IOException e) {
                  throw new XMLStreamException(e);
              }
          }

          //////////////// REMAINING METHODS ARE DELEGATES to xmlSW ////////////////

          public void writeStartElement(String localName) throws XMLStreamException {
              xmlSW.writeStartElement(localName);
          }

          public void writeStartElement(String namespaceURI, String localName)
                  throws XMLStreamException {
              xmlSW.writeStartElement(namespaceURI, localName);
          }

          public void writeStartElement(String prefix, String localName,
                  String namespaceURI) throws XMLStreamException {
              xmlSW.writeStartElement(prefix, localName, namespaceURI);
          }

          public void writeEmptyElement(String namespaceURI, String localName)
                  throws XMLStreamException {
              xmlSW.writeEmptyElement(namespaceURI, localName);
          }

          public void writeEmptyElement(String prefix, String localName,
                  String namespaceURI) throws XMLStreamException {
              xmlSW.writeEmptyElement(prefix, localName, namespaceURI);
          }

          public void writeEmptyElement(String localName) throws XMLStreamException {
              xmlSW.writeEmptyElement(localName);
          }

          public void writeEndElement() throws XMLStreamException {
              xmlSW.writeEndElement();
          }

          public void writeEndDocument() throws XMLStreamException {
              xmlSW.writeEndDocument();
          }

          public void close() throws XMLStreamException {
              xmlSW.close();
          }

          public void flush() throws XMLStreamException {
              xmlSW.flush();
          }

          public void writeAttribute(String localName, String value)
                  throws XMLStreamException {
              xmlSW.writeAttribute(localName, value);
          }

          public void writeAttribute(String prefix, String namespaceURI,
                  String localName, String value) throws XMLStreamException {
              xmlSW.writeAttribute(prefix, namespaceURI, localName, value);
          }

          public void writeAttribute(String namespaceURI, String localName,
                  String value) throws XMLStreamException {
              xmlSW.writeAttribute(namespaceURI, localName, value);
          }

          public void writeNamespace(String prefix, String namespaceURI)
                  throws XMLStreamException {
              xmlSW.writeNamespace(prefix, namespaceURI);
          }

          public void writeDefaultNamespace(String namespaceURI)
                  throws XMLStreamException {
              xmlSW.writeDefaultNamespace(namespaceURI);
          }

          public void writeComment(String data) throws XMLStreamException {
              xmlSW.writeComment(data);
          }

          public void writeProcessingInstruction(String target)
                  throws XMLStreamException {
              xmlSW.writeProcessingInstruction(target);
          }

          public void writeProcessingInstruction(String target, String data)
                  throws XMLStreamException {
              xmlSW.writeProcessingInstruction(target, data);
          }

          public void writeCData(String data) throws XMLStreamException {
              xmlSW.writeCData(data);
          }

          public void writeDTD(String dtd) throws XMLStreamException {
              xmlSW.writeDTD(dtd);
          }

          public void writeEntityRef(String name) throws XMLStreamException {
              xmlSW.writeEntityRef(name);
          }

          public void writeStartDocument() throws XMLStreamException {
              xmlSW.writeStartDocument();
          }

          public void writeStartDocument(String version) throws XMLStreamException {
              xmlSW.writeStartDocument(version);
          }

          public void writeStartDocument(String encoding, String version)
                  throws XMLStreamException {
              xmlSW.writeStartDocument(encoding, version);
          }

          public String getPrefix(String uri) throws XMLStreamException {
              return xmlSW.getPrefix(uri);
          }

          public void setPrefix(String prefix, String uri) throws XMLStreamException {
              xmlSW.setPrefix(prefix, uri);
          }

          public void setDefaultNamespace(String uri) throws XMLStreamException {
              xmlSW.setDefaultNamespace(uri);
          }

          public void setNamespaceContext(NamespaceContext context)
                  throws XMLStreamException {
              xmlSW.setNamespaceContext(context);
          }

          public NamespaceContext getNamespaceContext() {
              return xmlSW.getNamespaceContext();
          }

          public Object getProperty(String name) throws IllegalArgumentException {
              return xmlSW.getProperty(name);
          }
      }

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                aefimov Aleksej Efimov
                Reporter:
                webbuggrp Webbug Group
              • Votes:
                0 Vote for this issue
                Watchers:
                4 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: