From c6b3699b63a5f7a326d9d7f08093a4d6ddf05c86 Mon Sep 17 00:00:00 2001
From: Jeff Sharkey <jsharkey@android.com>
Date: Fri, 15 Jul 2022 15:57:30 -0600
Subject: [PATCH] Offer explicit 3-byte vs 4-byte modified UTF-8.

As documented in art/runtime/jni/jni_internal.cc, ART has deviated
from the reference implementation (RI) by using a 4-byte encoding
instead of the 3-byte encoding required by the JNI specification.

Some users are okay with this 4-byte encoding (where they control
both the reading and writing logic) but other users require
compatibility with the DataOutput/DataInput API contract, so this
change lets users request either behavior.

This change now exercises all tests in both 4-byte and 3-byte modes,
and exhaustively confirms that all valid code-points match the
DataOutput/DataInput contract when in 3-byte mode.

Benchmark results still show significant performance benefits when
using this 3-byte encoding over the upstream RI:

    timeRead_Upstream_mean (ns):                  5090068
    timeRead_LocalUsing3ByteSequences_mean (ns):  1996032
    timeRead_LocalUsing4ByteSequences_mean (ns):  1813250

    timeWrite_Upstream_mean (ns):                 3856276
    timeWrite_LocalUsing3ByteSequences_mean (ns): 1632697
    timeWrite_LocalUsing4ByteSequences_mean (ns):  886503

Bug: 236923096
Test: atest FrameworksCoreTests:CharsetUtilsTest
Test: atest FrameworksCoreTests:FastDataTest
Test: atest FrameworksCoreTests:XmlTest
Test: atest FrameworksCoreTests:BinaryXmlTest
Test: ./frameworks/base/libs/hwui/tests/scripts/prep_generic.sh little && atest CorePerfTests:FastDataPerfTest
Change-Id: Ibddd36410a0d4a909522de011f23a337b53d6889
---
 .../internal/util/FastDataPerfTest.java       |  63 ++++++++--
 core/java/android/util/CharsetUtils.java      |  24 ++++
 core/java/android/util/TEST_MAPPING           |  28 +++++
 .../internal/util/BinaryXmlPullParser.java    |  13 +--
 .../internal/util/BinaryXmlSerializer.java    |   8 +-
 .../android/internal/util/FastDataInput.java  | 102 +++++++++++++++-
 .../android/internal/util/FastDataOutput.java |  75 ++++++++++--
 .../android/internal/util/ModifiedUtf8.java   | 110 ++++++++++++++++++
 .../com/android/internal/util/TEST_MAPPING    |  17 ++-
 core/jni/TEST_MAPPING                         |  16 +++
 .../coretests/src/android/util/XmlTest.java   |   2 +-
 .../android/internal/util/FastDataTest.java   |  93 ++++++++++++---
 12 files changed, 497 insertions(+), 54 deletions(-)
 create mode 100644 core/java/android/util/TEST_MAPPING
 create mode 100644 core/java/com/android/internal/util/ModifiedUtf8.java
 create mode 100644 core/jni/TEST_MAPPING

diff --git a/apct-tests/perftests/core/src/com/android/internal/util/FastDataPerfTest.java b/apct-tests/perftests/core/src/com/android/internal/util/FastDataPerfTest.java
index e3691a783bd6..76656bd67afa 100644
--- a/apct-tests/perftests/core/src/com/android/internal/util/FastDataPerfTest.java
+++ b/apct-tests/perftests/core/src/com/android/internal/util/FastDataPerfTest.java
@@ -52,19 +52,39 @@ public class FastDataPerfTest {
         while (state.keepRunning()) {
             os.reset();
             final BufferedOutputStream bos = new BufferedOutputStream(os, BUFFER_SIZE);
-            final DataOutput out = new DataOutputStream(bos);
-            doWrite(out);
-            bos.flush();
+            final DataOutputStream out = new DataOutputStream(bos);
+            try {
+                doWrite(out);
+                out.flush();
+            } finally {
+                out.close();
+            }
+        }
+    }
+
+    @Test
+    public void timeWrite_LocalUsing4ByteSequences() throws IOException {
+        final ByteArrayOutputStream os = new ByteArrayOutputStream(OUTPUT_SIZE);
+        final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
+        while (state.keepRunning()) {
+            os.reset();
+            final FastDataOutput out = FastDataOutput.obtainUsing4ByteSequences(os);
+            try {
+                doWrite(out);
+                out.flush();
+            } finally {
+                out.release();
+            }
         }
     }
 
     @Test
-    public void timeWrite_Local() throws IOException {
+    public void timeWrite_LocalUsing3ByteSequences() throws IOException {
         final ByteArrayOutputStream os = new ByteArrayOutputStream(OUTPUT_SIZE);
         final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
         while (state.keepRunning()) {
             os.reset();
-            final FastDataOutput out = FastDataOutput.obtain(os);
+            final FastDataOutput out = FastDataOutput.obtainUsing3ByteSequences(os);
             try {
                 doWrite(out);
                 out.flush();
@@ -81,19 +101,42 @@ public class FastDataPerfTest {
         while (state.keepRunning()) {
             is.reset();
             final BufferedInputStream bis = new BufferedInputStream(is, BUFFER_SIZE);
-            final DataInput in = new DataInputStream(bis);
-            doRead(in);
+            final DataInputStream in = new DataInputStream(bis);
+            try {
+                doRead(in);
+            } finally {
+                in.close();
+            }
+        }
+    }
+
+    @Test
+    public void timeRead_LocalUsing4ByteSequences() throws Exception {
+        final ByteArrayInputStream is = new ByteArrayInputStream(doWrite());
+        final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
+        while (state.keepRunning()) {
+            is.reset();
+            final FastDataInput in = FastDataInput.obtainUsing4ByteSequences(is);
+            try {
+                doRead(in);
+            } finally {
+                in.release();
+            }
         }
     }
 
     @Test
-    public void timeRead_Local() throws Exception {
+    public void timeRead_LocalUsing3ByteSequences() throws Exception {
         final ByteArrayInputStream is = new ByteArrayInputStream(doWrite());
         final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
         while (state.keepRunning()) {
             is.reset();
-            final DataInput in = new FastDataInput(is, BUFFER_SIZE);
-            doRead(in);
+            final FastDataInput in = FastDataInput.obtainUsing3ByteSequences(is);
+            try {
+                doRead(in);
+            } finally {
+                in.release();
+            }
         }
     }
 
diff --git a/core/java/android/util/CharsetUtils.java b/core/java/android/util/CharsetUtils.java
index fa146675b8d1..3b08c3b6d52f 100644
--- a/core/java/android/util/CharsetUtils.java
+++ b/core/java/android/util/CharsetUtils.java
@@ -26,6 +26,12 @@ import dalvik.annotation.optimization.FastNative;
  * <p>
  * These methods purposefully accept only non-movable byte array addresses to
  * avoid extra JNI overhead.
+ * <p>
+ * Callers are cautioned that there is a long-standing ART bug that emits
+ * non-standard 4-byte sequences, as described by {@code kUtfUse4ByteSequence}
+ * in {@code art/runtime/jni/jni_internal.cc}. If precise modified UTF-8
+ * encoding is required, use {@link com.android.internal.util.ModifiedUtf8}
+ * instead.
  *
  * @hide
  */
@@ -33,6 +39,12 @@ public class CharsetUtils {
     /**
      * Attempt to encode the given string as modified UTF-8 into the destination
      * byte array without making any new allocations.
+     * <p>
+     * Callers are cautioned that there is a long-standing ART bug that emits
+     * non-standard 4-byte sequences, as described by
+     * {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
+     * If precise modified UTF-8 encoding is required, use
+     * {@link com.android.internal.util.ModifiedUtf8} instead.
      *
      * @param src string value to be encoded
      * @param dest destination byte array to encode into
@@ -50,6 +62,12 @@ public class CharsetUtils {
     /**
      * Attempt to encode the given string as modified UTF-8 into the destination
      * byte array without making any new allocations.
+     * <p>
+     * Callers are cautioned that there is a long-standing ART bug that emits
+     * non-standard 4-byte sequences, as described by
+     * {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
+     * If precise modified UTF-8 encoding is required, use
+     * {@link com.android.internal.util.ModifiedUtf8} instead.
      *
      * @param src string value to be encoded
      * @param srcLen exact length of string to be encoded
@@ -66,6 +84,12 @@ public class CharsetUtils {
 
     /**
      * Attempt to decode a modified UTF-8 string from the source byte array.
+     * <p>
+     * Callers are cautioned that there is a long-standing ART bug that emits
+     * non-standard 4-byte sequences, as described by
+     * {@code kUtfUse4ByteSequence} in {@code art/runtime/jni/jni_internal.cc}.
+     * If precise modified UTF-8 encoding is required, use
+     * {@link com.android.internal.util.ModifiedUtf8} instead.
      *
      * @param src source byte array to decode from
      * @param srcOff offset into source where decoding should begin
diff --git a/core/java/android/util/TEST_MAPPING b/core/java/android/util/TEST_MAPPING
new file mode 100644
index 000000000000..0ae1c1593366
--- /dev/null
+++ b/core/java/android/util/TEST_MAPPING
@@ -0,0 +1,28 @@
+{
+  "presubmit": [
+    {
+      "name": "FrameworksCoreTests",
+      "options": [
+        {
+          "include-filter": "android.util.CharsetUtilsTest"
+        },
+        {
+          "include-filter": "com.android.internal.util.FastDataTest"
+        }
+      ],
+      "file_patterns": ["CharsetUtils|FastData"]
+    },
+    {
+      "name": "FrameworksCoreTests",
+      "options": [
+        {
+          "include-filter": "android.util.XmlTest"
+        },
+        {
+          "include-filter": "android.util.BinaryXmlTest"
+        }
+      ],
+      "file_patterns": ["Xml"]
+    }
+  ]
+}
diff --git a/core/java/com/android/internal/util/BinaryXmlPullParser.java b/core/java/com/android/internal/util/BinaryXmlPullParser.java
index 57552f301bd6..d3abac9f8877 100644
--- a/core/java/com/android/internal/util/BinaryXmlPullParser.java
+++ b/core/java/com/android/internal/util/BinaryXmlPullParser.java
@@ -73,12 +73,6 @@ import java.util.Objects;
  * </ul>
  */
 public final class BinaryXmlPullParser implements TypedXmlPullParser {
-    /**
-     * Default buffer size, which matches {@code FastXmlSerializer}. This should
-     * be kept in sync with {@link BinaryXmlPullParser}.
-     */
-    private static final int BUFFER_SIZE = 32_768;
-
     private FastDataInput mIn;
 
     private int mCurrentToken = START_DOCUMENT;
@@ -100,7 +94,12 @@ public final class BinaryXmlPullParser implements TypedXmlPullParser {
             throw new UnsupportedOperationException();
         }
 
-        mIn = new FastDataInput(is, BUFFER_SIZE);
+        if (mIn != null) {
+            mIn.release();
+            mIn = null;
+        }
+
+        mIn = FastDataInput.obtainUsing4ByteSequences(is);
 
         mCurrentToken = START_DOCUMENT;
         mCurrentDepth = 0;
diff --git a/core/java/com/android/internal/util/BinaryXmlSerializer.java b/core/java/com/android/internal/util/BinaryXmlSerializer.java
index f0ca1edb0b90..485430a43768 100644
--- a/core/java/com/android/internal/util/BinaryXmlSerializer.java
+++ b/core/java/com/android/internal/util/BinaryXmlSerializer.java
@@ -91,12 +91,6 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
     static final int TYPE_BOOLEAN_TRUE = 12 << 4;
     static final int TYPE_BOOLEAN_FALSE = 13 << 4;
 
-    /**
-     * Default buffer size, which matches {@code FastXmlSerializer}. This should
-     * be kept in sync with {@link BinaryXmlPullParser}.
-     */
-    private static final int BUFFER_SIZE = 32_768;
-
     private FastDataOutput mOut;
 
     /**
@@ -124,7 +118,7 @@ public final class BinaryXmlSerializer implements TypedXmlSerializer {
             throw new UnsupportedOperationException();
         }
 
-        mOut = FastDataOutput.obtain(os);
+        mOut = FastDataOutput.obtainUsing4ByteSequences(os);
         mOut.write(PROTOCOL_MAGIC_VERSION_0);
 
         mTagCount = 0;
diff --git a/core/java/com/android/internal/util/FastDataInput.java b/core/java/com/android/internal/util/FastDataInput.java
index f8d241b5ede0..5117034815fc 100644
--- a/core/java/com/android/internal/util/FastDataInput.java
+++ b/core/java/com/android/internal/util/FastDataInput.java
@@ -30,6 +30,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.Objects;
+import java.util.concurrent.atomic.AtomicReference;
 
 /**
  * Optimized implementation of {@link DataInput} which buffers data in memory
@@ -41,13 +42,18 @@ import java.util.Objects;
 public class FastDataInput implements DataInput, Closeable {
     private static final int MAX_UNSIGNED_SHORT = 65_535;
 
+    private static final int DEFAULT_BUFFER_SIZE = 32_768;
+
+    private static AtomicReference<FastDataInput> sInCache = new AtomicReference<>();
+
     private final VMRuntime mRuntime;
-    private final InputStream mIn;
 
     private final byte[] mBuffer;
     private final long mBufferPtr;
     private final int mBufferCap;
+    private final boolean mUse4ByteSequence;
 
+    private InputStream mIn;
     private int mBufferPos;
     private int mBufferLim;
 
@@ -57,7 +63,18 @@ public class FastDataInput implements DataInput, Closeable {
     private int mStringRefCount = 0;
     private String[] mStringRefs = new String[32];
 
+    /**
+     * @deprecated callers must specify {@code use4ByteSequence} so they make a
+     *             clear choice about working around a long-standing ART bug, as
+     *             described by the {@code kUtfUse4ByteSequence} comments in
+     *             {@code art/runtime/jni/jni_internal.cc}.
+     */
+    @Deprecated
     public FastDataInput(@NonNull InputStream in, int bufferSize) {
+        this(in, bufferSize, true /* use4ByteSequence */);
+    }
+
+    public FastDataInput(@NonNull InputStream in, int bufferSize, boolean use4ByteSequence) {
         mRuntime = VMRuntime.getRuntime();
         mIn = Objects.requireNonNull(in);
         if (bufferSize < 8) {
@@ -67,6 +84,64 @@ public class FastDataInput implements DataInput, Closeable {
         mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
         mBufferPtr = mRuntime.addressOf(mBuffer);
         mBufferCap = mBuffer.length;
+        mUse4ByteSequence = use4ByteSequence;
+    }
+
+    /**
+     * Obtain a {@link FastDataInput} configured with the given
+     * {@link InputStream} and which decodes large code-points using 3-byte
+     * sequences.
+     * <p>
+     * This <em>is</em> compatible with the {@link DataInput} API contract,
+     * which specifies that large code-points must be encoded with 3-byte
+     * sequences.
+     */
+    public static FastDataInput obtainUsing3ByteSequences(@NonNull InputStream in) {
+        return new FastDataInput(in, DEFAULT_BUFFER_SIZE, false /* use4ByteSequence */);
+    }
+
+    /**
+     * Obtain a {@link FastDataInput} configured with the given
+     * {@link InputStream} and which decodes large code-points using 4-byte
+     * sequences.
+     * <p>
+     * This <em>is not</em> compatible with the {@link DataInput} API contract,
+     * which specifies that large code-points must be encoded with 3-byte
+     * sequences.
+     */
+    public static FastDataInput obtainUsing4ByteSequences(@NonNull InputStream in) {
+        FastDataInput instance = sInCache.getAndSet(null);
+        if (instance != null) {
+            instance.setInput(in);
+            return instance;
+        }
+        return new FastDataInput(in, DEFAULT_BUFFER_SIZE, true /* use4ByteSequence */);
+    }
+
+    /**
+     * Release a {@link FastDataInput} to potentially be recycled. You must not
+     * interact with the object after releasing it.
+     */
+    public void release() {
+        mIn = null;
+        mBufferPos = 0;
+        mBufferLim = 0;
+        mStringRefCount = 0;
+
+        if (mBufferCap == DEFAULT_BUFFER_SIZE && mUse4ByteSequence) {
+            // Try to return to the cache.
+            sInCache.compareAndSet(null, this);
+        }
+    }
+
+    /**
+     * Re-initializes the object for the new input.
+     */
+    private void setInput(@NonNull InputStream in) {
+        mIn = Objects.requireNonNull(in);
+        mBufferPos = 0;
+        mBufferLim = 0;
+        mStringRefCount = 0;
     }
 
     private void fill(int need) throws IOException {
@@ -90,6 +165,7 @@ public class FastDataInput implements DataInput, Closeable {
     @Override
     public void close() throws IOException {
         mIn.close();
+        release();
     }
 
     @Override
@@ -126,6 +202,14 @@ public class FastDataInput implements DataInput, Closeable {
 
     @Override
     public String readUTF() throws IOException {
+        if (mUse4ByteSequence) {
+            return readUTFUsing4ByteSequences();
+        } else {
+            return readUTFUsing3ByteSequences();
+        }
+    }
+
+    private String readUTFUsing4ByteSequences() throws IOException {
         // Attempt to read directly from buffer space if there's enough room,
         // otherwise fall back to chunking into place
         final int len = readUnsignedShort();
@@ -141,6 +225,22 @@ public class FastDataInput implements DataInput, Closeable {
         }
     }
 
+    private String readUTFUsing3ByteSequences() throws IOException {
+        // Attempt to read directly from buffer space if there's enough room,
+        // otherwise fall back to chunking into place
+        final int len = readUnsignedShort();
+        if (mBufferCap > len) {
+            if (mBufferLim - mBufferPos < len) fill(len);
+            final String res = ModifiedUtf8.decode(mBuffer, new char[len], mBufferPos, len);
+            mBufferPos += len;
+            return res;
+        } else {
+            final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
+            readFully(tmp, 0, len);
+            return ModifiedUtf8.decode(tmp, new char[len], 0, len);
+        }
+    }
+
     /**
      * Read a {@link String} value with the additional signal that the given
      * value is a candidate for being canonicalized, similar to
diff --git a/core/java/com/android/internal/util/FastDataOutput.java b/core/java/com/android/internal/util/FastDataOutput.java
index bc8496b3bdd3..c9e8f8f08229 100644
--- a/core/java/com/android/internal/util/FastDataOutput.java
+++ b/core/java/com/android/internal/util/FastDataOutput.java
@@ -42,7 +42,7 @@ import java.util.concurrent.atomic.AtomicReference;
 public class FastDataOutput implements DataOutput, Flushable, Closeable {
     private static final int MAX_UNSIGNED_SHORT = 65_535;
 
-    private static final int BUFFER_SIZE = 32_768;
+    private static final int DEFAULT_BUFFER_SIZE = 32_768;
 
     private static AtomicReference<FastDataOutput> sOutCache = new AtomicReference<>();
 
@@ -51,6 +51,7 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
     private final byte[] mBuffer;
     private final long mBufferPtr;
     private final int mBufferCap;
+    private final boolean mUse4ByteSequence;
 
     private OutputStream mOut;
     private int mBufferPos;
@@ -60,7 +61,18 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
      */
     private final HashMap<String, Short> mStringRefs = new HashMap<>();
 
+    /**
+     * @deprecated callers must specify {@code use4ByteSequence} so they make a
+     *             clear choice about working around a long-standing ART bug, as
+     *             described by the {@code kUtfUse4ByteSequence} comments in
+     *             {@code art/runtime/jni/jni_internal.cc}.
+     */
+    @Deprecated
     public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
+        this(out, bufferSize, true /* use4ByteSequence */);
+    }
+
+    public FastDataOutput(@NonNull OutputStream out, int bufferSize, boolean use4ByteSequence) {
         mRuntime = VMRuntime.getRuntime();
         if (bufferSize < 8) {
             throw new IllegalArgumentException();
@@ -69,25 +81,45 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
         mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
         mBufferPtr = mRuntime.addressOf(mBuffer);
         mBufferCap = mBuffer.length;
+        mUse4ByteSequence = use4ByteSequence;
 
         setOutput(out);
     }
 
     /**
-     * Create a new FastDataOutput object or retrieve one from cache.
+     * Obtain a {@link FastDataOutput} configured with the given
+     * {@link OutputStream} and which encodes large code-points using 3-byte
+     * sequences.
+     * <p>
+     * This <em>is</em> compatible with the {@link DataOutput} API contract,
+     * which specifies that large code-points must be encoded with 3-byte
+     * sequences.
+     */
+    public static FastDataOutput obtainUsing3ByteSequences(@NonNull OutputStream out) {
+        return new FastDataOutput(out, DEFAULT_BUFFER_SIZE, false /* use4ByteSequence */);
+    }
+
+    /**
+     * Obtain a {@link FastDataOutput} configured with the given
+     * {@link OutputStream} and which encodes large code-points using 4-byte
+     * sequences.
+     * <p>
+     * This <em>is not</em> compatible with the {@link DataOutput} API contract,
+     * which specifies that large code-points must be encoded with 3-byte
+     * sequences.
      */
-    public static FastDataOutput obtain(@NonNull OutputStream out) {
+    public static FastDataOutput obtainUsing4ByteSequences(@NonNull OutputStream out) {
         FastDataOutput instance = sOutCache.getAndSet(null);
         if (instance != null) {
             instance.setOutput(out);
             return instance;
         }
-        return new FastDataOutput(out, BUFFER_SIZE);
+        return new FastDataOutput(out, DEFAULT_BUFFER_SIZE, true /* use4ByteSequence */);
     }
 
     /**
-     * Put a FastDataOutput object back into the cache.
-     * You must not touch the object after this call.
+     * Release a {@link FastDataOutput} to potentially be recycled. You must not
+     * interact with the object after releasing it.
      */
     public void release() {
         if (mBufferPos > 0) {
@@ -98,7 +130,7 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
         mBufferPos = 0;
         mStringRefs.clear();
 
-        if (mBufferCap == BUFFER_SIZE) {
+        if (mBufferCap == DEFAULT_BUFFER_SIZE && mUse4ByteSequence) {
             // Try to return to the cache.
             sOutCache.compareAndSet(null, this);
         }
@@ -156,6 +188,14 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
 
     @Override
     public void writeUTF(String s) throws IOException {
+        if (mUse4ByteSequence) {
+            writeUTFUsing4ByteSequences(s);
+        } else {
+            writeUTFUsing3ByteSequences(s);
+        }
+    }
+
+    private void writeUTFUsing4ByteSequences(String s) throws IOException {
         // Attempt to write directly to buffer space if there's enough room,
         // otherwise fall back to chunking into place
         if (mBufferCap - mBufferPos < 2 + s.length()) drain();
@@ -183,6 +223,27 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
         }
     }
 
+    private void writeUTFUsing3ByteSequences(String s) throws IOException {
+        final int len = (int) ModifiedUtf8.countBytes(s, false);
+        if (len > MAX_UNSIGNED_SHORT) {
+            throw new IOException("Modified UTF-8 length too large: " + len);
+        }
+
+        // Attempt to write directly to buffer space if there's enough room,
+        // otherwise fall back to chunking into place
+        if (mBufferCap >= 2 + len) {
+            if (mBufferCap - mBufferPos < 2 + len) drain();
+            writeShort(len);
+            ModifiedUtf8.encode(mBuffer, mBufferPos, s);
+            mBufferPos += len;
+        } else {
+            final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
+            ModifiedUtf8.encode(tmp, 0, s);
+            writeShort(len);
+            write(tmp, 0, len);
+        }
+    }
+
     /**
      * Write a {@link String} value with the additional signal that the given
      * value is a candidate for being canonicalized, similar to
diff --git a/core/java/com/android/internal/util/ModifiedUtf8.java b/core/java/com/android/internal/util/ModifiedUtf8.java
new file mode 100644
index 000000000000..a144c0034dd2
--- /dev/null
+++ b/core/java/com/android/internal/util/ModifiedUtf8.java
@@ -0,0 +1,110 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package com.android.internal.util;
+
+import java.io.UTFDataFormatException;
+
+public class ModifiedUtf8 {
+    /**
+     * Decodes a byte array containing <i>modified UTF-8</i> bytes into a string.
+     *
+     * <p>Note that although this method decodes the (supposedly impossible) zero byte to U+0000,
+     * that's what the reference implementation (RI) does too.
+     */
+    public static String decode(byte[] in, char[] out, int offset, int utfSize)
+            throws UTFDataFormatException {
+        int count = 0, s = 0, a;
+        while (count < utfSize) {
+            if ((out[s] = (char) in[offset + count++]) < '\u0080') {
+                s++;
+            } else if (((a = out[s]) & 0xe0) == 0xc0) {
+                if (count >= utfSize) {
+                    throw new UTFDataFormatException("bad second byte at " + count);
+                }
+                int b = in[offset + count++];
+                if ((b & 0xC0) != 0x80) {
+                    throw new UTFDataFormatException("bad second byte at " + (count - 1));
+                }
+                out[s++] = (char) (((a & 0x1F) << 6) | (b & 0x3F));
+            } else if ((a & 0xf0) == 0xe0) {
+                if (count + 1 >= utfSize) {
+                    throw new UTFDataFormatException("bad third byte at " + (count + 1));
+                }
+                int b = in[offset + count++];
+                int c = in[offset + count++];
+                if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80)) {
+                    throw new UTFDataFormatException("bad second or third byte at " + (count - 2));
+                }
+                out[s++] = (char) (((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
+            } else {
+                throw new UTFDataFormatException("bad byte at " + (count - 1));
+            }
+        }
+        return new String(out, 0, s);
+    }
+
+    /**
+     * Returns the number of bytes the modified UTF-8 representation of 's' would take. Note
+     * that this is just the space for the bytes representing the characters, not the length
+     * which precedes those bytes, because different callers represent the length differently,
+     * as two, four, or even eight bytes. If {@code shortLength} is true, we'll throw an
+     * exception if the string is too long for its length to be represented by a short.
+     */
+    public static long countBytes(String s, boolean shortLength) throws UTFDataFormatException {
+        long result = 0;
+        final int length = s.length();
+        for (int i = 0; i < length; ++i) {
+            char ch = s.charAt(i);
+            if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
+                ++result;
+            } else if (ch <= 2047) {
+                result += 2;
+            } else {
+                result += 3;
+            }
+            if (shortLength && result > 65535) {
+                throw new UTFDataFormatException("String more than 65535 UTF bytes long");
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Encodes the <i>modified UTF-8</i> bytes corresponding to string {@code s} into the
+     * byte array {@code dst}, starting at the given {@code offset}.
+     */
+    public static void encode(byte[] dst, int offset, String s) {
+        final int length = s.length();
+        for (int i = 0; i < length; i++) {
+            char ch = s.charAt(i);
+            if (ch != 0 && ch <= 127) { // U+0000 uses two bytes.
+                dst[offset++] = (byte) ch;
+            } else if (ch <= 2047) {
+                dst[offset++] = (byte) (0xc0 | (0x1f & (ch >> 6)));
+                dst[offset++] = (byte) (0x80 | (0x3f & ch));
+            } else {
+                dst[offset++] = (byte) (0xe0 | (0x0f & (ch >> 12)));
+                dst[offset++] = (byte) (0x80 | (0x3f & (ch >> 6)));
+                dst[offset++] = (byte) (0x80 | (0x3f & ch));
+            }
+        }
+    }
+
+    private ModifiedUtf8() {
+    }
+}
diff --git a/core/java/com/android/internal/util/TEST_MAPPING b/core/java/com/android/internal/util/TEST_MAPPING
index 5881c5198617..41d59bbeb801 100644
--- a/core/java/com/android/internal/util/TEST_MAPPING
+++ b/core/java/com/android/internal/util/TEST_MAPPING
@@ -1,7 +1,20 @@
 {
   "presubmit": [
     {
-       "name": "ScreenshotHelperTests"
+      "name": "ScreenshotHelperTests",
+      "file_patterns": ["ScreenshotHelper"]
+    },
+    {
+      "name": "FrameworksCoreTests",
+      "options": [
+        {
+          "include-filter": "android.util.XmlTest"
+        },
+        {
+          "include-filter": "android.util.BinaryXmlTest"
+        }
+      ],
+      "file_patterns": ["Xml"]
     }
   ]
-}
\ No newline at end of file
+}
diff --git a/core/jni/TEST_MAPPING b/core/jni/TEST_MAPPING
new file mode 100644
index 000000000000..004c30ee5113
--- /dev/null
+++ b/core/jni/TEST_MAPPING
@@ -0,0 +1,16 @@
+{
+  "presubmit": [
+    {
+      "name": "FrameworksCoreTests",
+      "options": [
+        {
+          "include-filter": "android.util.CharsetUtilsTest"
+        },
+        {
+          "include-filter": "com.android.internal.util.FastDataTest"
+        }
+      ],
+      "file_patterns": ["CharsetUtils|FastData"]
+    }
+  ]
+}
diff --git a/core/tests/coretests/src/android/util/XmlTest.java b/core/tests/coretests/src/android/util/XmlTest.java
index 4e10ea926966..1cd4d139b229 100644
--- a/core/tests/coretests/src/android/util/XmlTest.java
+++ b/core/tests/coretests/src/android/util/XmlTest.java
@@ -224,7 +224,7 @@ public class XmlTest {
         doVerifyRead(in);
     }
 
-    private static final String TEST_STRING = "com.example";
+    private static final String TEST_STRING = "com☃example😀typical☃package😀name";
     private static final String TEST_STRING_EMPTY = "";
     private static final byte[] TEST_BYTES = new byte[] { 0, 1, 2, 3, 4, 3, 2, 1, 0 };
     private static final byte[] TEST_BYTES_EMPTY = new byte[0];
diff --git a/core/tests/coretests/src/com/android/internal/util/FastDataTest.java b/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
index 81fb39fed026..04dfd6ee30e2 100644
--- a/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
+++ b/core/tests/coretests/src/com/android/internal/util/FastDataTest.java
@@ -23,10 +23,13 @@ import static org.junit.Assert.fail;
 import android.annotation.NonNull;
 import android.util.ExceptionUtils;
 
-import androidx.test.ext.junit.runners.AndroidJUnit4;
+import libcore.util.HexEncoding;
 
+import org.junit.Assume;
 import org.junit.Test;
 import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -38,22 +41,34 @@ import java.io.EOFException;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.function.Consumer;
 
-@RunWith(AndroidJUnit4.class)
+@RunWith(Parameterized.class)
 public class FastDataTest {
+    private final boolean use4ByteSequence;
+
     private static final String TEST_SHORT_STRING = "a";
-    private static final String TEST_LONG_STRING = "com☃example☃typical☃package☃name";
+    private static final String TEST_LONG_STRING = "com☃example😀typical☃package😀name";
     private static final byte[] TEST_BYTES = TEST_LONG_STRING.getBytes(StandardCharsets.UTF_16LE);
 
+    @Parameters(name = "use4ByteSequence={0}")
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { {true}, {false} });
+    }
+
+    public FastDataTest(boolean use4ByteSequence) {
+        this.use4ByteSequence = use4ByteSequence;
+    }
+
     @Test
     public void testEndOfFile_Int() throws Exception {
         try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
-                new byte[] { 1 }), 1000)) {
+                new byte[] { 1 }), 1000, use4ByteSequence)) {
             assertThrows(EOFException.class, () -> in.readInt());
         }
         try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
-                new byte[] { 1, 1, 1, 1 }), 1000)) {
+                new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
             assertEquals(1, in.readByte());
             assertThrows(EOFException.class, () -> in.readInt());
         }
@@ -62,11 +77,11 @@ public class FastDataTest {
     @Test
     public void testEndOfFile_String() throws Exception {
         try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
-                new byte[] { 1 }), 1000)) {
+                new byte[] { 1 }), 1000, use4ByteSequence)) {
             assertThrows(EOFException.class, () -> in.readUTF());
         }
         try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
-                new byte[] { 1, 1, 1, 1 }), 1000)) {
+                new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
             assertThrows(EOFException.class, () -> in.readUTF());
         }
     }
@@ -74,12 +89,12 @@ public class FastDataTest {
     @Test
     public void testEndOfFile_Bytes_Small() throws Exception {
         try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
-                new byte[] { 1, 1, 1, 1 }), 1000)) {
+                new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
             final byte[] tmp = new byte[10];
             assertThrows(EOFException.class, () -> in.readFully(tmp));
         }
         try (FastDataInput in = new FastDataInput(new ByteArrayInputStream(
-                new byte[] { 1, 1, 1, 1 }), 1000)) {
+                new byte[] { 1, 1, 1, 1 }), 1000, use4ByteSequence)) {
             final byte[] tmp = new byte[10_000];
             assertThrows(EOFException.class, () -> in.readFully(tmp));
         }
@@ -88,7 +103,8 @@ public class FastDataTest {
     @Test
     public void testUTF_Bounds() throws Exception {
         final char[] buf = new char[65_534];
-        try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(), BOUNCE_SIZE)) {
+        try (FastDataOutput out = new FastDataOutput(new ByteArrayOutputStream(),
+                BOUNCE_SIZE, use4ByteSequence)) {
             // Writing simple string will fit fine
             Arrays.fill(buf, '!');
             final String simple = new String(buf);
@@ -100,11 +116,15 @@ public class FastDataTest {
             final String complex = new String(buf);
             assertThrows(IOException.class, () -> out.writeUTF(complex));
             assertThrows(IOException.class, () -> out.writeInternedUTF(complex));
+
+            out.flush();
         }
     }
 
     @Test
     public void testTranscode() throws Exception {
+        Assume.assumeFalse(use4ByteSequence);
+
         // Verify that upstream data can be read by fast
         {
             final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
@@ -113,20 +133,22 @@ public class FastDataTest {
             out.flush();
 
             final FastDataInput in = new FastDataInput(
-                    new ByteArrayInputStream(outStream.toByteArray()), BOUNCE_SIZE);
-            doTransodeRead(in);
+                    new ByteArrayInputStream(outStream.toByteArray()),
+                    BOUNCE_SIZE, use4ByteSequence);
+            doTranscodeRead(in);
         }
 
         // Verify that fast data can be read by upstream
         {
             final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
-            final FastDataOutput out = new FastDataOutput(outStream, BOUNCE_SIZE);
+            final FastDataOutput out = new FastDataOutput(outStream,
+                    BOUNCE_SIZE, use4ByteSequence);
             doTranscodeWrite(out);
             out.flush();
 
             final DataInputStream in = new DataInputStream(
                     new ByteArrayInputStream(outStream.toByteArray()));
-            doTransodeRead(in);
+            doTranscodeRead(in);
         }
     }
 
@@ -144,7 +166,7 @@ public class FastDataTest {
         out.writeDouble(32d);
     }
 
-    private static void doTransodeRead(DataInput in) throws IOException {
+    private static void doTranscodeRead(DataInput in) throws IOException {
         assertEquals(true, in.readBoolean());
         assertEquals(false, in.readBoolean());
         assertEquals(1, in.readByte());
@@ -225,10 +247,12 @@ public class FastDataTest {
         doBounce((out) -> {
             out.writeUTF("");
             out.writeUTF("☃");
+            out.writeUTF("😀");
             out.writeUTF("example");
         }, (in) -> {
             assertEquals("", in.readUTF());
             assertEquals("☃", in.readUTF());
+            assertEquals("😀", in.readUTF());
             assertEquals("example", in.readUTF());
         });
     }
@@ -263,6 +287,35 @@ public class FastDataTest {
         }, 1);
     }
 
+    /**
+     * Verify that we encode every valid code-point, up to and including
+     * {@link Character#MAX_CODE_POINT}, identically to RI in 3-byte mode.
+     */
+    @Test
+    public void testBounce_UTF_Exhaustive() throws Exception {
+        Assume.assumeFalse(use4ByteSequence);
+
+        final ByteArrayOutputStream slowStream = new ByteArrayOutputStream();
+        final DataOutput slowData = new DataOutputStream(slowStream);
+
+        final ByteArrayOutputStream fastStream = new ByteArrayOutputStream();
+        final FastDataOutput fastData = FastDataOutput.obtainUsing3ByteSequences(fastStream);
+
+        for (int cp = Character.MIN_CODE_POINT; cp <= Character.MAX_CODE_POINT; cp++) {
+            if (Character.isValidCodePoint(cp)) {
+                final String cpString = new String(Character.toChars(cp));
+                slowStream.reset();
+                slowData.writeUTF(cpString);
+                fastStream.reset();
+                fastData.writeUTF(cpString);
+                fastData.flush();
+                assertEquals("Bad encoding for code-point " + Integer.toHexString(cp),
+                        HexEncoding.encodeToString(slowStream.toByteArray()),
+                        HexEncoding.encodeToString(fastStream.toByteArray()));
+            }
+        }
+    }
+
     @Test
     public void testBounce_InternedUTF() throws Exception {
         doBounce((out) -> {
@@ -355,22 +408,24 @@ public class FastDataTest {
      * Verify that some common data can be written and read back, effectively
      * "bouncing" it through a serialized representation.
      */
-    private static void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
+    private void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
             @NonNull ThrowingConsumer<FastDataInput> in) throws Exception {
         doBounce(out, in, BOUNCE_REPEAT);
     }
 
-    private static void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
+    private void doBounce(@NonNull ThrowingConsumer<FastDataOutput> out,
             @NonNull ThrowingConsumer<FastDataInput> in, int count) throws Exception {
         final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
-        final FastDataOutput outData = new FastDataOutput(outStream, BOUNCE_SIZE);
+        final FastDataOutput outData = new FastDataOutput(outStream,
+                BOUNCE_SIZE, use4ByteSequence);
         for (int i = 0; i < count; i++) {
             out.accept(outData);
         }
         outData.flush();
 
         final ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray());
-        final FastDataInput inData = new FastDataInput(inStream, BOUNCE_SIZE);
+        final FastDataInput inData = new FastDataInput(inStream,
+                BOUNCE_SIZE, use4ByteSequence);
         for (int i = 0; i < count; i++) {
             in.accept(inData);
         }
-- 
GitLab