Commit 732964fa authored by Paul Adenot
Browse files

Bug 1703812 - Part 13 - Trim encoder delay and padding when decoding AAC with...

Bug 1703812 - Part 13 - Trim encoder delay and padding when decoding AAC with android/RemoteDataDecoder.cpp. r=alwu

Differential Revision: https://phabricator.services.mozilla.com/D167681
parent 86e04dad
Loading
Loading
Loading
Loading
+77 −14
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "mozilla/java/SampleWrappers.h"
#include "mozilla/java/SurfaceAllocatorWrappers.h"
#include "mozilla/Maybe.h"
#include "mozilla/Casting.h"
#include "nsPromiseFlatString.h"
#include "nsThreadUtils.h"
#include "prlog.h"
@@ -562,15 +563,35 @@ class RemoteAudioDecoder : public RemoteDataDecoder {
    bool formatHasCSD = false;
    NS_ENSURE_SUCCESS_VOID(aFormat->ContainsKey(u"csd-0"_ns, &formatHasCSD));

    // It would be nice to instead use more specific information here, but
    // we force a byte buffer for now since this handles arbitrary codecs.
    // TODO(bug 1768564): implement further type checking for codec data.
    uint8_t* audioSpecConfig;
    uint32_t configLength;
    if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
      const AacCodecSpecificData& aacCodecSpecificData =
          aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();

      mRemainingEncoderDelay = mEncoderDelay =
          aacCodecSpecificData.mEncoderDelayFrames;
      mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;
      audioSpecConfig =
          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements();
      configLength =
          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length();
      LOG("Android RemoteDataDecoder: Found AAC decoder delay (%" PRIu32
          " frames) and total media frames (%" PRIu64 " frames)",
          mEncoderDelay, mTotalMediaFrames);
    } else {
      // This path is rarely taken: this class is almost always used to decode
      // AAC, but it can decode other codecs as well.
      RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob =
          ForceGetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);
    if (!formatHasCSD && audioCodecSpecificBinaryBlob->Length() >= 2) {
      audioSpecConfig = audioCodecSpecificBinaryBlob->Elements();
      configLength = audioCodecSpecificBinaryBlob->Length();
      LOG("Android RemoteDataDecoder: extracting generic codec-specific data.");
    }

    if (!formatHasCSD && configLength >= 2) {
      jni::ByteBuffer::LocalRef buffer(env);
      buffer = jni::ByteBuffer::New(audioCodecSpecificBinaryBlob->Elements(),
                                    audioCodecSpecificBinaryBlob->Length());
      buffer = jni::ByteBuffer::New(audioSpecConfig, configLength);
      NS_ENSURE_SUCCESS_VOID(aFormat->SetByteBuffer(u"csd-0"_ns, buffer));
    }
  }
@@ -732,25 +753,63 @@ class RemoteAudioDecoder : public RemoteDataDecoder {

    if (size > 0) {
#ifdef MOZ_SAMPLE_TYPE_S16
      const int32_t numSamples = size / 2;
      uint32_t numSamples = size / sizeof(int16_t);
      uint32_t numFrames = numSamples / mOutputChannels;
#else
#  error We only support 16-bit integer PCM
#endif
      uint32_t bufferOffset = AssertedCast<uint32_t>(offset);
      if (mRemainingEncoderDelay) {
        uint32_t toPop = std::min(numFrames, mRemainingEncoderDelay);
        bufferOffset += toPop * mOutputChannels * sizeof(int16_t);
        numFrames -= toPop;
        numSamples -= toPop * mOutputChannels;
        mRemainingEncoderDelay -= toPop;
        LOG("Dropping %" PRId32
            " audio frames, corresponding the the encoder"
            " delay. Remaining "
            "%" PRIu32 ".",
            toPop, mRemainingEncoderDelay);
      }

      mDecodedFrames += numFrames;

      if (mTotalMediaFrames && mDecodedFrames > mTotalMediaFrames) {
        uint32_t paddingFrames = std::min(mDecodedFrames - mTotalMediaFrames,
                                          AssertedCast<uint64_t>(numFrames));
        // This needs to trim the buffer, removing elements at the end: simply
        // updating the frame count is enough.
        numFrames -= AssertedCast<int32_t>(paddingFrames);
        numSamples -= paddingFrames * mOutputChannels;
        // Reset the decoded frame count, so that the encoder delay and padding
        // are trimmed correctly when looping.
        mDecodedFrames = 0;
        mRemainingEncoderDelay = mEncoderDelay;

        LOG("Dropped: %u frames, corresponding to the padding", paddingFrames);
      }

      if (numSamples == 0) {
        LOG("Trimmed a whole packet, returning.");
        return;
      }

      AlignedAudioBuffer audio(numSamples);
      if (!audio) {
        Error(MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__));
        return;
      }
      jni::ByteBuffer::LocalRef dest =
          jni::ByteBuffer::New(audio.get(), numSamples * sizeof(int16_t));
      aBuffer->WriteToByteBuffer(
          dest, AssertedCast<int32_t>(bufferOffset),
          AssertedCast<int32_t>(numSamples * sizeof(int16_t)));

      jni::ByteBuffer::LocalRef dest = jni::ByteBuffer::New(audio.get(), size);
      aBuffer->WriteToByteBuffer(dest, offset, size);

      RefPtr<AudioData> data =
      RefPtr<AudioData> processed_data =
          new AudioData(0, TimeUnit::FromMicroseconds(presentationTimeUs),
                        std::move(audio), mOutputChannels, mOutputSampleRate);

      UpdateOutputStatus(std::move(data));
      UpdateOutputStatus(processed_data);
    }

    if (isEOS) {
@@ -778,6 +837,10 @@ class RemoteAudioDecoder : public RemoteDataDecoder {
  int32_t mOutputChannels{};
  int32_t mOutputSampleRate{};
  Maybe<TimeUnit> mFirstDemuxedSampleTime;
  // Running count of audio frames kept after encoder-delay trimming. It is
  // compared against mTotalMediaFrames to detect trailing padding, and is
  // reset to 0 once padding has been trimmed so that looping trims again.
  uint64_t mDecodedFrames = 0;
  // Total number of media frames, taken from the AAC codec-specific data
  // (mMediaFrameCount). 0 disables padding trimming.
  uint64_t mTotalMediaFrames = 0;
  // Encoder delay in frames, taken from the AAC codec-specific data
  // (mEncoderDelayFrames). Used to re-arm mRemainingEncoderDelay after the
  // padding has been trimmed.
  uint32_t mEncoderDelay = 0;
  // Number of frames that still have to be dropped from the start of the
  // decoded output to account for the encoder delay.
  uint32_t mRemainingEncoderDelay = 0;
};

already_AddRefed<MediaDataDecoder> RemoteDataDecoder::CreateAudioDecoder(