Files
TerracottaWarriors/Plugins/AndroidVulkanVideo_5.3/Source/AndroidVulkanVideo/Private/UnrealAudioOut.cpp
2025-07-14 22:24:27 +08:00

424 lines
12 KiB
C++

// ------------------------------------------------
// Copyright Joe Marshall 2024- All Rights Reserved
// ------------------------------------------------
//
// Unreal audio output implementation.
// ------------------------------------------------
#include "UnrealAudioOut.h"
#include "AudioDevice.h"
#include "Components/ActorComponent.h"
#include "Components/AudioComponent.h"
#include "Runtime/Engine/Classes/Sound/AudioSettings.h"
#include <time.h>
#include "Engine/engine.h"
#include "UnrealLogging.h"
// Read the monotonic clock in nanoseconds (Android only).
// Returns -1 on platforms where this clock isn't wired up.
static int64_t _getNSTime()
{
#if PLATFORM_ANDROID
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);
    const int64_t kNanosPerSecond = 1000000000LL;
    return static_cast<int64_t>(now.tv_sec) * kNanosPerSecond +
           static_cast<int64_t>(now.tv_nsec);
#else
    return -1;
#endif
}
// Scale a nanosecond value by a (playback-rate) factor, computed in
// double precision and truncated back to integer nanoseconds.
static int64_t _scaleTime(int64_t val, float scalar)
{
    return static_cast<int64_t>(static_cast<double>(val) * static_cast<double>(scalar));
}
// Clear all queued PCM data, then pre-queue a small block of silence so the
// engine has something to pull immediately after the reset.
void USoundWaveProceduralWithTiming::ResetAudio()
{
USoundWaveProcedural::ResetAudio();
QueueSilence(4096);
}
// Queue `bytes` bytes of zeroed PCM into the procedural sound wave, in
// chunks of at most 4096 bytes (the size of the scratch buffer).
// Used to pad the stream for A/V sync and to prime the queue after a reset.
void USoundWaveProceduralWithTiming::QueueSilence(int64_t bytes)
{
    // BUG FIX: %d with an int64_t vararg is undefined behaviour on LP64
    // targets (Android arm64) - use %lld and an explicit cast.
    UE_LOG(LogDirectVideo, VeryVerbose, TEXT("Queue Silence %lld"),
           static_cast<long long>(bytes));
    uint8_t silence[4096];
    memset(silence, 0, sizeof(silence));
    while (bytes > 0)
    {
        // Last chunk may be shorter than the scratch buffer.
        const int64_t count = (bytes < 4096) ? bytes : 4096;
        bytes -= count;
        QueueAudio(silence, count);
    }
}
// Engine callback that drains PCM from the procedural queue. While paused,
// hands back silence instead of consuming the queue, so playback position
// is preserved across a pause.
int32 USoundWaveProceduralWithTiming::GeneratePCMData(uint8 *PCMData, const int32 SamplesNeeded)
{
// Record when the engine last pulled from us - UnrealAudioOut uses this
// timestamp to estimate when queued audio will actually be heard.
lastBufSendTime = _getNSTime();
if (!paused)
{
return USoundWaveProcedural::GeneratePCMData(PCMData, SamplesNeeded);
}
else
{
// NOTE(review): the base implementation returns the number of BYTES
// written; returning SamplesNeeded here (while zeroing
// SampleByteSize * SamplesNeeded bytes) may under-report - confirm
// against USoundWaveProcedural::GeneratePCMData.
memset(PCMData, 0, SampleByteSize * SamplesNeeded);
return SamplesNeeded;
}
}
// Construct in the un-initialised state; init() / initSilent() set up the
// actual output path. `pOwner` (may be null) is the component whose actor
// we search for an existing UAudioComponent to play through.
UnrealAudioOut::UnrealAudioOut(UActorComponent *pOwner)
: audioSender(NULL), audioComponent(NULL), hasTimeOffset(false), presentationTimeOffset(0),
pausedPresentationTime(-1), numChannels(0), sampleRate(0), playbackRate(1.0f),
afterSeek(false), isPlaying(false), owner(pOwner)
{
}
// Stop playback and detach the procedural sound from the component.
// Safe to call repeatedly; init() calls this before re-configuring.
void UnrealAudioOut::close()
{
    afterSeek = false;
    const bool componentAlive = audioComponent.IsValid();
    if (componentAlive)
    {
        audioComponent->Stop();
        audioComponent->SetSound(nullptr);
    }
    audioSender = nullptr;
}
// Set up audio output for a stream with the given sample rate / channel
// count: create the procedural sound wave, find (or create) an audio
// component to play it, and reset the presentation clock.
void UnrealAudioOut::init(int rate, int channels)
{
close();
// this object gets garbage collected at the right time because it is
// assigned to a component below
playbackRate = 1.0f;
audioSender = NewObject<USoundWaveProceduralWithTiming>();
audioSender->SetSampleRate(rate);
audioSender->NumChannels = channels;
audioSender->Duration = INDEFINITELY_LOOPING_DURATION;
audioSender->bLooping = false;
audioSender->bCanProcessAsync = true;
audioSender->paused = false;
if (!audioComponent.IsValid())
{
// Prefer an audio component already on the owning actor, so the user
// can control spatialisation/attenuation themselves.
if (owner)
{
AActor *actor = owner->GetOwner<AActor>();
if (actor)
{
audioComponent = actor->FindComponentByClass<UAudioComponent>();
}
}
}
if (audioComponent.IsValid())
{
audioComponent->SetSound(audioSender.Get());
}
else
{
// No component available - create a non-spatialised UI sound on the
// main audio device.
FAudioDeviceHandle audioDevice = GEngine->GetMainAudioDevice();
audioComponent = audioDevice->CreateComponent(audioSender.Get());
if (audioComponent.IsValid())
{
audioComponent->bIsUISound = true;
audioComponent->bAllowSpatialization = false;
audioComponent->SetVolumeMultiplier(1.0f);
// Rooted so GC can't collect it while we hold it.
audioComponent->AddToRoot();
audioComponent->Play();
// hold onto strong pointer to audiocomponent so it doesn't get destroyed until
// we do
audioComponentWeCreated = *audioComponent;
}
else
{
UE_LOGFMT(LogDirectVideo, Error, "Unable to create audio component!");
}
}
if (audioComponent.IsValid())
{
// NOTE(review): a component created above already had Play() called,
// so this restarts it a second time - confirm that is intentional.
audioComponent->Play();
}
numChannels = channels;
sampleRate = rate;
hasTimeOffset = false;
presentationTimeOffset = 0;
isPlaying = true;
UE_LOG(LogDirectVideo, Verbose, TEXT("Audio component: channels %d sampleRate %d"), channels,
sampleRate);
}
// Configure for video-only playback: no procedural sender is created, so
// getPresentationTimeNS() will drive the clock from the system timer.
void UnrealAudioOut::initSilent()
{
    close();
    UE_LOGFMT(LogDirectVideo, Verbose, "Audio init silent");
    hasTimeOffset = false;
    playbackRate = 1.0f;
}
// Release the audio component if we created (and rooted) it in init().
UnrealAudioOut::~UnrealAudioOut()
{
    if (audioComponentWeCreated)
    {
        // BUG FIX: un-root through the strong pointer we just checked -
        // the weak `audioComponent` could have been reset/stale, and
        // dereferencing an invalid weak pointer would crash.
        audioComponentWeCreated->RemoveFromRoot();
    }
    UE_LOGFMT(LogDirectVideo, Verbose, "Audio out destroyed");
}
// Queue a decoded PCM buffer tagged with its presentation time, and update
// the mapping between the wall clock and the media clock. The key estimate:
// this buffer will be audible once everything already queued has drained,
// i.e. at lastBufSendTime + duration-of-queued-samples.
void UnrealAudioOut::sendBuffer(uint8_t *buf, int bufSize, int64_t presentationTimeNS, bool reset)
{
if (buf == NULL || bufSize == 0)
{
return;
}
auto pinnedSender = audioSender.Get();
if (!pinnedSender)
{
UE_LOGFMT(LogDirectVideo, Error, "No audio sender");
return;
}
if (reset)
{
// reset audio time clock
// and clear audio buffers
pinnedSender->ResetAudio();
}
// lastBufSendTime == 0 means the engine hasn't pulled audio yet, so we
// can't anchor the clock on this buffer.
int64_t lastSendTime = pinnedSender->lastBufSendTime;
if (lastSendTime != 0)
{
// Assumes 16-bit PCM: bytes / (channels * 2) = sample frames queued.
int64_t samplesInSender = pinnedSender->GetAvailableAudioByteCount() / (numChannels * 2);
if (samplesInSender <= 0)
{
// Queue drained - pad with silence so the estimate below stays sane.
pinnedSender->QueueSilence(4096);
samplesInSender = 4096 / (numChannels * 2L);
}
int64_t timeForBuffer = (1000000000LL * samplesInSender) / static_cast<int64_t>(sampleRate);
// Wall-clock time at which THIS buffer should start being audible.
int64_t thisOutputTimeNS = lastSendTime + timeForBuffer;
if (afterSeek)
{
// we've seeked and we've seen a video frame already
// so add silence if required so that we can sync on the
// already displayed video frame(s)
afterSeek = false;
int64_t newOffset = presentationTimeNS - thisOutputTimeNS;
// Only pad if we'd be more than 10ms ahead of the video-derived clock.
if (newOffset > (presentationTimeOffset + 10000000L))
{
UE_LOG(LogDirectVideo, VeryVerbose, TEXT("After seek silence"));
// queue silence to make things line up
int64_t difference = newOffset - presentationTimeOffset;
int64_t silenceSamples =
(difference * static_cast<int64_t>(sampleRate)) / 1000000000LL;
pinnedSender->QueueSilence(silenceSamples * 2L * numChannels);
}
}
else
{
// Normal path: re-anchor the media clock on this buffer.
presentationTimeOffset = presentationTimeNS - thisOutputTimeNS;
}
hasTimeOffset = true;
UE_LOG(LogDirectVideo, VeryVerbose,
TEXT("Adjusting offset (%d) - now %ld timeForBuffer %ld lastSendTime %ld "
"thisSendTime %ld presentationTimeNS %ld offset %ld"),
bufSize, _getNSTime(), timeForBuffer, lastSendTime, thisOutputTimeNS,
presentationTimeNS, presentationTimeOffset);
}
pinnedSender->QueueAudio(buf, bufSize);
}
// Current media-clock time in nanoseconds: scaled wall clock plus the
// presentation offset. Returns -1 while an audio-driven clock exists but
// hasn't been anchored by the first audio buffer yet.
int64_t UnrealAudioOut::getPresentationTimeNS()
{
    if (!hasTimeOffset)
    {
        if (audioSender)
        {
            // With audio: wait for sendBuffer() to anchor the clock.
            return -1;
        }
        // Silent mode: anchor the clock at zero on the first query so we
        // don't drop a frame at startup.
        presentationTimeOffset = -_scaleTime(_getNSTime(), playbackRate);
        hasTimeOffset = true;
    }
    return _scaleTime(_getNSTime(), playbackRate) + presentationTimeOffset;
}
// Convert a target presentation time into an absolute steady_clock deadline
// the caller can sleep until. Returns "now" if the frame is already due.
// maxDurationNS (> 0) caps how long we're willing to wait.
IAudioOut::NsTime UnrealAudioOut::getWaitTimeForPresentationTime(int64_t presentationTimeNS,
int64_t maxDurationNS)
{
int64_t thisTime = getPresentationTimeNS();
int64_t duration = presentationTimeNS - thisTime;
// give a little slack for processing time
duration = (duration * 100L) / 98L;
if (maxDurationNS > 0 && duration > maxDurationNS)
{
duration = maxDurationNS;
}
NsTime curTime = std::chrono::steady_clock::now();
if (duration < 0)
{
// Frame is late - present immediately.
return curTime;
}
// NOTE(review): the wait is MULTIPLIED by playbackRate via _scaleTime;
// for rates != 1 confirm the direction (slower playback would normally
// need a longer real-time wait).
NsTime waitUntilTime = curTime + NsDuration(_scaleTime(duration, playbackRate));
return waitUntilTime;
}
// Apply a volume multiplier, clamped to [0, 1].
// Returns false when there is no audio component to apply it to.
bool UnrealAudioOut::setVolume(float volume)
{
    if (!audioComponent.IsValid())
    {
        return false;
    }
    const float clamped = (volume < 0.0f) ? 0.0f : ((volume > 1.0f) ? 1.0f : volume);
    audioComponent->SetVolumeMultiplier(clamped);
    return true;
}
// Pause or resume playback. On pause the current presentation time is
// saved; on resume the clock offset is rebased so the presentation time
// continues from where it stopped. The audio sender keeps running but
// outputs silence while paused (see GeneratePCMData).
void UnrealAudioOut::setPlaying(bool playing)
{
if (playing == isPlaying)
{
return;
}
isPlaying = playing;
if (playing)
{
// starting after a pause
// reset presentation time offset
if (pausedPresentationTime != -1 && hasTimeOffset)
{
presentationTimeOffset =
pausedPresentationTime - _scaleTime(_getNSTime(), playbackRate);
}
}
else
{
// pausing, save current prestime for restore
pausedPresentationTime = getPresentationTimeNS();
}
if (audioSender)
{
audioSender->paused = !playing;
}
}
// Rebase the presentation clock after a seek (or loop) to
// newPresentationTimeNs. Behaviour depends on whether audio drives the
// clock and whether the audio buffers should be discarded.
void UnrealAudioOut::onSeek(int64_t newPresentationTimeNs, bool resetAudio)
{
if (!audioSender)
{
// Silent mode: just rebase the wall-clock-derived offset.
presentationTimeOffset = newPresentationTimeNs - _scaleTime(_getNSTime(), playbackRate);
UE_LOG(LogDirectVideo, VeryVerbose, TEXT("Seeking: pt %ld update offset %ld (rate=%f)"),
newPresentationTimeNs, presentationTimeOffset, playbackRate);
}
else
{
// clear audio buffers
if (resetAudio)
{
// after a seek, we may receive either audio or video frames
// first. Here we clear the audio buffers, and reset the time
// offset so that first of either audio or video frame received
// will reset the clock
audioSender->ResetAudio();
hasTimeOffset = false;
}
else
{
// we are looping - don't clear the audio buffer
// and make a rough guess at the clock so that video
// frames will still send out correctly and then resync clock when first audio frames
// received
int64_t lastSendTime = audioSender->lastBufSendTime;
if (lastSendTime == 0)
{
// not actually sent anything yet - assume it will send pretty much now
lastSendTime = _getNSTime();
}
// Assumes 16-bit PCM: bytes / (channels * 2) = queued sample frames.
int64_t samplesInSender = audioSender->GetAvailableAudioByteCount() / (numChannels * 2);
int64_t timeForBuffer =
(1000000000LL * samplesInSender) / static_cast<int64_t>(sampleRate);
// when the next buffer (i.e. the one at newPresentationTimeNs)
// will get sent at (in clock time )
int64_t thisOutputTimeNS = lastSendTime + timeForBuffer;
presentationTimeOffset =
newPresentationTimeNs - _scaleTime(thisOutputTimeNS, playbackRate);
}
}
}
// Set the playback rate. With an audio sender only 1.0 (normal) and 0.0
// (paused) are accepted; resampling isn't supported yet. Without audio,
// any positive rate is applied and the clock rebased so presentation time
// stays continuous. Returns true when the requested rate is in effect.
bool UnrealAudioOut::setRate(float newRate)
{
    if (audioSender)
    {
        if (newRate != 1.0 && newRate != 0.0)
        {
            UE_LOGFMT(LogDirectVideo, Error,
                      "Trying to set playback rate on video with audio, not supported yet");
            return false;
        }
        // 1.0 / 0.0 are already what audio playback does - nothing to change.
        return true;
    }
    if (newRate > 0)
    {
        if (hasTimeOffset)
        {
            // Rebase the clock at the current presentation time so video
            // frame timing stays continuous across the rate change.
            int64_t presTime = getPresentationTimeNS();
            playbackRate = newRate;
            onSeek(presTime, false);
        }
        else
        {
            playbackRate = newRate;
        }
        // BUG FIX: this success path previously fell through and returned
        // false even though the rate had been applied.
        return true;
    }
    return false;
}
// Duration (in ns) of audio currently queued in the sender, assuming
// 16-bit PCM. Returns -1 when there is no audio sender or the format
// hasn't been configured yet.
int64_t UnrealAudioOut::getQueuedTimeNS()
{
    if (!audioSender)
    {
        return -1;
    }
    // ROBUSTNESS: guard the divisions - init() may not have set the
    // format yet (numChannels / sampleRate would be 0).
    if (numChannels <= 0 || sampleRate <= 0)
    {
        return -1;
    }
    int64_t samplesInSender = audioSender->GetAvailableAudioByteCount() / (numChannels * 2);
    // LL suffix for consistency with the other 64-bit time math in this file.
    return (1000000000LL * samplesInSender) / static_cast<int64_t>(sampleRate);
}
// Called when a video frame time arrives before any audio has anchored the
// clock: anchor on the video frame for now, and mark afterSeek so the first
// audio buffer pads with silence to line up (see sendBuffer).
void UnrealAudioOut::onHasVideoTime(int64_t newPresentationTimeNS)
{
    const bool clockNeedsAnchor = audioSender && !hasTimeOffset;
    if (!clockNeedsAnchor)
    {
        return;
    }
    presentationTimeOffset = newPresentationTimeNS - _scaleTime(_getNSTime(), playbackRate);
    hasTimeOffset = true;
    afterSeek = true;
}