
Mixing Background Sound into a WebRTC Stream

2020-04-30



Background

On Windows, WebRTC is used to capture video, and the result is published as an RTMP live stream. By default, WebRTC captures only the microphone, not the sound the machine itself is playing (the background audio), so capturing that background audio and mixing it in has to be implemented in code.

 

Approach

The Windows API includes audio-capture facilities, and the official documentation even provides a simple sample, although it doesn't run as-is :). The plan is therefore to capture PCM audio through the Windows API and then merge it with the microphone signal through WebRTC's group-call mixing mechanism.

 

Core code

Audio capture
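
The snippet below relies on a few helpers from Microsoft's WASAPI "Capturing a Stream" sample; they are presumably defined along these lines (REFERENCE_TIME is expressed in 100-nanosecond units):

#define REFTIMES_PER_SEC       10000000
#define REFTIMES_PER_MILLISEC  10000

#define EXIT_ON_ERROR(hres) \
    if (FAILED(hres)) { goto Exit; }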

DWORD AudioCaptureCore::DoCaptureThread()
{
    keepRecording_ = true;
    // Retained from the event-driven capture template; unused here, because
    // loopback streams do not signal capture events and the loop below polls.
    HANDLE waitArray[2] = { _hShutdownCaptureEvent, _hCaptureSamplesReadyEvent };
    HRESULT hr = S_OK;

    LARGE_INTEGER t1;
    LARGE_INTEGER t2;
    int32_t time(0);

    BYTE* syncBuffer = NULL;
    UINT32 syncBufIndex = 0;

    _readSamples = 0;

    // Initialize COM as MTA in this thread.
    ScopedCOMInitializer comInit(ScopedCOMInitializer::kMTA);
    if (!comInit.succeeded()) {
        WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id,
            "failed to initialize COM in capture thread");
        return 1;
    }

    hr = InitCaptureThreadPriority();
    if (FAILED(hr))
    {
        return hr;
    }

    _Lock();


    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    REFERENCE_TIME hnsActualDuration;
    UINT32 bufferLength;
    IMMDeviceEnumerator *pEnumerator = NULL;
    IMMDevice *pDevice = NULL;
    WAVEFORMATEX *pwfx = NULL;


    hr = CoCreateInstance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL,
        IID_IMMDeviceEnumerator, (void**)&pEnumerator);
    EXIT_ON_ERROR(hr);

    // Note the eRender data-flow direction: in loopback mode the capture
    // client records what the default render device is playing.
    hr = pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    EXIT_ON_ERROR(hr);

    hr = pDevice->Activate(IID_IAudioClient, CLSCTX_ALL, NULL, (void**)&_ptrAudioClientIn);
    EXIT_ON_ERROR(hr);

    // Ask the endpoint for its shared-mode mix format.
    hr = _ptrAudioClientIn->GetMixFormat(&pwfx);
    EXIT_ON_ERROR(hr);

    // Overwrite the mix format with 16-bit stereo PCM at the endpoint's native
    // sample rate, so that the captured data matches what the WebRTC audio
    // pipeline expects.
    WAVEFORMATEX waveFormat;
    waveFormat.wFormatTag = WAVE_FORMAT_PCM;
    waveFormat.nChannels = 2;
    waveFormat.nSamplesPerSec = pwfx->nSamplesPerSec;
    waveFormat.nAvgBytesPerSec = pwfx->nSamplesPerSec * 4;   // nSamplesPerSec * nBlockAlign
    waveFormat.wBitsPerSample = 16;
    waveFormat.nBlockAlign = 4;                              // 2 channels * 16 bits / 8
    waveFormat.cbSize = 0;

    *pwfx = waveFormat;

    // AUDCLNT_STREAMFLAGS_LOOPBACK on a render endpoint captures whatever the
    // machine is playing (the "background" sound).
    hr = _ptrAudioClientIn->Initialize(AUDCLNT_SHAREMODE_SHARED,
        AUDCLNT_STREAMFLAGS_LOOPBACK,
        hnsRequestedDuration,
        0,
        pwfx,
        NULL);
    EXIT_ON_ERROR(hr);

    // Set the VoE format equal to the AEC output format.
    _recAudioFrameSize = pwfx->nBlockAlign;
    _recSampleRate = pwfx->nSamplesPerSec;
    _recBlockSize = pwfx->nSamplesPerSec / 100;   // frames per 10 ms block
    _recChannels = pwfx->nChannels;

    if (_ptrAudioBuffer)
    {
        // Update the audio buffer with the selected parameters
        _ptrAudioBuffer->SetRecordingSampleRate(_recSampleRate);
        _ptrAudioBuffer->SetRecordingChannels((uint8_t)_recChannels);
    }
    else
    {
        // We can enter this state during CoreAudioIsSupported() when no AudioDeviceImplementation
        // has been created, hence the AudioDeviceBuffer does not exist.
        // It is OK to end up here since we don't initiate any media in CoreAudioIsSupported().
        WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "AudioDeviceBuffer must be attached before streaming can start");
    }

    // Get the size of the allocated buffer.
    hr = _ptrAudioClientIn->GetBufferSize(&bufferLength);
    EXIT_ON_ERROR(hr);

    hr = _ptrAudioClientIn->GetService(__uuidof(IAudioCaptureClient), (void**)&_ptrCaptureClient);
    EXIT_ON_ERROR(hr);

    // Notify the audio sink which format to use. The code below would hand the
    // captured format to another (user-defined) class; since the format was set
    // manually above, no such notification is needed.
    // hr = pMySink->SetFormat(pwfx);
    // EXIT_ON_ERROR(hr)

    // Calculate the actual duration of the allocated buffer.
    hnsActualDuration = (double)REFTIMES_PER_SEC * bufferLength / pwfx->nSamplesPerSec;

    //hr = _ptrAudioClientIn->Start();  // Start recording.
    //EXIT_ON_ERROR(hr);



    // Get size of capturing buffer (length is expressed as the number of audio frames the buffer can hold).
    // This value is fixed during the capturing session.
    //
    if (_ptrAudioClientIn == NULL)
    {
        WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id,
            "input state has been modified before capture loop starts.");
        return 1;
    }
    hr = _ptrAudioClientIn->GetBufferSize(&bufferLength);
    EXIT_ON_ERROR(hr);
    WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] size of buffer       : %u", bufferLength);

    // Allocate memory for sync buffer.
    // It is used for compensation between native 44.1 and internal 44.0 and
    // for cases when the capture buffer is larger than 10ms.
    //
    const UINT32 syncBufferSize = 2 * (bufferLength * _recAudioFrameSize);
    syncBuffer = new BYTE[syncBufferSize];
    if (syncBuffer == NULL)
    {
        return (DWORD)E_POINTER;
    }
    WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] size of sync buffer  : %u [bytes]", syncBufferSize);

    // Get maximum latency for the current stream (will not change for the lifetime of the IAudioClient object).
    //
    REFERENCE_TIME latency;
    _ptrAudioClientIn->GetStreamLatency(&latency);
    WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] max stream latency   : %u (%3.2f ms)",
        (DWORD)latency, (double)(latency / 10000.0));

    // Get the length of the periodic interval separating successive processing passes by
    // the audio engine on the data in the endpoint buffer.
    //
    REFERENCE_TIME devPeriod = 0;
    REFERENCE_TIME devPeriodMin = 0;
    _ptrAudioClientIn->GetDevicePeriod(&devPeriod, &devPeriodMin);
    WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] device period        : %u (%3.2f ms)",
        (DWORD)devPeriod, (double)(devPeriod / 10000.0));

    double extraDelayMS = (double)((latency + devPeriod) / 10000.0);
    WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] extraDelayMS         : %3.2f", extraDelayMS);

    double endpointBufferSizeMS = 10.0 * ((double)bufferLength / (double)_recBlockSize);
    WEBRTC_TRACE(kTraceInfo, kTraceAudioDevice, _id, "[CAPT] endpointBufferSizeMS : %3.2f", endpointBufferSizeMS);

    // Start up the capturing stream.
    //
    hr = _ptrAudioClientIn->Start();
    EXIT_ON_ERROR(hr);

    _UnLock();

    // Set event which will ensure that the calling thread modifies the recording state to true.
    //
    SetEvent(_hCaptureStartedEvent);

    // >> ---------------------------- THREAD LOOP ----------------------------


        while (keepRecording_)
        {
            BYTE *pData = 0;
            UINT32 framesAvailable = 0;
            DWORD flags = 0;
            UINT64 recTime = 0;
            UINT64 recPos = 0;

            std::cout << "bgm audio capturing" << std::endl;

            _Lock();

            // Sanity check to ensure that essential states are not modified
            // during the unlocked period.
            if (_ptrCaptureClient == NULL || _ptrAudioClientIn == NULL)
            {
                _UnLock();
                WEBRTC_TRACE(kTraceCritical, kTraceAudioDevice, _id,
                    "input state has been modified during unlocked period");
                goto Exit;
            }

            //  Find out how much capture data is available
            //
            hr = _ptrCaptureClient->GetBuffer(&pData,           // packet which is ready to be read by the user
                &framesAvailable, // #frames in the captured packet (can be zero)
                &flags,           // support flags (check)
                &recPos,          // device position of first audio frame in data packet
                &recTime);        // value of performance counter at the time of recording the first audio frame

            if (SUCCEEDED(hr))
            {
                if (AUDCLNT_S_BUFFER_EMPTY == hr)
                {
                    // Buffer was empty. Loopback streams do not signal the
                    // capture event, so yield briefly before polling again
                    // instead of busy-spinning.
                    _UnLock();
                    Sleep(5);
                    continue;
                }

                if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
                {
                    // Treat all of the data in the packet as silence and ignore the actual data values.
                    WEBRTC_TRACE(kTraceWarning, kTraceAudioDevice, _id, "AUDCLNT_BUFFERFLAGS_SILENT");
                    pData = NULL;
                }

                assert(framesAvailable != 0);

                if (pData)
                {
                    CopyMemory(&syncBuffer[syncBufIndex*_recAudioFrameSize], pData, framesAvailable*_recAudioFrameSize);
                }
                else
                {
                    ZeroMemory(&syncBuffer[syncBufIndex*_recAudioFrameSize], framesAvailable*_recAudioFrameSize);
                }
                assert(syncBufferSize >= (syncBufIndex*_recAudioFrameSize) + framesAvailable*_recAudioFrameSize);

                // Release the capture buffer
                //
                hr = _ptrCaptureClient->ReleaseBuffer(framesAvailable);
                EXIT_ON_ERROR(hr);

                _readSamples += framesAvailable;
                syncBufIndex += framesAvailable;

                QueryPerformanceCounter(&t1);

                // Get the current recording and playout delay.
                uint32_t sndCardRecDelay = (uint32_t)
                    (((((UINT64)t1.QuadPart * _perfCounterFactor) - recTime)
                        / 10000) + (10 * syncBufIndex) / _recBlockSize - 10);
                uint32_t sndCardPlayDelay =
                    static_cast<uint32_t>(_sndCardPlayDelay);

                _sndCardRecDelay = sndCardRecDelay;

                while (syncBufIndex >= _recBlockSize)
                {
                    if (_ptrAudioBuffer)
                    {
                        _ptrAudioBuffer->SetRecordedBuffer((const int8_t*)syncBuffer, _recBlockSize);
                        _ptrAudioBuffer->SetVQEData(sndCardPlayDelay,
                            sndCardRecDelay,
                            0);

                        _ptrAudioBuffer->SetTypingStatus(KeyPressed());

                        QueryPerformanceCounter(&t1);    // measure time: START

                        _UnLock();  // release lock while making the callback
                        _ptrAudioBuffer->DeliverRecordedData();
                        _Lock();    // restore the lock

                        QueryPerformanceCounter(&t2);    // measure time: STOP

                                                         // Measure "average CPU load".
                                                         // Basically what we do here is to measure how many percent of our 10ms period
                                                         // is used for encoding and decoding. This value shuld be used as a warning indicator
                                                         // only and not seen as an absolute value. Running at ~100% will lead to bad QoS.
                        time = (int)(t2.QuadPart - t1.QuadPart);
                        _avgCPULoad = (float)(_avgCPULoad*.99 + (time + _playAcc) / (double)(_perfCounterFreq.QuadPart));
                        _playAcc = 0;

                        // Sanity check to ensure that essential states are not modified during the unlocked period
                        if (_ptrCaptureClient == NULL || _ptrAudioClientIn == NULL)
                        {
                            _UnLock();
                            WEBRTC_TRACE(kTraceCritical, kTraceAudioDevice, _id, "input state has been modified during unlocked period");
                            goto Exit;
                        }
                    }

                    // store remaining data which was not able to deliver as 10ms segment
                    MoveMemory(&syncBuffer[0], &syncBuffer[_recBlockSize*_recAudioFrameSize], (syncBufIndex - _recBlockSize)*_recAudioFrameSize);
                    syncBufIndex -= _recBlockSize;
                    sndCardRecDelay -= 10;
                }

                if (_AGC)
                {
                    uint32_t newMicLevel = _ptrAudioBuffer->NewMicLevel();
                    if (newMicLevel != 0)
                    {
                        // The VQE will only deliver non-zero microphone levels when a change is needed.
                        // Set this new mic level (received from the observer as return value in the callback).
                        WEBRTC_TRACE(kTraceStream, kTraceAudioDevice, _id, "AGC change of volume: new=%u", newMicLevel);
                        // We store this outside of the audio buffer to avoid
                        // having it overwritten by the getter thread.
                        _newMicLevel = newMicLevel;
                        SetEvent(_hSetCaptureVolumeEvent);
                    }
                }
            }
            else
            {
                // If GetBuffer returns AUDCLNT_E_BUFFER_ERROR, the thread consuming the audio samples
                // must wait for the next processing pass. The client might benefit from keeping a count
                // of the failed GetBuffer calls. If GetBuffer returns this error repeatedly, the client
                // can start a new processing loop after shutting down the current client by calling
                // IAudioClient::Stop, IAudioClient::Reset, and releasing the audio client.
                WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id,
                    "IAudioCaptureClient::GetBuffer returned AUDCLNT_E_BUFFER_ERROR, hr = 0x%08X", hr);
                goto Exit;
            }

            _UnLock();
        }

    // ---------------------------- THREAD LOOP ---------------------------- <<

    if (_ptrAudioClientIn)
    {
        hr = _ptrAudioClientIn->Stop();
    }

Exit:
    if (FAILED(hr))
    {
        if (_ptrAudioClientIn)
        {
            _ptrAudioClientIn->Stop();
        }
        _UnLock();
        _TraceCOMError(hr);
    }

    RevertCaptureThreadPriority();

    // Release the COM objects and mix format allocated during setup.
    if (pwfx)
    {
        CoTaskMemFree(pwfx);
    }
    if (pDevice)
    {
        pDevice->Release();
    }
    if (pEnumerator)
    {
        pEnumerator->Release();
    }

    if (syncBuffer)
    {
        delete[] syncBuffer;
    }

    return (DWORD)hr;
}
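
For completeness, here is a minimal sketch of how this capture thread might be started and stopped, modeled on the thread handling in WebRTC's AudioDeviceWindowsCore. The WSAPICaptureThread trampoline and the _hRecThread member are assumptions for illustration, not part of the original code, and keepRecording_ should be a volatile/atomic flag since it is written from another thread.

// Hypothetical trampoline: CreateThread() needs a free function, which
// simply forwards to the member function above.
static DWORD WINAPI WSAPICaptureThread(LPVOID context)
{
    return reinterpret_cast<AudioCaptureCore*>(context)->DoCaptureThread();
}

int32_t AudioCaptureCore::StartRecording()
{
    // Spawn the capture thread; DoCaptureThread() signals
    // _hCaptureStartedEvent once the loopback stream is running.
    _hRecThread = CreateThread(NULL, 0, WSAPICaptureThread, this, 0, NULL);
    if (_hRecThread == NULL)
    {
        return -1;
    }
    return (WaitForSingleObject(_hCaptureStartedEvent, 1000) == WAIT_OBJECT_0) ? 0 : -1;
}

int32_t AudioCaptureCore::StopRecording()
{
    // Ask the loop to exit and wake any pending wait, then join the thread.
    keepRecording_ = false;
    SetEvent(_hShutdownCaptureEvent);
    WaitForSingleObject(_hRecThread, 2000);
    CloseHandle(_hRecThread);
    _hRecThread = NULL;
    return 0;
}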

 

Audio mixing

Create a webrtc::AudioConferenceMixer member (webrtc::AudioConferenceMixer *audio_mixer_ = nullptr;) and use it to mix whenever more than one audio source is active, roughly as sketched below.
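
As a rough wiring sketch, setup might look like the following. Here mic_participant_, bgm_participant_ and mixed_audio_receiver_ are hypothetical MixerParticipant / AudioMixerOutputReceiver implementations wrapping the two capture paths; the calls follow the legacy audio_conference_mixer module, and exact signatures vary between WebRTC revisions.

// Create the mixer once (the module id, 0 here, is arbitrary).
audio_mixer_ = webrtc::AudioConferenceMixer::Create(0);

// mixed_audio_receiver_ receives the mixed 10 ms frames via NewMixedAudio()
// and forwards them to the RTMP encoder.
audio_mixer_->RegisterMixedStreamCallback(&mixed_audio_receiver_);

// Register each source (microphone, loopback/BGM) as a mixable participant;
// the mixer pulls a 10 ms AudioFrame from each on every Process() call.
audio_mixer_->SetMixabilityStatus(&mic_participant_, true);
audio_mixer_->SetMixabilityStatus(&bgm_participant_, true);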

int32_t AnyRtmpCore::RecordedDataIsAvailable(const void* audioSamples, const size_t nSamples,
    const size_t nBytesPerSample, const size_t nChannels, const uint32_t samplesPerSec, const uint32_t totalDelayMS,
    const int32_t clockDrift, const uint32_t currentMicLevel, const bool keyPressed, uint32_t& newMicLevel)
{
    std::cout << "[-----------] record data avaliable " << nSamples << nBytesPerSample << nChannels << samplesPerSec << std::endl;
    rtc::CritScope cs(&cs_audio_record_);

    // Both microphone and BGM are active: hand the data to the device mixer
    // and run one mixing pass.
    if (microphone_enable_ && bgm_enable_) {
        audio_device_mixer_ptr_->RecordedDataIsAvailable(audioSamples, nSamples,
            nBytesPerSample, nChannels, samplesPerSec, totalDelayMS,
            clockDrift, currentMicLevel, keyPressed, newMicLevel);
        if (audio_mixer_) {
            audio_mixer_->Process();
        }        
    }
    else
    {
        // Only one source is active, so there is nothing to mix; resample if
        // the target format differs, then deliver the data directly.
        if (audio_record_callback_) {
            if (audio_record_sample_hz_ != samplesPerSec || nChannels != audio_record_channels_) {
                int16_t temp_output[kMaxDataSizeSamples];
                int samples_per_channel_int = resampler_record_.Resample10Msec((int16_t*)audioSamples, samplesPerSec * nChannels,
                    audio_record_sample_hz_ * audio_record_channels_, 1, kMaxDataSizeSamples, temp_output);
                audio_record_callback_->OnRecordAudio(temp_output, audio_record_sample_hz_ / 100, nBytesPerSample, audio_record_channels_, audio_record_sample_hz_, totalDelayMS);
            }
            else {
                audio_record_callback_->OnRecordAudio(audioSamples, nSamples, nBytesPerSample, audio_record_channels_, samplesPerSec, totalDelayMS);
            }
        }
    }
        
    return 0;
}
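
To feed captured PCM into the mixer, each source can be wrapped in a MixerParticipant from which the mixer pulls a 10 ms AudioFrame on every Process() pass. The class below is a hypothetical sketch, not part of the original code; the interface signatures follow one legacy WebRTC revision and may differ in yours, and the single latest-frame slot would need to become a real ring buffer in production.

// Hypothetical wrapper for one capture path (microphone or loopback/BGM).
class CaptureParticipant : public webrtc::MixerParticipant {
public:
    // Called from the capture thread with one 10 ms block of PCM; stores it
    // in a single latest-frame slot.
    void PushPcm(const int16_t* samples, size_t samples_per_channel,
                 size_t channels, int sample_rate_hz) {
        rtc::CritScope cs(&crit_);
        frame_.UpdateFrame(0, 0, samples, samples_per_channel, sample_rate_hz,
                           webrtc::AudioFrame::kNormalSpeech,
                           webrtc::AudioFrame::kVadActive, channels);
    }

    // Called by the mixer on each Process() pass; hands over the latest frame.
    int32_t GetAudioFrame(int32_t id, webrtc::AudioFrame* audio_frame) override {
        rtc::CritScope cs(&crit_);
        audio_frame->CopyFrom(frame_);
        return 0;
    }

    // Sample rate this participant wants to be mixed at; 48 kHz is assumed
    // here, but a real implementation would report the actual capture rate.
    int32_t NeededFrequency(int32_t id) const override {
        return 48000;
    }

private:
    rtc::CriticalSection crit_;
    webrtc::AudioFrame frame_;
};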

 

Source: https://www.cnblogs.com/vectorli/p/12810196.html
