1. 程式人生 > >WebRTC :NetEQ原始碼分析

WebRTC :NetEQ原始碼分析

      隨著WebRTC版本的不斷更新,內部程式碼結構也有了很大的變化,但是對於底層的音視訊引擎來講,卻沒有什麼太大的變化,因為引擎早在很久之前就已經達到了一個非常成熟穩定的狀態(早期的QQ音訊就已經使用了GIPS的NetEQ)。讀研期間我就研究過NetEQ中的一些理論,不過那時候研究得很淺,主要是通過查詢各種資料來學習NetEQ。其實我也很奇怪,如此大名鼎鼎的技術,並且已經開源了,為什麼網上研究它的資料少之又少,除了西安電子科技大學吳江銳的那篇碩士論文,基本上就沒有任何有價值的資料了。估計自己以後還會持續地研究NetEQ,所以準備一邊研究一邊記錄。

      NetEQ是GIPS公司的核心音訊引擎技術,後來GIPS公司被Google收購,這項技術落到了Google手裡並隨著WebRTC的開源一起被公之於眾。NetEQ是從接收端來處理語音包的,主要的功能是抖動消除,丟包隱藏,在網路延遲大的時候降低丟包率,在網路條件好的時候減小時延。

      M66版本的WebRTC中NetEQ程式碼介面的定義在src/modules/audio_coding/neteq/include/neteq.h標頭檔案中,而具體的實現是在src/modules/audio_coding/neteq/neteq_impl.cc檔案中。neteq.h檔案中定義了相當多的介面,但其實最重要的介面就只有兩個:第一個是向NetEQ模組中插入解析過後的、從網路中收到的RTP資料包,第二個是從NetEQ模組中取出解碼過後的pcm音訊資料。所以其實我們可以很簡單地將NetEQ看成是一個黑盒,我們往裡面扔網路中接收到的RTP資料包,它會給我們吐出解碼或者經過其它處理過後的pcm音訊資料,然後我們拿去播放。至於中間的一些過程,例如抖動消除,解碼,丟包隱藏,語音的拉伸和壓縮以及它們之間如何配合,都是我們可以不用去關心的,這其實也就是NetEQ真正的價值所在。

      neteq.h中的兩個重要函式分別是InsertPacket()和GetAudio(),它們在neteq_impl.cc中的具體實現分別是InsertPacketInternal()和GetAudioInternal(),與前面的塞包和取包的描述一一對應。下面主要介紹這兩個函式的主要流程,這兩個函式的流程也就體現了NetEQ整個處理語音包的流程。

// Inserts one received RTP packet into NetEq.
//
// The payload is wrapped in a Packet, split into its parts if the payload
// type is RED, stripped of DTMF events (which go to |dtmf_buffer_|), parsed
// into frames by the matching decoder and finally handed to |packet_buffer_|.
// Along the way the RTCP statistics, the optional NACK tracker and the delay
// manager are updated, and NetEq is re-initialized on the first packet or
// whenever the SSRC changes.
//
// Parameters:
//   rtp_header        - header of the incoming packet; payloadType,
//                       sequenceNumber, timestamp and ssrc are read here.
//   payload           - payload bytes of the packet; must not be empty.
//   receive_timestamp - forwarded to rtcp_.Update() and to the decoder's
//                       IncomingPacket().
//
// Returns 0 on success. On failure returns kInvalidPointer (empty payload),
// kRedundancySplitError, kUnknownRtpPayloadType, kDtmfParsingError,
// kDtmfInsertError or kOtherError (insertion into the packet buffer failed).
//
// Note: every error return other than the empty-payload check happens after
// the re-initialization block, so a failing first/new-SSRC packet still leaves
// the buffers flushed and |ssrc_| / |timestamp_| updated.
int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
                                    rtc::ArrayView<const uint8_t> payload,
                                    uint32_t receive_timestamp) {
  if (payload.empty()) {
    RTC_LOG_F(LS_ERROR) << "payload is empty";
    return kInvalidPointer;
  }
  PacketList packet_list;
  // Insert packet in a packet list. The immediately-invoked lambda builds the
  // Packet from the header fields and the payload bytes.
  packet_list.push_back([&rtp_header, &payload] {
    // Convert to Packet.
    Packet packet;
    packet.payload_type = rtp_header.payloadType;
    packet.sequence_number = rtp_header.sequenceNumber;
    packet.timestamp = rtp_header.timestamp;

    packet.payload.SetData(payload.data(), payload.size());
    // Waiting time will be set upon inserting the packet in the buffer.
    RTC_DCHECK(!packet.waiting_time);
    return packet;
  }());

  // True on the very first packet and whenever the sender's SSRC differs from
  // the stored one. May also be set further down if the packet buffer reports
  // that it was flushed.
  bool update_sample_rate_and_channels =
      first_packet_ || (rtp_header.ssrc != ssrc_);
  
  if (update_sample_rate_and_channels) {
    // Reset timestamp scaling.
    timestamp_scaler_->Reset();
  }
//  RTC_LOG(LS_WARNING) << "external timestamp = " << rtp_header.timestamp;
  // RED packets are scaled later, after they have been split into their
  // individual payloads.
  if (!decoder_database_->IsRed(rtp_header.payloadType)) {
    // Scale timestamp to internal domain (only for some codecs).
    timestamp_scaler_->ToInternal(&packet_list);
//    RTC_LOG(LS_WARNING) << "internal timestamp = " << packet_list.front().timestamp;
  }

  // Store these for later use, since the first packet may very well disappear
  // before we need these values.
  uint32_t main_timestamp = packet_list.front().timestamp;
  uint8_t main_payload_type = packet_list.front().payload_type;
  uint16_t main_sequence_number = packet_list.front().sequence_number;

  // Reinitialize NetEq if it's needed (changed SSRC or first call).
  if (update_sample_rate_and_channels) {
    // Note: |first_packet_| will be cleared further down in this method, once
    // the packet has been successfully inserted into the packet buffer.

    rtcp_.Init(rtp_header.sequenceNumber);

    // Flush the packet buffer and DTMF buffer.
    packet_buffer_->Flush();
    dtmf_buffer_->Flush();

    // Store new SSRC.
    ssrc_ = rtp_header.ssrc;

    // Update audio buffer timestamp by the distance between the new packet's
    // timestamp and the previously stored |timestamp_|.
    // NOTE(review): for a RED packet |main_timestamp| has not been scaled or
    // refreshed yet at this point (that happens after splitting) - confirm
    // that this is intended.
    sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_);

    // Remember the new packet's timestamp as the current timestamp.
    timestamp_ = main_timestamp;
  }

  // Update RTCP statistics, only for regular packets.
  rtcp_.Update(rtp_header, receive_timestamp);

  if (nack_enabled_) {
    RTC_DCHECK(nack_);
    if (update_sample_rate_and_channels) {
      nack_->Reset();
    }
    // The NACK tracker is fed the values from the RTP header, not the scaled
    // values stored in the packet list.
    nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber,
                                    rtp_header.timestamp);
  }

  // Check for RED payload type, and separate payloads into several packets.
  if (decoder_database_->IsRed(rtp_header.payloadType)) {
    if (!red_payload_splitter_->SplitRed(&packet_list)) {
      return kRedundancySplitError;
    }
    // Only accept a few RED payloads of the same type as the main data,
    // DTMF events and CNG.
    red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
  }

  // Check payload types.
  if (decoder_database_->CheckPayloadTypes(packet_list) ==
      DecoderDatabase::kDecoderNotFound) {
    return kUnknownRtpPayloadType;
  }

  RTC_DCHECK(!packet_list.empty());

  // Update main_timestamp, if new packets appear in the list
  // after RED splitting. The split packets are scaled to the internal
  // timestamp domain here, since scaling was skipped for RED above.
  if (decoder_database_->IsRed(rtp_header.payloadType)) {
    timestamp_scaler_->ToInternal(&packet_list);
    main_timestamp = packet_list.front().timestamp;
    main_payload_type = packet_list.front().payload_type;
    main_sequence_number = packet_list.front().sequence_number;
  }

  // Process DTMF payloads. Cycle through the list of packets, and pick out any
  // DTMF payloads found. DTMF packets are parsed into events, stored in
  // |dtmf_buffer_| and removed from the list; erase() yields the iterator to
  // continue from.
  PacketList::iterator it = packet_list.begin();
  while (it != packet_list.end()) {
    const Packet& current_packet = (*it);
    RTC_DCHECK(!current_packet.payload.empty());
    if (decoder_database_->IsDtmf(current_packet.payload_type)) {
      DtmfEvent event;
      int ret = DtmfBuffer::ParseEvent(current_packet.timestamp,
                                       current_packet.payload.data(),
                                       current_packet.payload.size(), &event);
      if (ret != DtmfBuffer::kOK) {
        return kDtmfParsingError;
      }
      if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
        return kDtmfInsertError;
      }
      it = packet_list.erase(it);
    } else {
      ++it;
    }
  }

  // Update bandwidth estimate, if the packet is not comfort noise.
  // The list can be empty here if we got nothing but DTMF payloads, hence the
  // emptiness check before touching front().
  if (!packet_list.empty() &&
      !decoder_database_->IsComfortNoise(main_payload_type)) {
    AudioDecoder* decoder = decoder_database_->GetDecoder(main_payload_type);
    RTC_DCHECK(decoder);  // Should always get a valid object, since we have
                          // already checked that the payload types are known.
    decoder->IncomingPacket(packet_list.front().payload.data(),
                            packet_list.front().payload.size(),
                            packet_list.front().sequence_number,
                            packet_list.front().timestamp, receive_timestamp);
  }

  // Let each packet's decoder parse the payload into frames. Every iteration
  // removes the front node of |packet_list| (by splice or pop_front), so the
  // loop ends when all packets have been moved to |parsed_packet_list| or
  // dropped.
  PacketList parsed_packet_list;
  while (!packet_list.empty()) {
    Packet& packet = packet_list.front();
    const DecoderDatabase::DecoderInfo* info =
        decoder_database_->GetDecoderInfo(packet.payload_type);
    if (!info) {
      RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type";
      return kUnknownRtpPayloadType;
    }

    if (info->IsComfortNoise()) {
      // Carry comfort noise packets along.
      parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
                                packet_list.begin());
    } else {
      // Copy these fields out first: the payload is moved from and the front
      // node is overwritten below, while the lambda still needs the values.
      const auto sequence_number = packet.sequence_number;
      const auto payload_type = packet.payload_type;
      const Packet::Priority original_priority = packet.priority;
      // Builds a Packet around one parse result. Sequence number, payload type
      // and RED level are inherited from the original packet; timestamp, codec
      // priority level and frame come from the result.
      auto packet_from_result = [&](AudioDecoder::ParseResult& result) {
        Packet new_packet;
        new_packet.sequence_number = sequence_number;
        new_packet.payload_type = payload_type;
        new_packet.timestamp = result.timestamp;
        new_packet.priority.codec_level = result.priority;
        new_packet.priority.red_level = original_priority.red_level;
        new_packet.frame = std::move(result.frame);
        return new_packet;
      };

      std::vector<AudioDecoder::ParseResult> results =
          info->GetDecoder()->ParsePayload(std::move(packet.payload),
                                           packet.timestamp);
      if (results.empty()) {
        // The decoder produced nothing for this payload; drop the packet.
        packet_list.pop_front();
      } else {
        bool first = true;
        for (auto& result : results) {
          RTC_DCHECK(result.frame);
          RTC_DCHECK_GE(result.priority, 0);
          if (first) {
            // Re-use the node and move it to parsed_packet_list.
            packet_list.front() = packet_from_result(result);
            parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
                                      packet_list.begin());
            first = false;
          } else {
            parsed_packet_list.push_back(packet_from_result(result));
          }
        }
      }
    }
  }

  // Calculate the number of primary (non-FEC/RED) packets, i.e. those whose
  // codec priority level is 0. This must be done before the list is handed to
  // the packet buffer below.
  const int number_of_primary_packets = std::count_if(
      parsed_packet_list.begin(), parsed_packet_list.end(),
      [](const Packet& in) { return in.priority.codec_level == 0; });

  // Insert packets in buffer.
  const int ret = packet_buffer_->InsertPacketList(
      &parsed_packet_list, *decoder_database_, &current_rtp_payload_type_,
      &current_cng_rtp_payload_type_, &stats_);
  if (ret == PacketBuffer::kFlushed) {
    // Reset DSP timestamp etc. if packet buffer flushed.
    new_codec_ = true;
    update_sample_rate_and_channels = true;
  } else if (ret != PacketBuffer::kOK) {
    return kOtherError;
  }

  // The packet has been inserted successfully, so the first-packet state can
  // be cleared now.
  if (first_packet_) {
    first_packet_ = false;
    // Update the codec on the next GetAudio call.
    new_codec_ = true;
  }

  if (current_rtp_payload_type_) {
    RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_))
        << "Payload type " << static_cast<int>(*current_rtp_payload_type_)
        << " is unknown where it shouldn't be";
  }

  if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
    // We do not use |current_rtp_payload_type_| to |set payload_type|, but
    // get the next RTP header from |packet_buffer_| to obtain the payload type.
    // The reason for it is the following corner case. If NetEq receives a
    // CNG packet with a sample rate different than the current CNG then it
    // flushes its buffer, assuming send codec must have been changed. However,
    // payload type of the hypothetically new send codec is not known.
    const Packet* next_packet = packet_buffer_->PeekNextPacket();
    RTC_DCHECK(next_packet);
    const int payload_type = next_packet->payload_type;
    // Comfort noise keeps the default of one channel; any other payload type
    // takes the channel count from its decoder.
    size_t channels = 1;
    if (!decoder_database_->IsComfortNoise(payload_type)) {
      AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
      assert(decoder);  // Payloads are already checked to be valid.
      channels = decoder->Channels();
    }
    const DecoderDatabase::DecoderInfo* decoder_info =
        decoder_database_->GetDecoderInfo(payload_type);
    assert(decoder_info);
    // Only reconfigure when the sample rate or channel count actually differs
    // from the current setup.
    if (decoder_info->SampleRateHz() != fs_hz_ ||
        channels != algorithm_buffer_->Channels()) {
      SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels);
    }
    if (nack_enabled_) {
      RTC_DCHECK(nack_);
      // Update the sample rate even if the rate is not new, because of Reset().
      nack_->UpdateSampleRate(fs_hz_);
    }
  }

  // TODO(hlundin): Move this code to DelayManager class.
  const DecoderDatabase::DecoderInfo* dec_info =
      decoder_database_->GetDecoderInfo(main_payload_type);
  assert(dec_info);  // Already checked that the payload type is known.
  delay_manager_->LastDecodedWasCngOrDtmf(dec_info->IsComfortNoise() ||
                                          dec_info->IsDtmf());
  // NOTE(review): the meaning of the values returned by
  // last_pack_cng_or_dtmf() is defined in DelayManager; 0 appears to mean
  // "regular speech packet" and -1 "first regular packet after CNG/DTMF" -
  // confirm against that class.
  if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
    // Calculate the total speech length carried in each packet.
    if (number_of_primary_packets > 0) {
      const size_t packet_length_samples =
          number_of_primary_packets * decoder_frame_length_;
      if (packet_length_samples != decision_logic_->packet_length_samples()) {
        decision_logic_->set_packet_length_samples(packet_length_samples);
        // Samples are converted to milliseconds (1000 * samples / fs_hz_)
        // before being passed to the delay manager.
        delay_manager_->SetPacketAudioLength(
            rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz_));
      }
    }

    // Update statistics.
    // The timestamp difference is cast to int32_t before the sign test,
    // presumably so that the comparison stays correct across timestamp
    // wrap-around.
    if ((int32_t)(main_timestamp - timestamp_) >= 0 && !new_codec_) {
      // Only update statistics if incoming packet is not older than last played
      // out packet, and if new codec flag is not set.
      delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz_);
    }
  } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
    // This is first "normal" packet after CNG or DTMF.
    // Reset packet time counter and measure time until next packet,
    // but don't update statistics.
    delay_manager_->set_last_pack_cng_or_dtmf(0);
    delay_manager_->ResetPacketIatCount();
  }
  return 0;
}