webrtc_m130/net/dcsctp/socket/stream_reset_handler.cc

/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "net/dcsctp/socket/stream_reset_handler.h"
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "net/dcsctp/common/internal_types.h"
#include "net/dcsctp/common/str_join.h"
#include "net/dcsctp/packet/chunk/reconfig_chunk.h"
#include "net/dcsctp/packet/parameter/add_incoming_streams_request_parameter.h"
#include "net/dcsctp/packet/parameter/add_outgoing_streams_request_parameter.h"
#include "net/dcsctp/packet/parameter/incoming_ssn_reset_request_parameter.h"
#include "net/dcsctp/packet/parameter/outgoing_ssn_reset_request_parameter.h"
#include "net/dcsctp/packet/parameter/parameter.h"
#include "net/dcsctp/packet/parameter/reconfiguration_response_parameter.h"
#include "net/dcsctp/packet/parameter/ssn_tsn_reset_request_parameter.h"
#include "net/dcsctp/packet/sctp_packet.h"
#include "net/dcsctp/packet/tlv_trait.h"
#include "net/dcsctp/public/dcsctp_socket.h"
#include "net/dcsctp/rx/data_tracker.h"
#include "net/dcsctp/rx/reassembly_queue.h"
#include "net/dcsctp/socket/context.h"
#include "net/dcsctp/timer/timer.h"
#include "net/dcsctp/tx/retransmission_queue.h"
#include "rtc_base/logging.h"
namespace dcsctp {
namespace {
using ResponseResult = ReconfigurationResponseParameter::Result;
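
// Returns true if the two parameter descriptors in `c` have exactly the
// types `e1` and `e2`, in either order.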
bool DescriptorsAre(const std::vector<ParameterDescriptor>& c,
uint16_t e1,
uint16_t e2) {
return (c[0].type == e1 && c[1].type == e2) ||
(c[0].type == e2 && c[1].type == e1);
}
} // namespace
bool StreamResetHandler::Validate(const ReConfigChunk& chunk) {
const Parameters& parameters = chunk.parameters();
// https://tools.ietf.org/html/rfc6525#section-3.1
// "Note that each RE-CONFIG chunk holds at least one parameter
// and at most two parameters. Only the following combinations are allowed:"
std::vector<ParameterDescriptor> descriptors = parameters.descriptors();
if (descriptors.size() == 1) {
if ((descriptors[0].type == OutgoingSSNResetRequestParameter::kType) ||
(descriptors[0].type == IncomingSSNResetRequestParameter::kType) ||
(descriptors[0].type == SSNTSNResetRequestParameter::kType) ||
(descriptors[0].type == AddOutgoingStreamsRequestParameter::kType) ||
(descriptors[0].type == AddIncomingStreamsRequestParameter::kType) ||
(descriptors[0].type == ReconfigurationResponseParameter::kType)) {
return true;
}
} else if (descriptors.size() == 2) {
if (DescriptorsAre(descriptors, OutgoingSSNResetRequestParameter::kType,
IncomingSSNResetRequestParameter::kType) ||
DescriptorsAre(descriptors, AddOutgoingStreamsRequestParameter::kType,
AddIncomingStreamsRequestParameter::kType) ||
DescriptorsAre(descriptors, ReconfigurationResponseParameter::kType,
OutgoingSSNResetRequestParameter::kType) ||
DescriptorsAre(descriptors, ReconfigurationResponseParameter::kType,
ReconfigurationResponseParameter::kType)) {
return true;
}
}
RTC_LOG(LS_WARNING) << "Invalid set of RE-CONFIG parameters";
return false;
}
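
// Validates the chunk, then dispatches each parameter to its handler and
// collects any responses to send back. Returns absl::nullopt if validation
// failed. Parameter types that pass validation but have no case in the switch
// (the SSN/TSN reset and add-streams requests) are silently ignored.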
absl::optional<std::vector<ReconfigurationResponseParameter>>
StreamResetHandler::Process(const ReConfigChunk& chunk) {
if (!Validate(chunk)) {
return absl::nullopt;
}
std::vector<ReconfigurationResponseParameter> responses;
for (const ParameterDescriptor& desc : chunk.parameters().descriptors()) {
switch (desc.type) {
case OutgoingSSNResetRequestParameter::kType:
HandleResetOutgoing(desc, responses);
break;
case IncomingSSNResetRequestParameter::kType:
HandleResetIncoming(desc, responses);
break;
case ReconfigurationResponseParameter::kType:
HandleResponse(desc);
break;
}
}
return responses;
}
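
// Entry point for an incoming RE-CONFIG chunk. Any responses produced are
// bundled into a single outgoing RE-CONFIG chunk and sent immediately.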
void StreamResetHandler::HandleReConfig(ReConfigChunk chunk) {
absl::optional<std::vector<ReconfigurationResponseParameter>> responses =
Process(chunk);
if (!responses.has_value()) {
ctx_->callbacks().OnError(ErrorKind::kParseFailed,
"Failed to parse RE-CONFIG command");
return;
}
if (!responses->empty()) {
SctpPacket::Builder b = ctx_->PacketBuilder();
Parameters::Builder params_builder;
for (const auto& response : *responses) {
params_builder.Add(response);
}
b.Add(ReConfigChunk(params_builder.Build()));
ctx_->Send(b);
}
}
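
// Validates an incoming request sequence number. A retransmitted request
// (same number as the last processed one) is answered with the cached result;
// any other number than the next expected one is rejected with
// kErrorBadSequenceNumber. Returns true only if the request should be
// processed.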
bool StreamResetHandler::ValidateReqSeqNbr(
ReconfigRequestSN req_seq_nbr,
std::vector<ReconfigurationResponseParameter>& responses) {
if (req_seq_nbr == last_processed_req_seq_nbr_) {
// https://www.rfc-editor.org/rfc/rfc6525.html#section-5.2.1 "If the
// received RE-CONFIG chunk contains at least one request and based on the
// analysis of the Re-configuration Request Sequence Numbers this is the
// last received RE-CONFIG chunk (i.e., a retransmission), the same
// RE-CONFIG chunk MUST to be sent back in response, as it was earlier."
RTC_DLOG(LS_VERBOSE) << log_prefix_ << "req=" << *req_seq_nbr
<< " already processed, returning result="
<< ToString(last_processed_req_result_);
responses.push_back(ReconfigurationResponseParameter(
req_seq_nbr, last_processed_req_result_));
return false;
}
if (req_seq_nbr != ReconfigRequestSN(*last_processed_req_seq_nbr_ + 1)) {
    // Too old, too new, or from the wrong association, etc. This is expected
    // to happen when handing over an RTCPeerConnection from one server to
    // another. The client will notice this and may decide to close old data
    // channels, which may be sent to the wrong (or both) servers during a
    // handover.
RTC_DLOG(LS_VERBOSE) << log_prefix_ << "req=" << *req_seq_nbr
<< " bad seq_nbr";
responses.push_back(ReconfigurationResponseParameter(
req_seq_nbr, ResponseResult::kErrorBadSequenceNumber));
return false;
}
return true;
}
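
// Handles the peer's OUTGOING SSN RESET REQUEST, which, from this side's
// perspective, resets incoming streams: the reassembly queue is reset and the
// client is notified via OnIncomingStreamsReset.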
void StreamResetHandler::HandleResetOutgoing(
const ParameterDescriptor& descriptor,
std::vector<ReconfigurationResponseParameter>& responses) {
absl::optional<OutgoingSSNResetRequestParameter> req =
OutgoingSSNResetRequestParameter::Parse(descriptor.data);
if (!req.has_value()) {
ctx_->callbacks().OnError(ErrorKind::kParseFailed,
"Failed to parse Outgoing Reset command");
return;
}
if (ValidateReqSeqNbr(req->request_sequence_number(), responses)) {
RTC_DLOG(LS_VERBOSE) << log_prefix_
<< "Reset outgoing streams with req_seq_nbr="
<< *req->request_sequence_number();
last_processed_req_seq_nbr_ = req->request_sequence_number();
last_processed_req_result_ = reassembly_queue_->ResetStreams(
*req, data_tracker_->last_cumulative_acked_tsn());
if (last_processed_req_result_ == ResponseResult::kSuccessPerformed) {
ctx_->callbacks().OnIncomingStreamsReset(req->stream_ids());
}
responses.push_back(ReconfigurationResponseParameter(
req->request_sequence_number(), last_processed_req_result_));
}
}
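
// Handles the peer's INCOMING SSN RESET REQUEST. This implementation never
// resets outgoing streams at the peer's request, so it always replies
// kSuccessNothingToDo.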
void StreamResetHandler::HandleResetIncoming(
const ParameterDescriptor& descriptor,
std::vector<ReconfigurationResponseParameter>& responses) {
absl::optional<IncomingSSNResetRequestParameter> req =
IncomingSSNResetRequestParameter::Parse(descriptor.data);
if (!req.has_value()) {
ctx_->callbacks().OnError(ErrorKind::kParseFailed,
"Failed to parse Incoming Reset command");
return;
}
if (ValidateReqSeqNbr(req->request_sequence_number(), responses)) {
responses.push_back(ReconfigurationResponseParameter(
req->request_sequence_number(), ResponseResult::kSuccessNothingToDo));
last_processed_req_seq_nbr_ = req->request_sequence_number();
}
}
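
// Handles a RECONFIGURATION RESPONSE to this side's own outstanding request:
// on success, the stream reset is committed; on "in progress", the request is
// scheduled to be retried with a new req_seq_nbr; on any error, the reset is
// rolled back. Responses that don't match the current in-flight request are
// ignored.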
void StreamResetHandler::HandleResponse(const ParameterDescriptor& descriptor) {
absl::optional<ReconfigurationResponseParameter> resp =
ReconfigurationResponseParameter::Parse(descriptor.data);
if (!resp.has_value()) {
ctx_->callbacks().OnError(
ErrorKind::kParseFailed,
"Failed to parse Reconfiguration Response command");
return;
}
if (current_request_.has_value() && current_request_->has_been_sent() &&
resp->response_sequence_number() == current_request_->req_seq_nbr()) {
reconfig_timer_->Stop();
switch (resp->result()) {
case ResponseResult::kSuccessNothingToDo:
case ResponseResult::kSuccessPerformed:
RTC_DLOG(LS_VERBOSE)
<< log_prefix_ << "Reset stream success, req_seq_nbr="
<< *current_request_->req_seq_nbr() << ", streams="
<< StrJoin(current_request_->streams(), ",",
[](rtc::StringBuilder& sb, StreamID stream_id) {
sb << *stream_id;
});
ctx_->callbacks().OnStreamsResetPerformed(current_request_->streams());
current_request_ = absl::nullopt;
retransmission_queue_->CommitResetStreams();
break;
case ResponseResult::kInProgress:
RTC_DLOG(LS_VERBOSE)
<< log_prefix_ << "Reset stream still pending, req_seq_nbr="
<< *current_request_->req_seq_nbr() << ", streams="
<< StrJoin(current_request_->streams(), ",",
[](rtc::StringBuilder& sb, StreamID stream_id) {
sb << *stream_id;
});
// Force this request to be sent again, but with new req_seq_nbr.
current_request_->PrepareRetransmission();
reconfig_timer_->set_duration(ctx_->current_rto());
reconfig_timer_->Start();
break;
case ResponseResult::kErrorRequestAlreadyInProgress:
case ResponseResult::kDenied:
case ResponseResult::kErrorWrongSSN:
case ResponseResult::kErrorBadSequenceNumber:
RTC_DLOG(LS_WARNING)
<< log_prefix_ << "Reset stream error=" << ToString(resp->result())
<< ", req_seq_nbr=" << *current_request_->req_seq_nbr()
<< ", streams="
<< StrJoin(current_request_->streams(), ",",
[](rtc::StringBuilder& sb, StreamID stream_id) {
sb << *stream_id;
});
ctx_->callbacks().OnStreamsResetFailed(current_request_->streams(),
ToString(resp->result()));
current_request_ = absl::nullopt;
retransmission_queue_->RollbackResetStreams();
break;
}
}
}
absl::optional<ReConfigChunk> StreamResetHandler::MakeStreamResetRequest() {
  // Only send a stream reset request if there are streams ready to be reset
  // and there is no ongoing request (there can only be one in flight at a
  // time).
if (current_request_.has_value() ||
!retransmission_queue_->HasStreamsReadyToBeReset()) {
return absl::nullopt;
}
current_request_.emplace(retransmission_queue_->last_assigned_tsn(),
retransmission_queue_->BeginResetStreams());
reconfig_timer_->set_duration(ctx_->current_rto());
reconfig_timer_->Start();
return MakeReconfigChunk();
}
ReConfigChunk StreamResetHandler::MakeReconfigChunk() {
  // The req_seq_nbr is empty if the request has never been sent, or if it was
  // sent but the peer responded "in progress", in which case the req_seq_nbr
  // is cleared so that the request is re-sent with a new number. If the
  // request is re-sent due to a timeout (the reconfig timer expiring), the
  // same req_seq_nbr is used.
RTC_DCHECK(current_request_.has_value());
if (!current_request_->has_been_sent()) {
current_request_->PrepareToSend(next_outgoing_req_seq_nbr_);
next_outgoing_req_seq_nbr_ =
ReconfigRequestSN(*next_outgoing_req_seq_nbr_ + 1);
}
Parameters::Builder params_builder =
Parameters::Builder().Add(OutgoingSSNResetRequestParameter(
current_request_->req_seq_nbr(), current_request_->req_seq_nbr(),
current_request_->sender_last_assigned_tsn(),
current_request_->streams()));
return ReConfigChunk(params_builder.Build());
}
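
// Initiates a local reset of the given outgoing streams. The streams are only
// marked for reset ("prepared") here; the actual OUTGOING SSN RESET REQUEST
// is generated later, when the owning socket (e.g. DcSctpSocket) calls
// MakeStreamResetRequest while assembling a packet to send.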
void StreamResetHandler::ResetStreams(
rtc::ArrayView<const StreamID> outgoing_streams) {
for (StreamID stream_id : outgoing_streams) {
retransmission_queue_->PrepareResetStream(stream_id);
}
}
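
// Called when the RECONFIG timer expires without a response; retransmits the
// request and returns the next timeout, or absl::nullopt when the peer is
// considered lost (error counter exhausted). The timer is only started while
// `current_request_` is engaged and stopped before it is cleared, so the
// dereference below is safe.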
absl::optional<DurationMs> StreamResetHandler::OnReconfigTimerExpiry() {
if (current_request_->has_been_sent()) {
// There is an outstanding request, which timed out while waiting for a
// response.
if (!ctx_->IncrementTxErrorCounter("RECONFIG timeout")) {
// Timed out. The connection will close after processing the timers.
return absl::nullopt;
}
} else {
// There is no outstanding request, but there is a prepared one. This means
// that the receiver has previously responded "in progress", which resulted
// in retrying the request (but with a new req_seq_nbr) after a while.
}
ctx_->Send(ctx_->PacketBuilder().Add(MakeReconfigChunk()));
return ctx_->current_rto();
}
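
// Reports what, if anything, blocks a handover: streams still pending reset,
// or a stream reset request still in flight.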
HandoverReadinessStatus StreamResetHandler::GetHandoverReadiness() const {
HandoverReadinessStatus status;
if (retransmission_queue_->HasStreamsReadyToBeReset()) {
status.Add(HandoverUnreadinessReason::kPendingStreamReset);
}
if (current_request_.has_value()) {
status.Add(HandoverUnreadinessReason::kPendingStreamResetRequest);
}
return status;
}
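
// Persists the request sequence number state, so that a handed-over socket
// resumes with the correct RE-CONFIG sequence numbers.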
void StreamResetHandler::AddHandoverState(DcSctpSocketHandoverState& state) {
state.rx.last_completed_reset_req_sn = last_processed_req_seq_nbr_.value();
state.tx.next_reset_req_sn = next_outgoing_req_seq_nbr_.value();
}
} // namespace dcsctp