queueing: remove per-peer queues

Previously, having many peers would result in many napi structs, which
could make lookups in the napi_hash in net/core/dev.c slow. So, we move
to using a single napi struct per device.
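
As a rough sketch of what this amounts to (the full context is in the
newlink and destruct hunks below), the device now owns the single napi
instance, registered at creation and torn down at destruction:

    /* newlink: one napi instance per device, serviced by packet_rx_poll */
    netif_napi_add(dev, &wg->napi, packet_rx_poll, NAPI_POLL_WEIGHT);
    napi_enable(&wg->napi);

    /* destruct: disabled and unregistered once, for the whole device */
    napi_disable(&wg->napi);
    netif_napi_del(&wg->napi);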

The best solution would be to replace napi_hash with an idr, or just get
rid of it altogether and use straight pointers. However, that isn't an
option at the moment, so we work with what exists and begrudgingly remove
the per-peer queues. On the upside, this reduces per-peer memory usage by
about 8k/16k, but on the downside it means that napi_gro_receive is
called on a unified list, which might result in smaller GRO speedups on
systems with many peers active at once.

However, if napi_hash does ever go away, we should consider reverting
this commit.

Since this means moving to unified packet queues, flushing at peer
removal becomes something of a problem. So we make the slightly dubious
modification of simply not flushing, and instead letting our reference
counters do the work. This in turn requires some small changes to ensure
that the reference count will, at some point in the future, still reach
zero, and not be kept alive by non-stop packet ingress.
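
Concretely, the receive path now declines to queue new work for a peer
that has already been removed, so packet ingress alone cannot keep the
reference count pinned; a minimal sketch of the check added to
packet_consume_data (shown in full in the hunk below):

    /* peer_remove() does list_del_init(&peer->peer_list), so an empty
     * list means the peer is on its way out; drop the packet instead of
     * queueing it, so the refcount can eventually reach zero. */
    if (unlikely(list_empty(&peer->peer_list)))
        goto err_peer;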

Co-developed-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Jason A. Donenfeld 2018-07-18 17:26:03 +02:00
parent e863f40616
commit 646df74bfa
7 changed files with 49 additions and 39 deletions


@@ -227,9 +227,13 @@ static void destruct(struct net_device *dev)
destroy_workqueue(wg->handshake_receive_wq);
destroy_workqueue(wg->handshake_send_wq);
destroy_workqueue(wg->packet_crypt_wq);
napi_disable(&wg->napi);
packet_queue_free(&wg->rx_queue, false);
packet_queue_free(&wg->tx_queue, false);
packet_queue_free(&wg->decrypt_queue, true);
packet_queue_free(&wg->encrypt_queue, true);
rcu_barrier_bh(); /* Wait for all the peers to be actually freed. */
netif_napi_del(&wg->napi);
ratelimiter_uninit();
memzero_explicit(&wg->static_identity, sizeof(struct noise_static_identity));
skb_queue_purge(&wg->incoming_handshakes);
@@ -322,13 +326,21 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t
if (packet_queue_init(&wg->decrypt_queue, packet_decrypt_worker, true, MAX_QUEUED_PACKETS) < 0)
goto error_7;
ret = ratelimiter_init();
if (ret < 0)
if (packet_queue_init(&wg->tx_queue, packet_tx_worker, false, MAX_QUEUED_PACKETS) < 0)
goto error_8;
if (packet_queue_init(&wg->rx_queue, NULL, false, MAX_QUEUED_PACKETS) < 0)
goto error_9;
ret = ratelimiter_init();
if (ret < 0)
goto error_10;
netif_napi_add(dev, &wg->napi, packet_rx_poll, NAPI_POLL_WEIGHT);
napi_enable(&wg->napi);
ret = register_netdevice(dev);
if (ret < 0)
goto error_9;
goto error_11;
list_add(&wg->device_list, &device_list);
@@ -340,8 +352,13 @@ static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *t
pr_debug("%s: Interface created\n", dev->name);
return ret;
error_9:
error_11:
netif_napi_del(&wg->napi);
ratelimiter_uninit();
error_10:
packet_queue_free(&wg->rx_queue, false);
error_9:
packet_queue_free(&wg->tx_queue, false);
error_8:
packet_queue_free(&wg->decrypt_queue, true);
error_7:


@@ -38,7 +38,8 @@ struct crypt_queue {
struct wireguard_device {
struct net_device *dev;
struct crypt_queue encrypt_queue, decrypt_queue;
struct crypt_queue encrypt_queue, decrypt_queue, tx_queue, rx_queue;
struct napi_struct napi;
struct sock __rcu *sock4, *sock6;
struct net *creating_net;
struct noise_static_identity static_identity;


@@ -50,14 +50,10 @@ struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_
INIT_WORK(&peer->transmit_handshake_work, packet_handshake_send_worker);
rwlock_init(&peer->endpoint_lock);
kref_init(&peer->refcount);
packet_queue_init(&peer->tx_queue, packet_tx_worker, false, MAX_QUEUED_PACKETS);
packet_queue_init(&peer->rx_queue, NULL, false, MAX_QUEUED_PACKETS);
skb_queue_head_init(&peer->staged_packet_queue);
list_add_tail(&peer->peer_list, &wg->peer_list);
pubkey_hashtable_add(&wg->peer_hashtable, peer);
peer->last_sent_handshake = ktime_get_boot_fast_ns() - (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC;
netif_napi_add(wg->dev, &peer->napi, packet_rx_poll, NAPI_POLL_WEIGHT);
napi_enable(&peer->napi);
pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
return peer;
}
@@ -94,11 +90,6 @@ void peer_remove(struct wireguard_peer *peer)
noise_keypairs_clear(&peer->keypairs);
list_del_init(&peer->peer_list);
timers_stop(peer);
flush_workqueue(peer->device->packet_crypt_wq); /* The first flush is for encrypt/decrypt. */
flush_workqueue(peer->device->packet_crypt_wq); /* The second.1 flush is for send (but not receive, since that's napi). */
napi_disable(&peer->napi); /* The second.2 flush is for receive (but not send, since that's wq). */
flush_workqueue(peer->device->handshake_send_wq);
netif_napi_del(&peer->napi);
--peer->device->num_peers;
peer_put(peer);
}
@@ -109,8 +100,6 @@ static void rcu_release(struct rcu_head *rcu)
pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", peer->device->dev->name, peer->internal_id, &peer->endpoint.addr);
dst_cache_destroy(&peer->endpoint_cache);
packet_queue_free(&peer->rx_queue, false);
packet_queue_free(&peer->tx_queue, false);
kzfree(peer);
}


@@ -35,7 +35,6 @@ struct endpoint {
struct wireguard_peer {
struct wireguard_device *device;
struct crypt_queue tx_queue, rx_queue;
struct sk_buff_head staged_packet_queue;
int serial_work_cpu;
struct noise_keypairs keypairs;
@@ -57,7 +56,6 @@ struct wireguard_peer {
struct rcu_head rcu;
struct list_head peer_list;
u64 internal_id;
struct napi_struct napi;
};
struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);


@@ -139,12 +139,12 @@ static inline void queue_enqueue_per_peer(struct crypt_queue *queue, struct sk_b
peer_put(peer);
}
static inline void queue_enqueue_per_peer_napi(struct crypt_queue *queue, struct sk_buff *skb, enum packet_state state)
static inline void queue_enqueue_per_device_napi(struct crypt_queue *queue, struct sk_buff *skb, enum packet_state state)
{
struct wireguard_peer *peer = peer_rcu_get(PACKET_PEER(skb));
atomic_set(&PACKET_CB(skb)->state, state);
napi_schedule(&peer->napi);
napi_schedule(&peer->device->napi);
peer_put(peer);
}


@@ -342,7 +342,7 @@ static void packet_consume_data_done(struct sk_buff *skb, struct endpoint *endpo
if (unlikely(routed_peer != peer))
goto dishonest_packet_peer;
if (unlikely(napi_gro_receive(&peer->napi, skb) == NET_RX_DROP)) {
if (unlikely(napi_gro_receive(&peer->device->napi, skb) == NET_RX_DROP)) {
++dev->stats.rx_dropped;
net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", dev->name, peer->internal_id, &peer->endpoint.addr);
} else
@@ -370,9 +370,10 @@ packet_processed:
int packet_rx_poll(struct napi_struct *napi, int budget)
{
struct wireguard_peer *peer = container_of(napi, struct wireguard_peer, napi);
struct crypt_queue *queue = &peer->rx_queue;
struct wireguard_device *wg = container_of(napi, struct wireguard_device, napi);
struct crypt_queue *queue = &wg->rx_queue;
struct noise_keypair *keypair;
struct wireguard_peer *peer;
struct sk_buff *skb;
struct endpoint endpoint;
enum packet_state state;
@@ -392,7 +393,7 @@ int packet_rx_poll(struct napi_struct *napi, int budget)
goto next;
if (unlikely(!counter_validate(&keypair->receiving.counter, PACKET_CB(skb)->nonce))) {
net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", peer->device->dev->name, PACKET_CB(skb)->nonce, keypair->receiving.counter.receive.counter);
net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", wg->dev->name, PACKET_CB(skb)->nonce, keypair->receiving.counter.receive.counter);
goto next;
}
@@ -427,7 +428,7 @@ void packet_decrypt_worker(struct work_struct *work)
while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
enum packet_state state = likely(skb_decrypt(skb, &PACKET_CB(skb)->keypair->receiving, have_simd)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
queue_enqueue_per_peer_napi(&PACKET_PEER(skb)->rx_queue, skb, state);
queue_enqueue_per_device_napi(&PACKET_PEER(skb)->device->rx_queue, skb, state);
have_simd = simd_relax(have_simd);
}
@@ -443,25 +444,29 @@ static void packet_consume_data(struct wireguard_device *wg, struct sk_buff *skb
rcu_read_lock_bh();
PACKET_CB(skb)->keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx));
rcu_read_unlock_bh();
if (unlikely(!PACKET_CB(skb)->keypair)) {
dev_kfree_skb(skb);
return;
}
if (unlikely(!PACKET_CB(skb)->keypair))
goto err_keypair;
/* The call to index_hashtable_lookup gives us a reference to its underlying peer, so we don't need to call peer_rcu_get(). */
peer = PACKET_PEER(skb);
/* If elsewhere has called peer_remove, we should not queue up more packets, so that eventually the reference count goes to zero. */
if (unlikely(list_empty(&peer->peer_list)))
goto err_peer;
ret = queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
ret = queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &wg->rx_queue, skb, wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
if (likely(!ret))
return; /* Successful. No need to drop references below. */
if (ret == -EPIPE)
queue_enqueue_per_peer(&peer->rx_queue, skb, PACKET_STATE_DEAD);
else {
peer_put(peer);
noise_keypair_put(PACKET_CB(skb)->keypair);
dev_kfree_skb(skb);
if (ret == -EPIPE) {
queue_enqueue_per_peer(&wg->rx_queue, skb, PACKET_STATE_DEAD);
return;
}
err_peer:
peer_put(peer);
noise_keypair_put(PACKET_CB(skb)->keypair);
err_keypair:
dev_kfree_skb(skb);
}
void packet_receive(struct wireguard_device *wg, struct sk_buff *skb)


@@ -255,7 +255,7 @@ void packet_encrypt_worker(struct work_struct *work)
break;
}
}
queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, state);
queue_enqueue_per_peer(&PACKET_PEER(first)->device->tx_queue, first, state);
have_simd = simd_relax(have_simd);
}
@@ -268,12 +268,12 @@ static void packet_create_data(struct sk_buff *first)
struct wireguard_device *wg = peer->device;
int ret;
ret = queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
ret = queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &wg->tx_queue, first, wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
if (likely(!ret))
return; /* Successful. No need to fall through to drop references below. */
if (ret == -EPIPE)
queue_enqueue_per_peer(&peer->tx_queue, first, PACKET_STATE_DEAD);
queue_enqueue_per_peer(&wg->tx_queue, first, PACKET_STATE_DEAD);
else {
peer_put(peer);
noise_keypair_put(PACKET_CB(first)->keypair);