@@ -450,6 +450,9 @@ pub struct NatEngine {
450450 tcp_tw : HashMap < ( u32 , u16 , u16 ) , Instant > , // TIME_WAIT: absorb final ACKs silently
451451 icmp_nat : HashMap < ( u32 , u16 ) , NatIcmpEntry > , // key: (dst_ip, identifier)
452452 icmp_unavailable : bool , // true after first failed raw socket creation (Windows non-admin)
453+ // Replies generated while draining TX frames are deferred to the next loop iteration
454+ // so they don't race with the TX completion interrupt in IRIX's interrupt handler.
455+ deferred_rx : Vec < Vec < u8 > > ,
453456}
454457
455458impl NatEngine {
@@ -462,7 +465,7 @@ impl NatEngine {
462465 ctl : Arc < NatControl > ) -> Self {
463466 Self { config, tx_cons, rx_prod, rx_wake, tx_wake, running, ctl,
464467 udp_nat : HashMap :: new ( ) , tcp_nat : HashMap :: new ( ) , tcp_tw : HashMap :: new ( ) ,
465- icmp_nat : HashMap :: new ( ) , icmp_unavailable : false }
468+ icmp_nat : HashMap :: new ( ) , icmp_unavailable : false , deferred_rx : Vec :: new ( ) }
466469 }
467470
468471 pub fn run ( & mut self ) {
@@ -472,7 +475,7 @@ impl NatEngine {
472475 {
473476 let ( lock, cvar) = & * self . tx_wake ;
474477 let mut guard = lock. lock ( ) ;
475- let _ = cvar. wait_for ( & mut guard, Duration :: from_millis ( 10 ) ) ;
478+ let _ = cvar. wait_for ( & mut guard, Duration :: from_millis ( 1 ) ) ;
476479 }
477480
478481 // Machine reset: flush all NAT tables, close all host sockets.
@@ -483,6 +486,26 @@ impl NatEngine {
483486 self . icmp_nat . clear ( ) ; // drops all ICMP raw sockets
484487 }
485488
489+ // FIXME: investigate interrupt race between TX completion and RX delivery.
490+ // When a gateway reply (e.g. ICMP echo) is generated synchronously while
491+ // draining TX frames, it can arrive at IRIX while the TX completion interrupt
492+ // handler is still running. IRIX writes CLRINT which clears *all* pending
493+ // interrupts, silently dropping the RX interrupt. Deferring to the next loop
494+ // iteration (after the tx_wake wait) gives IRIX time to exit the TX handler
495+ // before we signal RX. This masks the symptom but the root cause — whether
496+ // IRIX's driver should re-check for new RX after CLRINT, or whether we should
497+ // hold the RX interrupt line asserted until explicitly cleared — is unclear.
498+ // Flush replies deferred from the previous iteration; re-defer any that don't fit.
498+ // NOTE(review): the loop keeps scanning after the ring fills — if slots free up
498+ // mid-loop, a later frame can be pushed ahead of an earlier re-deferred one,
498+ // reordering frames. Consider breaking out on the first full ring instead.
499+ let pending = std:: mem:: take ( & mut self . deferred_rx ) ;
500+ for frame in pending {
501+ if self . rx_prod . slots ( ) == 0 {
502+ self . deferred_rx . push ( frame) ;
503+ } else {
504+ let _ = self . rx_prod . push ( frame) ;
505+ self . rx_wake . 1 . notify_one ( ) ;
506+ }
507+ }
508+
486509 // Drain all pending outbound frames
487510 while let Ok ( frame) = self . tx_cons . pop ( ) {
488511 self . process ( & frame) ;
@@ -620,7 +643,7 @@ impl NatEngine {
620643 let c = ip_checksum ( & icmp) ; w16 ( & mut icmp, 2 , c) ;
621644 let frame = ip_frame ( src_mac, & self . config . gateway_mac ,
622645 self . config . gateway_ip , src_ip, IP_PROTO_ICMP , & icmp) ;
623- self . enqueue_rx ( frame) ;
646+ self . deferred_rx . push ( frame) ;
624647 return ;
625648 }
626649
0 commit comments