|
1 | 1 | #lang racket/base |
2 | | -;; Stress tests that recompile data/gvector with GVECTOR_SLEEP=1 to |
3 | | -;; widen race windows, exercising the CAS retry paths. The gvector |
4 | | -;; module is loaded in a fresh namespace with compiled file loading |
5 | | -;; disabled, so the sleep-enabled version is never cached. |
| 2 | +;; Stress tests that recompile data/gvector with GVECTOR_SLEEP set to |
| 3 | +;; widen specific race windows. Each test activates only the sleep |
| 4 | +;; point relevant to the invariant it tests, so other operations |
| 5 | +;; remain fast. The gvector module is loaded via dynamic-require with |
| 6 | +;; compiled file loading disabled, so the sleep-enabled version is |
| 7 | +;; never cached. |
6 | 8 |
|
7 | | -(require rackunit) |
| 9 | +(require rackunit |
| 10 | + racket/unsafe/ops) |
8 | 11 |
|
9 | | -(define (dr sym) |
10 | | - ;; Set the env var so the compile-time switch in gvector.rkt enables sleeps, |
11 | | - ;; and disable loading compiled files so gvector.rkt is recompiled fresh. |
12 | | - (putenv "GVECTOR_SLEEP" "1") |
13 | | - (begin0 |
14 | | - (parameterize ([use-compiled-file-paths '()]) |
15 | | - (dynamic-require 'data/gvector sym)) |
16 | | - (putenv "GVECTOR_SLEEP" ""))) |
| 12 | +(define (make-gvector-ns tag) |
| 13 | + ;; Set GVECTOR_SLEEP to the tag so only the matching maybe-sleep |
| 14 | + ;; expands to (sleep 0.01). Load in a fresh namespace with compiled |
| 15 | + ;; files disabled. Returns a lookup function for that namespace. |
| 16 | + (putenv "GVECTOR_SLEEP" tag) |
| 17 | + (define ns (make-base-namespace)) |
| 18 | + (parameterize ([use-compiled-file-paths '()] |
| 19 | + [current-namespace ns]) |
| 20 | + (dynamic-require 'data/gvector #f)) |
| 21 | + (putenv "GVECTOR_SLEEP" "") |
| 22 | + (lambda (sym) |
| 23 | + (parameterize ([use-compiled-file-paths '()] |
| 24 | + [current-namespace ns]) |
| 25 | + (dynamic-require 'data/gvector sym)))) |
17 | 26 |
|
18 | | -(define gvector?* (dr 'gvector?)) |
19 | | -(define make-gvector* (dr 'make-gvector)) |
20 | | -(define gvector-add!* (dr 'gvector-add!)) |
21 | | -(define gvector-ref* (dr 'gvector-ref)) |
22 | | -(define gvector-set!* (dr 'gvector-set!)) |
23 | | -(define gvector-count* (dr 'gvector-count)) |
24 | | -(define gvector-remove-last!* (dr 'gvector-remove-last!)) |
| 27 | +(define ref-ns (make-gvector-ns "ref")) |
| 28 | +(define trim-ns (make-gvector-ns "trim")) |
| 29 | +(define es-ns (make-gvector-ns "ensure-space")) |
25 | 30 |
|
26 | | -;; Test: parallel adds don't violate n <= vector-length(vec) |
27 | | -;; This is the exact scenario from rmculpepper's review comment: |
28 | | -;; two concurrent add! calls both snapshot the same n and vec, |
29 | | -;; both create new vectors of different sizes. |
30 | | -;; Without CAS, the smaller vector could win the vec write while |
31 | | -;; the larger n wins the n write. |
32 | | -(test-case "sleep-stress: parallel add invariant" |
33 | | - (for ([iter (in-range 10)]) |
| 31 | +;; Test 1: vec-before-n ordering in gvector-ref. |
| 32 | +;; Only gvector-ref sleeps (tag "ref"); removes run at full speed. |
| 33 | +;; A reader reads vec, sleeps 10ms, then reads n. During the sleep, |
| 34 | +;; many concurrent removes can decrement n and trigger trim! which |
| 35 | +;; replaces vec with a shorter one. With correct vec-before-n ordering, |
| 36 | +;; the reader holds the old (large) vec and reads the new (small) n — |
| 37 | +;; safe. With wrong n-before-vec ordering, the reader reads the old |
| 38 | +;; (large) n and the new (small) vec — out of bounds. |
| 39 | +(test-case "sleep-stress: ref + remove (vec-before-n ordering)" |
| 40 | + (define make-gvector* (ref-ns 'make-gvector)) |
| 41 | + (define gvector-add!* (ref-ns 'gvector-add!)) |
| 42 | + (define gvector-ref* (ref-ns 'gvector-ref)) |
| 43 | + (define gvector-count* (ref-ns 'gvector-count)) |
| 44 | + (define gvector-remove-last!* (ref-ns 'gvector-remove-last!)) |
| 45 | + (for ([iter (in-range 5)]) |
34 | 46 | (define gv (make-gvector*)) |
35 | | - ;; Fill to capacity to force resize on next add |
36 | | - (for ([i 10]) |
| 47 | + ;; Fill with enough elements that removing triggers trim! |
| 48 | + ;; trim! fires when cap >= 4*n, so starting at n=1000 with |
| 49 | + ;; cap=1024, after 744 removes n=256 and cap=1024 >= 4*256, |
| 50 | + ;; so trim! shrinks to 512. Further removes trigger more shrinks. |
| 51 | + (for ([i 1000]) |
37 | 52 | (gvector-add!* gv i)) |
38 | | - (define pool (make-parallel-thread-pool 2)) |
39 | | - ;; T1 adds 1 item (needs vec of ~20), T2 adds many (needs vec of ~1010) |
40 | | - (define t1 (thread #:pool pool (lambda () (gvector-add!* gv 'from-t1)))) |
41 | | - (define t2 |
42 | | - (thread #:pool pool |
43 | | - (lambda () |
44 | | - (for ([i (in-range 1000)]) |
45 | | - (gvector-add!* gv i))))) |
46 | | - (thread-wait t1) |
47 | | - (thread-wait t2) |
48 | | - (parallel-thread-pool-close pool) |
49 | | - ;; The key check: n must not exceed vector length |
50 | | - (define count (gvector-count* gv)) |
51 | | - (check-true (> count 10) (format "iter ~a: count should be > 10, got ~a" iter count)))) |
52 | | - |
53 | | -;; Test: parallel add + ref doesn't read garbage |
54 | | -;; With sleeps between vec-read and n-read in gvector-ref, |
55 | | -;; this exercises the vec-before-n ordering. |
56 | | -(test-case "sleep-stress: parallel add + ref" |
57 | | - (for ([iter (in-range 10)]) |
58 | | - (define gv (make-gvector*)) |
59 | | - (define stop? #f) |
60 | 53 | (define pool (make-parallel-thread-pool 4)) |
61 | | - (define writers |
62 | | - (for/list ([t (in-range 2)]) |
63 | | - (thread #:pool pool |
64 | | - (lambda () |
65 | | - (for ([i (in-range 500)]) |
66 | | - (gvector-add!* gv i)))))) |
| 54 | + (define stop? #f) |
| 55 | + (define found-garbage? #f) |
| 56 | + ;; Readers that access near the end of the gvector. |
| 57 | + ;; Each ref takes ~10ms due to the sleep. |
| 58 | + ;; Check that returned values are valid (fixnums 0-999 or #f). |
67 | 59 | (define readers |
68 | 60 | (for/list ([t (in-range 2)]) |
69 | 61 | (thread #:pool pool |
70 | 62 | (lambda () |
71 | 63 | (let loop () |
72 | 64 | (define n (gvector-count* gv)) |
73 | 65 | (when (> n 0) |
74 | | - (gvector-ref* gv (sub1 n) #f)) |
| 66 | + (define val (gvector-ref* gv (sub1 n) #f)) |
| 67 | + (when (and val (not (and (fixnum? val) (<= 0 val 999)))) |
| 68 | + (set! found-garbage? #t))) |
75 | 69 | (unless stop? |
76 | 70 | (loop))))))) |
77 | | - (for-each thread-wait writers) |
| 71 | + ;; Removers run at full speed (no sleep in remove/trim) |
| 72 | + (define removers |
| 73 | + (for/list ([t (in-range 2)]) |
| 74 | + (thread #:pool pool |
| 75 | + (lambda () |
| 76 | + (for ([_ (in-range 400)]) |
| 77 | + (with-handlers ([exn:fail? void]) |
| 78 | + (gvector-remove-last!* gv))))))) |
| 79 | + (for-each thread-wait removers) |
78 | 80 | (set! stop? #t) |
79 | 81 | (parallel-thread-pool-close pool) |
80 | | - (for-each thread-wait readers))) |
| 82 | + (for-each thread-wait readers) |
| 83 | + (check-false found-garbage? |
| 84 | + (format "iter ~a: read garbage from beyond vector bounds" iter)))) |
81 | 85 |
|
82 | | -;; Test: parallel add + remove doesn't crash |
83 | | -;; With sleeps in both trim! and gvector-ref, this exercises |
84 | | -;; the shrink path where vec is replaced with a shorter vector. |
85 | | -(test-case "sleep-stress: parallel add + remove" |
86 | | - (for ([iter (in-range 10)]) |
| 86 | +;; Test 2: trim! under concurrent add pressure. |
| 87 | +;; Only trim! sleeps (tag "trim"); adds run at full speed. |
| 88 | +;; Exercises two protections in trim!: |
| 89 | +;; (a) CAS on vec: if a grower installed a larger vec during the sleep, |
| 90 | +;; CAS fails and trim! retries rather than clobbering. |
| 91 | +;; (b) n re-read: if concurrent adds grew n past the target capacity |
| 92 | +;; without changing vec (because it had room), trim! aborts. |
| 93 | +;; The invariant violation (n > vector-length(vec)) is transient and |
| 94 | +;; hard to observe because adds immediately re-grow, so the test |
| 95 | +;; primarily verifies no crashes or memory errors occur. |
| 96 | +(test-case "sleep-stress: add + remove (trim! CAS)" |
| 97 | + (define make-gvector* (trim-ns 'make-gvector)) |
| 98 | + (define gvector-add!* (trim-ns 'gvector-add!)) |
| 99 | + (define gvector-ref* (trim-ns 'gvector-ref)) |
| 100 | + (define gvector-count* (trim-ns 'gvector-count)) |
| 101 | + (define gvector-remove-last!* (trim-ns 'gvector-remove-last!)) |
| 102 | + (for ([iter (in-range 5)]) |
87 | 103 | (define gv (make-gvector*)) |
88 | | - (for ([i 200]) |
| 104 | + ;; Start with enough elements that removing triggers trim! |
| 105 | + ;; n=1000, cap=1024. After ~744 removes, cap >= 4*n triggers shrink. |
| 106 | + (for ([i 1000]) |
89 | 107 | (gvector-add!* gv i)) |
90 | | - (define pool (make-parallel-thread-pool 4)) |
| 108 | + (define pool (make-parallel-thread-pool 6)) |
| 109 | + (define stop? #f) |
| 110 | + (define found-garbage? #f) |
| 111 | + ;; Writers add continuously so they're active during trim!'s sleep. |
| 112 | + ;; Each trim! sleeps 10ms; writers must be adding during that window |
| 113 | + ;; so they push n past trim!'s target capacity. |
91 | 114 | (define writers |
92 | 115 | (for/list ([t (in-range 2)]) |
93 | 116 | (thread #:pool pool |
94 | 117 | (lambda () |
95 | | - (for ([i (in-range 200)]) |
96 | | - (gvector-add!* gv i)))))) |
| 118 | + (let loop () |
| 119 | + (with-handlers ([exn:fail? void]) |
| 120 | + (gvector-add!* gv 42)) |
| 121 | + (unless stop? |
| 122 | + (loop))))))) |
| 123 | + ;; Checkers continuously verify n <= vector-length(vec) directly. |
| 124 | + ;; gvector-ref can't detect this because vec-before-n ordering |
| 125 | + ;; makes it safe even when the invariant is transiently violated. |
| 126 | + ;; We read the struct fields directly via unsafe-struct*-ref |
| 127 | + ;; (vec=field 0, n=field 1) to observe the transient violation. |
| 128 | + (define readers |
| 129 | + (for/list ([t (in-range 2)]) |
| 130 | + (thread #:pool pool |
| 131 | + (lambda () |
| 132 | + (let loop () |
| 133 | + ;; Read vec first, then n. If trim! just installed |
| 134 | + ;; a small vec and n is still large from adds, |
| 135 | + ;; we see v=small, n=large => violation. |
| 136 | + (define v (unsafe-struct*-ref gv 0)) |
| 137 | + (define n (unsafe-struct*-ref gv 1)) |
| 138 | + (when (> n (vector-length v)) |
| 139 | + (set! found-garbage? #t)) |
| 140 | + (unless stop? |
| 141 | + (loop))))))) |
| 142 | + ;; Removers trigger trim! which sleeps between reading vec and CAS |
97 | 143 | (define removers |
98 | 144 | (for/list ([t (in-range 2)]) |
99 | 145 | (thread #:pool pool |
100 | 146 | (lambda () |
101 | | - (for ([_ (in-range 100)]) |
| 147 | + (for ([_ (in-range 400)]) |
102 | 148 | (with-handlers ([exn:fail? void]) |
103 | 149 | (gvector-remove-last!* gv))))))) |
| 150 | + (for-each thread-wait removers) |
| 151 | + (set! stop? #t) |
104 | 152 | (for-each thread-wait writers) |
| 153 | + (for-each thread-wait readers) |
105 | 154 | (parallel-thread-pool-close pool) |
106 | | - (for-each thread-wait removers))) |
| 155 | + (check-false found-garbage? |
| 156 | + (format "iter ~a: read garbage from beyond vector bounds" iter)))) |
| 157 | + |
| 158 | +;; Test 3: CAS in define/ensure-space!. |
| 159 | +;; Only ensure-space! sleeps (tag "ensure-space"); other ops fast. |
| 160 | +;; Two growers both read the same vec, sleep 10ms, then both try to |
| 161 | +;; create and install new vectors. With CAS, only one succeeds and |
| 162 | +;; the other retries. Without CAS, both install and the last writer |
| 163 | +;; wins — which may be the thread creating the smaller vec while the |
| 164 | +;; other thread increments n past that vec's length. |
| 165 | +;; Note: see comment at test about reliable detection. |
| 166 | +(test-case "sleep-stress: concurrent add (ensure-space! CAS)" |
| 167 | + (define make-gvector* (es-ns 'make-gvector)) |
| 168 | + (define gvector-add!* (es-ns 'gvector-add!)) |
| 169 | + (define gvector-ref* (es-ns 'gvector-ref)) |
| 170 | + (define gvector-count* (es-ns 'gvector-count)) |
| 171 | + (for ([iter (in-range 5)]) |
| 172 | + (define gv (make-gvector*)) |
| 173 | + ;; Fill to capacity to force resize on next add |
| 174 | + (for ([i 10]) |
| 175 | + (gvector-add!* gv i)) |
| 176 | + (define pool (make-parallel-thread-pool 2)) |
| 177 | + ;; Use symbols so we can distinguish from garbage memory |
| 178 | + (define sentinel (gensym 'val)) |
| 179 | + (define t1 (thread #:pool pool (lambda () (gvector-add!* gv sentinel)))) |
| 180 | + (define t2 |
| 181 | + (thread #:pool pool |
| 182 | + (lambda () (apply gvector-add!* gv (build-list 100 (lambda (_) sentinel)))))) |
| 183 | + (thread-wait t1) |
| 184 | + (thread-wait t2) |
| 185 | + (parallel-thread-pool-close pool) |
| 186 | + ;; Verify invariant by reading every element. |
| 187 | + ;; With a missing CAS, this may read garbage beyond the vector. |
| 188 | + ;; However, detection is not guaranteed since the thread creating |
| 189 | + ;; the larger vector tends to win the vec write. |
| 190 | + (define count (gvector-count* gv)) |
| 191 | + (for ([i (in-range count)]) |
| 192 | + (define val (gvector-ref* gv i)) |
| 193 | + (check-true (or (fixnum? val) (eq? val sentinel)) |
| 194 | + (format "iter ~a: garbage at index ~a: ~v" iter i val))))) |
0 commit comments