diff --git a/lectures/numba.md b/lectures/numba.md
index 85ba3ddd..070927fc 100644
--- a/lectures/numba.md
+++ b/lectures/numba.md
@@ -454,11 +454,13 @@ Compare speed with and without Numba when the sample size is large.
 Here is one solution:
 
 ```{code-cell} ipython3
+rng = np.random.default_rng()
+
 @jit
-def calculate_pi(n=1_000_000):
+def calculate_pi(rng, n=1_000_000):
     count = 0
     for i in range(n):
-        u, v = np.random.uniform(0, 1), np.random.uniform(0, 1)
+        u, v = rng.uniform(0, 1), rng.uniform(0, 1)
         d = np.sqrt((u - 0.5)**2 + (v - 0.5)**2)
         if d < 0.5:
             count += 1
@@ -471,12 +473,12 @@ Now let's see how fast it runs:
 
 ```{code-cell} ipython3
 with qe.Timer():
-    calculate_pi()
+    calculate_pi(rng)
 ```
 
 ```{code-cell} ipython3
 with qe.Timer():
-    calculate_pi()
+    calculate_pi(rng)
 ```
 
 If we switch off JIT compilation by removing `@jit`, the code takes around
@@ -550,10 +552,12 @@ p, q = 0.1, 0.2  # Prob of leaving low and high state respectively
 Here's a pure Python version of the function
 
 ```{code-cell} ipython3
-def compute_series(n):
+rng = np.random.default_rng()
+
+def compute_series(n, rng):
     x = np.empty(n, dtype=np.int64)
     x[0] = 1  # Start in state 1
-    U = np.random.uniform(0, 1, size=n)
+    U = rng.uniform(0, 1, size=n)
     for t in range(1, n):
         current_x = x[t-1]
         if current_x == 0:
@@ -568,7 +572,7 @@ state is about 0.666
 
 ```{code-cell} ipython3
 n = 1_000_000
-x = compute_series(n)
+x = compute_series(n, rng)
 print(np.mean(x == 0))  # Fraction of time x is in state 0
 ```
 
@@ -578,7 +582,7 @@ Now let's time it:
 
 ```{code-cell} ipython3
 with qe.Timer():
-    compute_series(n)
+    compute_series(n, rng)
 ```
 
 Next let's implement a Numba version, which is easy
@@ -590,7 +594,7 @@ compute_series_numba = jit(compute_series)
 Let's check we still get the right numbers
 
 ```{code-cell} ipython3
-x = compute_series_numba(n)
+x = compute_series_numba(n, rng)
 print(np.mean(x == 0))
 ```
 
@@ -598,7 +602,7 @@ Let's see the time
 
 ```{code-cell} ipython3
 with qe.Timer():
-    compute_series_numba(n)
+    compute_series_numba(n, rng)
 ```
 
 This is a nice speed improvement for one line of code!
@@ -636,11 +640,17 @@ For the size of the Monte Carlo simulation, use something substantial, such as
 Here is one solution:
 
 ```{code-cell} ipython3
+n = 1_000_000
+rng = np.random.default_rng()
+u_draws = rng.uniform(size=n)
+v_draws = rng.uniform(size=n)
+
 @jit(parallel=True)
-def calculate_pi(n=1_000_000):
+def calculate_pi(u_draws, v_draws):
+    n = len(u_draws)
     count = 0
     for i in prange(n):
-        u, v = np.random.uniform(0, 1), np.random.uniform(0, 1)
+        u, v = u_draws[i], v_draws[i]
         d = np.sqrt((u - 0.5)**2 + (v - 0.5)**2)
         if d < 0.5:
             count += 1
@@ -653,12 +663,12 @@ Now let's see how fast it runs:
 
 ```{code-cell} ipython3
 with qe.Timer():
-    calculate_pi()
+    calculate_pi(u_draws, v_draws)
 ```
 
 ```{code-cell} ipython3
 with qe.Timer():
-    calculate_pi()
+    calculate_pi(u_draws, v_draws)
 ```
 
 By switching parallelization on and off (selecting `True` or
@@ -773,6 +783,7 @@ def compute_call_price_parallel(β=β,
     s = np.log(S0)
     h = h0
     # Simulate forward in time
+    # Draws are kept inside the loop to avoid pre-allocating large shock arrays.
     for t in range(n):
         s = s + μ + np.exp(h) * np.random.randn()
         h = ρ * h + ν * np.random.randn()