diff --git a/book/docker/windows/Dockerfile b/book/docker/windows/Dockerfile
index f48fac6c0..afff8df2d 100644
--- a/book/docker/windows/Dockerfile
+++ b/book/docker/windows/Dockerfile
@@ -173,7 +173,7 @@ RUN Write-Host '=== STARTING TEX LIVE INSTALLATION ===' ; `
     $texLiveBin = Join-Path $texYearDir.FullName 'bin\windows' ; `
     Write-Host "📁 TeX Live bin: $texLiveBin" ; `
     $env:PATH = "$texLiveBin;$env:PATH" ; `
-    [Environment]::SetEnvironmentVariable('PATH', "$texLiveBin;$([Environment]::GetEnvironmentVariable('PATH','Machine'))", 'Machine') ; `
+    [Environment]::SetEnvironmentVariable('PATH', ($texLiveBin + ';' + [Environment]::GetEnvironmentVariable('PATH','Machine')), 'Machine') ; `
     Write-Host "✅ PATH updated" ; `
     `
     Write-Host '🔧 Pinning tlmgr repository to stable mirror...' ; `
diff --git a/book/quarto/contents/vol1/data_selection/data_selection.qmd b/book/quarto/contents/vol1/data_selection/data_selection.qmd
index dc4387cec..f5c6f76e5 100644
--- a/book/quarto/contents/vol1/data_selection/data_selection.qmd
+++ b/book/quarto/contents/vol1/data_selection/data_selection.qmd
@@ -886,6 +886,7 @@ def compute_el2n_scores(model, dataloader, num_epochs=5):
         scores.extend(el2n.tolist())
     return scores
 
+
 def select_coreset(scores, dataset, fraction=0.1):
     """Select top-k highest-scoring (most uncertain) samples."""
     k = int(len(dataset) * fraction)
@@ -893,6 +894,7 @@ def select_coreset(scores, dataset, fraction=0.1):
     indices = argsort(scores, descending=True)[:k]
     return Subset(dataset, indices)
 
+
 # Usage: 10x data reduction with minimal accuracy loss
 scores = compute_el2n_scores(proxy_model, full_loader)
 coreset = select_coreset(scores, full_dataset, fraction=0.1)
diff --git a/book/quarto/contents/vol1/frameworks/frameworks.qmd b/book/quarto/contents/vol1/frameworks/frameworks.qmd
index 5a664f81e..a4fc7d87b 100644
--- a/book/quarto/contents/vol1/frameworks/frameworks.qmd
+++ b/book/quarto/contents/vol1/frameworks/frameworks.qmd
@@ -733,11 +733,13 @@ PyTorch's TorchScript exemplifies both strategies. Tracing\index{JIT Compilation
 ```{.python}
 import torch
 
+
 def forward(x):
     y = x * 2
     z = y + 1
     return z
 
+
 # Trace the function by running it once
 x_example = torch.tensor([1.0])
 traced = torch.jit.trace(forward, x_example)
@@ -759,6 +761,7 @@ def conditional_forward(x):
     else:
         return x * 3
 
+
 traced = torch.jit.trace(conditional_forward, torch.tensor([1.0]))
 # Tracing captures ONLY the x.sum() > 0 branch
 # If input later has sum <= 0, traced version
@@ -780,6 +783,7 @@ def forward(x):
     z = y + 1
     return z
 
+
 # Compiles Python source code to TorchScript IR
 # No example inputs needed
 # Preserves control flow structure
@@ -797,6 +801,7 @@ def conditional_forward(x: torch.Tensor) -> torch.Tensor:
     else:
         return x * 3
 
+
 # Both branches preserved in IR
 # Correct branch executes based on runtime input values
 ```
@@ -810,6 +815,7 @@ To understand what the compiler produces, we can inspect the generated intermedi
 def example(x: torch.Tensor) -> torch.Tensor:
     return x * 2 + 1
 
+
 # Inspect generated IR:
 print(example.graph)
 # graph(%x : Tensor):
@@ -833,6 +839,7 @@ def invalid_script(x):
     print(f"Debug: {x}")  # ERROR: f-strings not supported
     return result
 
+
 # Valid alternative:
 @torch.jit.script
 def valid_script(x: torch.Tensor) -> torch.Tensor:
@@ -871,6 +878,7 @@ PyTorch 2.0's `torch.compile` [@ansel2024pytorch2] represents this approach: dev
 def forward(x):
     return x * 2 + 1
 
+
 # First call: captures execution, compiles optimized kernel (~100ms)
 result1 = forward(torch.tensor([1.0]))
 
@@ -1072,6 +1080,7 @@ def conditional_compute(x):
     else:
         return x * 3
 
+
 # Creates two compiled regions: operations before
 # and after the if statement
 # The if statement itself executes eagerly
@@ -1091,6 +1100,7 @@ def debug_compute(x):
     z = y + 1
     return z
 
+
 # Creates two compiled regions: before and after print
 ```
 :::
@@ -1105,6 +1115,7 @@ Shape changes prevent compiled code reuse, as @lst-graph-break-shapes illustrate
 def variable_length(x, length):
     return x[:, :length]  # Shape changes each call
 
+
 # Each unique length triggers recompilation
 for i in range(10):
     result = variable_length(x, i)  # 10 recompilations
@@ -1137,9 +1148,11 @@ The compilation mode controls *how aggressively* to optimize; the backend contro
 import torch
 import time
 
+
 def forward(x, w):
     return torch.matmul(x, w).relu()
 
+
 x = torch.randn(1024, 1024, device="cuda")
 w = torch.randn(1024, 512, device="cuda")
 
@@ -1693,6 +1706,7 @@ def simple_network(x, w1, w2):
     output = activated * w2  # Second layer
     return output
 
+
 # --- Forward pass stores intermediates ---
 # x=1.0, w1=2.0, w2=3.0
 # hidden=2.0, activated=2.0, output=6.0
@@ -1983,6 +1997,7 @@ class MultiplyAdd(torch.autograd.Function):
 
         return grad_x, grad_y, grad_z
 
+
 # Usage
 x = torch.tensor([2.0], requires_grad=True)
 y = torch.tensor([3.0], requires_grad=True)
@@ -2009,6 +2024,7 @@ def gradient_hook(grad):
     # Modify gradient (e.g., gradient clipping)
     return grad.clamp(-1.0, 1.0)
 
+
 x = torch.tensor([2.0], requires_grad=True)
 x.register_hook(gradient_hook)
 
@@ -3240,6 +3256,7 @@ The systems consequence is significant. Automatic parameter discovery enables `o
 import torch
 import torch.nn as nn
 
+
 class CustomLayer(nn.Module):
     def __init__(self, input_size, output_size):
         super().__init__()
@@ -3252,6 +3269,7 @@ class CustomLayer(nn.Module):
     def forward(self, x):
         return torch.matmul(x, self.weight.t()) + self.bias
 
+
 layer = CustomLayer(10, 20)
 # Framework discovers both parameters automatically:
 for name, param in layer.named_parameters():
@@ -3300,6 +3318,7 @@ The state dictionary mechanism provides the serialization half of this principle
 import torch
 import torch.nn as nn
 
+
 class ResidualBlock(nn.Module):
     def __init__(self, channels):
         super().__init__()
@@ -3314,6 +3333,7 @@ class ResidualBlock(nn.Module):
         x = self.bn2(self.conv2(x))
         return torch.relu(x + residual)
 
+
 class ResNet(nn.Module):
     def __init__(self, num_blocks, channels=64):
         super().__init__()
@@ -3330,6 +3350,7 @@ class ResNet(nn.Module):
         x = x.mean(dim=[2, 3])  # Global average pooling
         return self.fc(x)
 
+
 model = ResNet(num_blocks=4)
 total = sum(p.numel() for p in model.parameters())
 print(f"Total parameters: {total}")
@@ -3373,6 +3394,7 @@ import torch.nn as nn
 
 model = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 5))
 
+
 # Forward hook to inspect activations
 def forward_hook(module, input, output):
     print(
@@ -3382,10 +3404,12 @@ def forward_hook(module, input, output):
         f"std={output.std():.3f}"
     )
 
+
 # Backward hook to inspect gradients
 def backward_hook(module, grad_input, grad_output):
     print(f"Gradient norm: {grad_output[0].norm():.3f}")
 
+
 # Register hooks on specific layer
 handle_fwd = model[0].register_forward_hook(forward_hook)
 handle_bwd = model[0].register_full_backward_hook(backward_hook)
@@ -3517,10 +3541,12 @@ While PyTorch and TensorFlow build computational graphs (dynamically or statical
 import jax
 import jax.numpy as jnp
 
+
 def loss_fn(params, x, y):
     pred = jnp.dot(x, params["w"]) + params["b"]
     return jnp.mean((pred - y) ** 2)
 
+
 # Transform: compute gradients
 grad_fn = jax.grad(loss_fn)
 
@@ -3576,6 +3602,7 @@ How do these architectural differences look in practice? @lst-framework-hello-wo
 # PyTorch - Dynamic, Pythonic
 import torch.nn as nn
 
+
 class SimpleNet(nn.Module):
     def __init__(self):
         super().__init__()
@@ -3584,6 +3611,7 @@ class SimpleNet(nn.Module):
     def forward(self, x):
         return self.fc(x)
 
+
 # TensorFlow/Keras - High-level API
 import tensorflow as tf
 
@@ -3595,9 +3623,11 @@ model = tf.keras.Sequential(
 import jax.numpy as jnp
 from jax import random
 
+
 def simple_net(params, x):
     return jnp.dot(x, params["w"]) + params["b"]
 
+
 key = random.PRNGKey(0)
 params = {
     "w": random.normal(key, (10, 1)),
diff --git a/book/quarto/contents/vol1/hw_acceleration/hw_acceleration.qmd b/book/quarto/contents/vol1/hw_acceleration/hw_acceleration.qmd
index bb494178f..502acb902 100644
--- a/book/quarto/contents/vol1/hw_acceleration/hw_acceleration.qmd
+++ b/book/quarto/contents/vol1/hw_acceleration/hw_acceleration.qmd
@@ -896,7 +896,9 @@ We call the hardware units that exploit these patterns *AI compute primitives*:
 ::: {#lst-dense_layer_def lst-cap="**Dense Layer Abstraction**: High-level framework APIs encapsulate 131,072 multiply-accumulate operations (256 inputs times 512 outputs) in a single function call, hiding the computational complexity from developers while enabling automatic hardware optimization."}
 ```{.python}
 # Framework abstracts compute-intensive operations
-dense = Dense(512)(input_tensor)  # $256\times512$ = 131K MACs per sample
+dense = Dense(512)(
+    input_tensor
+)  # $256\times512$ = 131K MACs per sample
 ```
 :::
 
@@ -909,7 +911,9 @@ This single line of code conceals the computational complexity that accelerators
 output = (
     matmul(input, weights) + bias
 )  # Matrix multiply dominates cost
-output = activation(output)  # Element-wise: O(output_dim$\times$batch)
+output = activation(
+    output
+)  # Element-wise: O(output_dim$\times$batch)
 ```
 :::
 
diff --git a/book/quarto/contents/vol1/nn_architectures/nn_architectures.qmd b/book/quarto/contents/vol1/nn_architectures/nn_architectures.qmd
index 8235e6cfc..7d3a6c1ae 100644
--- a/book/quarto/contents/vol1/nn_architectures/nn_architectures.qmd
+++ b/book/quarto/contents/vol1/nn_architectures/nn_architectures.qmd
@@ -2489,6 +2489,7 @@ def attention_layer_matrix(Q, K, V):
     output = matmul(weights, V)  # Combine values
     return output
 
+
 # Core computational pattern
 def attention_layer_compute(Q, K, V):
     # Initialize outputs
@@ -2814,6 +2815,7 @@ def self_attention_layer(X, W_Q, W_K, W_V, d_k):
 
     return output
 
+
 def multi_head_attention(X, W_Q, W_K, W_V, W_O, num_heads, d_k):
     outputs = []
     for i in range(num_heads):
diff --git a/book/quarto/contents/vol1/optimizations/model_compression.qmd b/book/quarto/contents/vol1/optimizations/model_compression.qmd
index 783993ff2..c3736f046 100644
--- a/book/quarto/contents/vol1/optimizations/model_compression.qmd
+++ b/book/quarto/contents/vol1/optimizations/model_compression.qmd
@@ -5011,6 +5011,7 @@ conv_out = conv2d(input, weight)
 bn_out = batch_norm(conv_out, ...)
 relu_out = relu(bn_out)
 
+
 # === FUSED: 1 kernel launch, 2 memory transfers ===
 def conv_bn_relu_fused(input, weight, gamma, beta, mean, var):
     # Read input and weight once
diff --git a/book/quarto/contents/vol1/responsible_engr/responsible_engr.qmd b/book/quarto/contents/vol1/responsible_engr/responsible_engr.qmd
index 3a93edb0a..e58e27c34 100644
--- a/book/quarto/contents/vol1/responsible_engr/responsible_engr.qmd
+++ b/book/quarto/contents/vol1/responsible_engr/responsible_engr.qmd
@@ -693,6 +693,7 @@ def compute_fairness_metrics(confusion_matrix):
         "fpr": fp / (fp + tn) if (fp + tn) else 0,
     }
 
+
 # Compare groups and flag disparities exceeding threshold
 for metric in ["approval_rate", "tpr", "fpr"]:
     disparity = abs(metrics_a[metric] - metrics_b[metric])
diff --git a/book/quarto/contents/vol1/training/training.qmd b/book/quarto/contents/vol1/training/training.qmd
index 612ba20ed..c8df07af8 100644
--- a/book/quarto/contents/vol1/training/training.qmd
+++ b/book/quarto/contents/vol1/training/training.qmd
@@ -3992,6 +3992,7 @@ Flash Attention's performance gains materialize through careful exploitation of
 import torch
 import torch.nn.functional as F
 
+
 # Standard attention (materializes n$\times$ n matrix)
 def standard_attention(q, k, v):
     # q, k, v: [batch, heads, seq_len, head_dim]
@@ -4002,15 +4003,18 @@ def standard_attention(q, k, v):
     output = torch.matmul(attn, v)
     return output
 
+
 # Flash Attention (no n$\times$ n materialization)
 def flash_attention(q, k, v):
     # Automatically uses Flash Attention if available
     output = F.scaled_dot_product_attention(q, k, v)
     return output
 
+
 # Explicit Flash Attention 2 (flash-attn library)
 from flash_attn import flash_attn_func
 
+
 def flash_attn_2(q, k, v):
     # q, k, v: [batch, seq_len, heads, head_dim]
     # Different layout for optimized memory access
diff --git a/book/quarto/contents/vol2/inference/inference.qmd b/book/quarto/contents/vol2/inference/inference.qmd
index 9302722d4..7bb6fb82c 100644
--- a/book/quarto/contents/vol2/inference/inference.qmd
+++ b/book/quarto/contents/vol2/inference/inference.qmd
@@ -2747,6 +2747,7 @@ async def swap_to_cpu(sequence_id):
     cpu_cache[sequence_id] = kv_cache.cpu()  # Async transfer
     gpu_cache.free(sequence_id)
 
+
 async def swap_to_gpu(sequence_id):
     cpu_kv = cpu_cache[sequence_id]
     gpu_cache[sequence_id] = cpu_kv.cuda()  # Async transfer
@@ -4256,6 +4257,7 @@ class TenantQuota:
     max_qps: int  # e.g., 1,000
     max_batch_tokens: int  # e.g., 50,000
 
+
 def admit_request(tenant_id, request):
     quota = get_quota(tenant_id)
     usage = get_usage(tenant_id)
diff --git a/book/quarto/contents/vol2/performance_engineering/performance_engineering.qmd b/book/quarto/contents/vol2/performance_engineering/performance_engineering.qmd
index 65982dc84..5d92bc020 100644
--- a/book/quarto/contents/vol2/performance_engineering/performance_engineering.qmd
+++ b/book/quarto/contents/vol2/performance_engineering/performance_engineering.qmd
@@ -1134,11 +1134,12 @@ A minimal example illustrates the usage:
 ```python
 import torch
 
+
 def transformer_block(x, w1, w2, ln_weight, ln_bias):
     """Unfused transformer FFN block."""
-    h = x @ w1                    # Linear projection
+    h = x @ w1  # Linear projection
     h = torch.nn.functional.gelu(h)  # Activation
-    h = h @ w2                    # Output projection
+    h = h @ w2  # Output projection
     # Layer normalization
     mean = h.mean(dim=-1, keepdim=True)
     var = h.var(dim=-1, keepdim=True, unbiased=False)
@@ -1146,6 +1147,7 @@ def transformer_block(x, w1, w2, ln_weight, ln_bias):
     h = h * ln_weight + ln_bias
     return h
 
+
 # Compile the function — TorchDynamo traces, TorchInductor optimizes
 compiled_block = torch.compile(transformer_block)
 
@@ -1213,9 +1215,11 @@ A Triton kernel for fused GELU activation illustrates the programming model:
 import triton
 import triton.language as tl
 
+
 @triton.jit
 def fused_gelu_kernel(
-    input_ptr, output_ptr,
+    input_ptr,
+    output_ptr,
     n_elements,
     BLOCK_SIZE: tl.constexpr,
 ):
@@ -1618,7 +1622,11 @@ The PyTorch Profiler integrates with the training loop to capture detailed trace
 
 ```python
 import torch
-from torch.profiler import profile, schedule, tensorboard_trace_handler
+from torch.profiler import (
+    profile,
+    schedule,
+    tensorboard_trace_handler,
+)
 
 # Profile 2 warmup steps + 3 active steps
 with profile(
diff --git a/book/quarto/contents/vol2/responsible_ai/responsible_ai.qmd b/book/quarto/contents/vol2/responsible_ai/responsible_ai.qmd
index 36c5e9209..39254d1b7 100644
--- a/book/quarto/contents/vol2/responsible_ai/responsible_ai.qmd
+++ b/book/quarto/contents/vol2/responsible_ai/responsible_ai.qmd
@@ -1563,6 +1563,7 @@ from typing import Dict, List, Optional
 import numpy as np
 from sklearn.metrics import confusion_matrix
 
+
 @dataclass
 class FairnessMetrics:
     demographic_parity_diff: float
@@ -1570,6 +1571,7 @@ class FairnessMetrics:
     equality_opportunity_diff: float
     group_counts: Dict[str, int]
 
+
 class RealTimeFairnessMonitor:
     def __init__(
         self, window_size: int = 1000, alert_threshold: float = 0.05
diff --git a/book/quarto/contents/vol2/sustainable_ai/sustainable_ai.qmd b/book/quarto/contents/vol2/sustainable_ai/sustainable_ai.qmd
index e78fe01cb..878f7d80f 100644
--- a/book/quarto/contents/vol2/sustainable_ai/sustainable_ai.qmd
+++ b/book/quarto/contents/vol2/sustainable_ai/sustainable_ai.qmd
@@ -1107,6 +1107,7 @@ Intel's Running Average Power Limit (RAPL) interface exposes power measurements
 import subprocess
 import time
 
+
 def read_rapl_energy():
     """Read current RAPL energy counters.
 
@@ -1122,6 +1123,7 @@ def read_rapl_energy():
     )
     return int(result.stdout.strip())  # Returns microjoules
 
+
 # Measure training energy
 start_energy = read_rapl_energy()
 start_time = time.time()
@@ -1156,6 +1158,7 @@ import time
 pynvml.nvmlInit()
 handle = pynvml.nvmlDeviceGetHandleByIndex(0)  # First GPU
 
+
 def measure_inference_power(model, input_data, num_iterations=100):
     """Measure average GPU power during inference."""
     power_readings = []
@@ -1174,6 +1177,7 @@ def measure_inference_power(model, input_data, num_iterations=100):
     avg_power = sum(power_readings) / len(power_readings)
     return avg_power
 
+
 avg_power = measure_inference_power(model, sample_input)
 print(f"Average inference power: {avg_power:.1f} W")
 ```
@@ -1558,6 +1562,7 @@ def calculate_carbon_footprint(
         / (operational_kg + embodied_kg),
     }
 
+
 # Example: 7B model training
 result = calculate_carbon_footprint(
     gpu_power_watts=400,