mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-28 00:32:43 -05:00
Unifies Quarto metadata into shared base/format/volume fragments while carrying through chapter path, asset, and tooling updates to keep the repository consistent and easier to maintain.
394 lines
11 KiB
JSON
394 lines
11 KiB
JSON
{
|
|
"appendix_algorithm": {
|
|
"ALLREDUCE_FACTOR": 2,
|
|
"ANOMALY_MODEL_AUC": 0.86,
|
|
"DAYS_PER_MONTH": 30,
|
|
"DAYS_PER_YEAR": 365,
|
|
"DLRM_EMBEDDING_DIM": 128,
|
|
"DLRM_EMBEDDING_ENTRIES": 25000000000.0,
|
|
"FP32_BITS": 32,
|
|
"GMAIL_EMAILS_PER_DAY": 121000000000.0,
|
|
"GOOGLE_SEARCHES_PER_DAY": 8500000000.0,
|
|
"GPT2_HIDDEN_DIM": 1600,
|
|
"GPT2_LAYERS": 48,
|
|
"GPT3_TRAINING_DAYS_REF": 25,
|
|
"GPT4_TRAINING_GPU_DAYS": 2500000.0,
|
|
"HOURS_PER_DAY": 24,
|
|
"HOURS_PER_YEAR": 8760,
|
|
"INT8_BITS": 8,
|
|
"KS_TEST_COEFFICIENT": 1.36,
|
|
"MINUTES_PER_HOUR": 60,
|
|
"MNIST_IMAGE_HEIGHT": 28,
|
|
"MNIST_IMAGE_WIDTH": 28,
|
|
"SECONDS_PER_MINUTE": 60,
|
|
"SIMD_REGISTER_BITS": 512,
|
|
"SYSTOLIC_ARRAY_DIM": 128,
|
|
"TPU_POD_CHIPS": 4096,
|
|
"TRANSFORMER_HEADS_EXAMPLE": 12,
|
|
"TRANSFORMER_HIDDEN_DIM_EXAMPLE": 768,
|
|
"TRANSFORMER_SEQ_LEN_EXAMPLE": 512,
|
|
"VIDEO_1080P_HEIGHT": 1080,
|
|
"VIDEO_1080P_WIDTH": 1920,
|
|
"a100_ridge_str": "153",
|
|
"a100_ridge_value": 153,
|
|
"bytes_fp16_str": "2",
|
|
"bytes_fp32_str": "4",
|
|
"bytes_int32_str": "4",
|
|
"bytes_per_fp16_value": 2,
|
|
"bytes_per_fp32_value": 4,
|
|
"bytes_per_int32_value": 4,
|
|
"dense_bytes_value": 4000000000,
|
|
"dense_gb_str": "4",
|
|
"dense_gb_value": 4.0,
|
|
"embed_dim_str": "10,000",
|
|
"embed_dim_value": 10000,
|
|
"n_small_str": "64",
|
|
"n_small_value": 64,
|
|
"nonzeros_str": "10",
|
|
"nonzeros_value": 10000000,
|
|
"optimizer_overhead_str": "8\u201312",
|
|
"reduction_factor_str": "50",
|
|
"reduction_factor_value": 50,
|
|
"small_efficiency_pct_str": "13",
|
|
"small_efficiency_pct_value": 13.943355119825707,
|
|
"small_intensity_str": "21",
|
|
"small_intensity_value": 21.333333333333332,
|
|
"sparse_bytes_value": 80000000,
|
|
"sparse_mb_str": "80",
|
|
"sparse_mb_value": 80.0,
|
|
"sparsity_pct_str": "1",
|
|
"sparsity_pct_value": 1,
|
|
"total_elements_str": "1",
|
|
"total_elements_value": 1000000000,
|
|
"vocab_size_str": "100,000",
|
|
"vocab_size_value": 100000
|
|
},
|
|
"appendix_assumptions": {},
|
|
"appendix_dam": {
|
|
"ALLREDUCE_FACTOR": 2,
|
|
"ANOMALY_MODEL_AUC": 0.86,
|
|
"DAYS_PER_MONTH": 30,
|
|
"DAYS_PER_YEAR": 365,
|
|
"DLRM_EMBEDDING_DIM": 128,
|
|
"DLRM_EMBEDDING_ENTRIES": 25000000000.0,
|
|
"FP32_BITS": 32,
|
|
"GMAIL_EMAILS_PER_DAY": 121000000000.0,
|
|
"GOOGLE_SEARCHES_PER_DAY": 8500000000.0,
|
|
"GPT2_HIDDEN_DIM": 1600,
|
|
"GPT2_LAYERS": 48,
|
|
"GPT3_TRAINING_DAYS_REF": 25,
|
|
"GPT4_TRAINING_GPU_DAYS": 2500000.0,
|
|
"HOURS_PER_DAY": 24,
|
|
"HOURS_PER_YEAR": 8760,
|
|
"INT8_BITS": 8,
|
|
"KS_TEST_COEFFICIENT": 1.36,
|
|
"MINUTES_PER_HOUR": 60,
|
|
"MNIST_IMAGE_HEIGHT": 28,
|
|
"MNIST_IMAGE_WIDTH": 28,
|
|
"SECONDS_PER_MINUTE": 60,
|
|
"SIMD_REGISTER_BITS": 512,
|
|
"SYSTOLIC_ARRAY_DIM": 128,
|
|
"TPU_POD_CHIPS": 4096,
|
|
"TRANSFORMER_HEADS_EXAMPLE": 12,
|
|
"TRANSFORMER_HIDDEN_DIM_EXAMPLE": 768,
|
|
"TRANSFORMER_SEQ_LEN_EXAMPLE": 512,
|
|
"VIDEO_1080P_HEIGHT": 1080,
|
|
"VIDEO_1080P_WIDTH": 1920,
|
|
"ex1_disk_sat_pct": 100,
|
|
"ex1_disk_sat_str": "100",
|
|
"ex1_gpu_util_pct": 25,
|
|
"ex1_gpu_util_str": "25",
|
|
"ex2_achieved_str": "0.28",
|
|
"ex2_achieved_tflops_val": 0.27999999999999997,
|
|
"ex2_bytes_per_param": 2,
|
|
"ex2_flops_per_param": 2,
|
|
"ex2_flops_per_pass_str": "0.014",
|
|
"ex2_flops_per_pass_val": 0.014,
|
|
"ex2_latency_ms_str": "50",
|
|
"ex2_latency_s": 0.05,
|
|
"ex2_model_size_gb_str": "14",
|
|
"ex2_model_size_gb_val": 14.0,
|
|
"ex2_params": 7000000000.0,
|
|
"ex2_params_str": "7B",
|
|
"ex2_util_str": "0.03",
|
|
"ex2_util_val": 0.02831142568250758,
|
|
"ex3_chin_pred_pct": 15,
|
|
"ex3_chin_pred_str": "15",
|
|
"ex3_imp_pct": 6.666666666666672,
|
|
"ex3_imp_str": "6.7",
|
|
"ex3_loss_end": 0.42,
|
|
"ex3_loss_start": 0.45,
|
|
"ex3_params_end_str": "1B",
|
|
"ex3_params_start_str": "125M",
|
|
"ex3_scale_factor": 8,
|
|
"ex4_cost_k": 200,
|
|
"ex4_cost_str": "$200K",
|
|
"ex4_gpu_new_n": 8,
|
|
"ex4_gpu_new_str": "8\u00d7 H100",
|
|
"ex4_gpu_new_type": "H100",
|
|
"ex4_gpu_old_n": 4,
|
|
"ex4_gpu_old_str": "4\u00d7 A100",
|
|
"ex4_gpu_old_type": "A100",
|
|
"h100_fp16_tflops_str": "989",
|
|
"h100_fp16_tflops_val": 989
|
|
},
|
|
"appendix_data": {
|
|
"bw_100g_value": 100000000000.0,
|
|
"bw_10g_value": 10000000000.0,
|
|
"bw_1g_value": 1000000000.0,
|
|
"csv_cycles": 100,
|
|
"csv_cycles_str": "~100",
|
|
"csv_speed_mb": 100,
|
|
"csv_speed_str": "~100",
|
|
"int_size_bytes": 4,
|
|
"join_network_tb": 2,
|
|
"join_network_tb_str": "~2",
|
|
"join_table_tb": 1,
|
|
"join_table_tb_str": "1",
|
|
"logit_val": 100,
|
|
"n_requests_value": 100,
|
|
"n_unhappy": 10000.0,
|
|
"n_unhappy_str": "10,000",
|
|
"n_users": 1000000,
|
|
"n_users_str": "1M",
|
|
"p99_ratio": 0.01,
|
|
"p_all_fast_str": "0.366",
|
|
"p_all_fast_value": 0.3660323412732292,
|
|
"p_fast_value": 0.99,
|
|
"p_slow_pct_str": "63.4",
|
|
"p_slow_value": 0.6339676587267709,
|
|
"parquet_cycles": 10,
|
|
"parquet_cycles_str": "~10",
|
|
"parquet_speed_mb": 1000,
|
|
"parquet_speed_str": "> 1,000",
|
|
"pb_value": 1000000000000000.0,
|
|
"proto_cycles": 200,
|
|
"proto_cycles_str": "~200",
|
|
"proto_speed_mb": 300,
|
|
"proto_speed_str": "~300",
|
|
"psi_threshold": 0.2,
|
|
"row_size_kb": 1,
|
|
"t_100tb_100g": "2.2 Hours",
|
|
"t_100tb_100g_value": 8000.0,
|
|
"t_100tb_10g": "22.2 Hours",
|
|
"t_100tb_10g_value": 80000.0,
|
|
"t_100tb_1g": "9 Days",
|
|
"t_100tb_1g_value": 800000.0,
|
|
"t_1pb_100g": "22.2 Hours",
|
|
"t_1pb_100g_value": 80000.0,
|
|
"t_1pb_10g": "9 Days",
|
|
"t_1pb_10g_value": 800000.0,
|
|
"t_1pb_1g": "3 Months",
|
|
"t_1pb_1g_value": 8000000.0,
|
|
"t_1tb_100g": "1 Minutes",
|
|
"t_1tb_100g_value": 80.0,
|
|
"t_1tb_10g": "13 Minutes",
|
|
"t_1tb_10g_value": 800.0,
|
|
"t_1tb_1g": "2.2 Hours",
|
|
"t_1tb_1g_value": 8000.0,
|
|
"tb_value": 1000000000000.0,
|
|
"waste_pct": 99.609375,
|
|
"waste_pct_str": "99.6"
|
|
},
|
|
"appendix_machine": {
|
|
"ALLREDUCE_FACTOR": 2,
|
|
"ANOMALY_MODEL_AUC": 0.86,
|
|
"DAYS_PER_MONTH": 30,
|
|
"DAYS_PER_YEAR": 365,
|
|
"DLRM_EMBEDDING_DIM": 128,
|
|
"DLRM_EMBEDDING_ENTRIES": 25000000000.0,
|
|
"FP32_BITS": 32,
|
|
"GMAIL_EMAILS_PER_DAY": 121000000000.0,
|
|
"GOOGLE_SEARCHES_PER_DAY": 8500000000.0,
|
|
"GPT2_HIDDEN_DIM": 1600,
|
|
"GPT2_LAYERS": 48,
|
|
"GPT3_TRAINING_DAYS_REF": 25,
|
|
"GPT4_TRAINING_GPU_DAYS": 2500000.0,
|
|
"HOURS_PER_DAY": 24,
|
|
"HOURS_PER_YEAR": 8760,
|
|
"INT8_BITS": 8,
|
|
"KS_TEST_COEFFICIENT": 1.36,
|
|
"L_concurrent_str": "50",
|
|
"MINUTES_PER_HOUR": 60,
|
|
"MNIST_IMAGE_HEIGHT": 28,
|
|
"MNIST_IMAGE_WIDTH": 28,
|
|
"SECONDS_PER_MINUTE": 60,
|
|
"SIMD_REGISTER_BITS": 512,
|
|
"SYSTOLIC_ARRAY_DIM": 128,
|
|
"TPU_POD_CHIPS": 4096,
|
|
"TRANSFORMER_HEADS_EXAMPLE": 12,
|
|
"TRANSFORMER_HIDDEN_DIM_EXAMPLE": 768,
|
|
"TRANSFORMER_SEQ_LEN_EXAMPLE": 512,
|
|
"T_days_str": "11",
|
|
"T_minutes_str": "16026",
|
|
"T_seconds_str": "961538",
|
|
"VIDEO_1080P_HEIGHT": 1080,
|
|
"VIDEO_1080P_WIDTH": 1920,
|
|
"a100_bw_raw_value": 2.039,
|
|
"a100_bw_tb": "2.0",
|
|
"a100_fp16": "312",
|
|
"a100_fp16_raw_value": 312.0,
|
|
"amdahl_8_str": "5.9",
|
|
"amdahl_8_value": 5.925925925925926,
|
|
"amdahl_inf_str": "20",
|
|
"amdahl_inf_value": 20.0,
|
|
"bw_dram": 50,
|
|
"bw_gbps_str": "10",
|
|
"bw_gbps_value": 10000000000.0,
|
|
"bw_hbm_h100": "3.4",
|
|
"bw_nvme": "3.5",
|
|
"bw_pcie5": 64,
|
|
"d_tokens_str": "20B",
|
|
"d_tokens_value": 20000000000.0,
|
|
"data_kb_str": "1",
|
|
"data_kb_value": 1000.0,
|
|
"dc_mobile_ratio": 28,
|
|
"dram_pj": 640,
|
|
"dram_pj_value": 640,
|
|
"dram_vs_compute": 581,
|
|
"energy_ratio_str": "581",
|
|
"energy_ratio_value": 581,
|
|
"flop_pj": "1",
|
|
"flop_pj_value": 1.1,
|
|
"flops_a100_fp16": 312,
|
|
"flops_h100_fp16": 989,
|
|
"flops_h100_fp8": 1979,
|
|
"flops_mobile_int8": 35,
|
|
"fp32_vs_fp16": 3.4,
|
|
"fp32_vs_int8": 18,
|
|
"gemm_intensity": 1365,
|
|
"gemm_intensity_value": 1365,
|
|
"gpu_bw_vs_pcie": 52,
|
|
"gpu_mem_gb_str": "24",
|
|
"gpu_mem_gb_value": 24,
|
|
"gustafson_1000_str": "950",
|
|
"gustafson_1000_value": 950.05,
|
|
"gustafson_8_serial": "0.35",
|
|
"gustafson_8_serial_value": 0.35000000000000003,
|
|
"gustafson_8_str": "7.65",
|
|
"gustafson_8_value": 7.65,
|
|
"h100_bw": "3.35",
|
|
"h100_bw_value": 3.35,
|
|
"h100_cap": 80,
|
|
"h100_cap_value": 80,
|
|
"h100_flops": 989,
|
|
"h100_flops_value": 989,
|
|
"h100_l2_mb": 50,
|
|
"h100_l2_mb_value": 50,
|
|
"h100_nvlink": 900,
|
|
"h100_nvlink_value": 900,
|
|
"hbm_ns": 300,
|
|
"hbm_ns_value": 300,
|
|
"hbm_vs_l1": 300,
|
|
"ib_ns": 5000,
|
|
"ib_ns_value": 5000,
|
|
"l1_ns": 1,
|
|
"l1_ns_value": 1,
|
|
"l1_vs_reg": 50,
|
|
"l2_ns": 4,
|
|
"l2_ns_value": 4,
|
|
"l_concurrent_value": 50.0,
|
|
"lambda_qps_raw_str": "1000",
|
|
"lambda_qps_str": "1,000",
|
|
"lambda_qps_value": 1000,
|
|
"large_data_bits_value": 8000000000.0,
|
|
"large_data_gb_str": "1",
|
|
"large_data_gb_value": 1,
|
|
"large_tx_time_s_value": 0.8,
|
|
"lat_hbm_ns": 300,
|
|
"lat_ib_ns": 5000,
|
|
"lat_l1_ns": 1,
|
|
"lat_l2_ns": 4,
|
|
"lat_pcie_ns": 1000,
|
|
"lat_ssd_ns": 100000,
|
|
"max_concurrent_str": "24",
|
|
"max_concurrent_value": 24,
|
|
"max_throughput_value": 480.0,
|
|
"mem_per_req_gb_str": "1",
|
|
"mem_per_req_gb_value": 1,
|
|
"n_1000_minus_1_str": "999",
|
|
"n_1000_str": "1000",
|
|
"n_1000_value": 1000,
|
|
"n_8_g_minus_1_str": "7",
|
|
"n_8_g_str": "8",
|
|
"n_8_g_value": 8,
|
|
"n_8_str": "8",
|
|
"n_8_value": 8,
|
|
"n_gemm": 4096,
|
|
"n_gemm_value": 4096,
|
|
"n_gpus_str": "1",
|
|
"n_gpus_value": 1,
|
|
"network_vs_local": 16,
|
|
"nvlink_ns": 500,
|
|
"nvlink_ns_value": 500,
|
|
"p_params_str": "1B",
|
|
"p_params_value": 1000000000.0,
|
|
"p_pct_str": "95",
|
|
"p_str": "0.95",
|
|
"p_value": 0.95,
|
|
"pcie_ns": 1000,
|
|
"pcie_ns_value": 1000,
|
|
"ping_ms_str": "10",
|
|
"ping_ms_value": 10,
|
|
"relu_achieved_tflops_value": 0.50975,
|
|
"relu_intensity": 0.25,
|
|
"relu_intensity_value": 0.25,
|
|
"relu_util_str": "0.16",
|
|
"relu_utilization_value": 0.1633814102564103,
|
|
"ridge_a100": 153,
|
|
"ridge_h100": 295,
|
|
"ridge_point": 153,
|
|
"ridge_point_value": 153,
|
|
"s_g_pct_str": "5",
|
|
"s_g_str": "0.05",
|
|
"s_g_value": 0.05,
|
|
"s_pct_str": "5",
|
|
"s_str": "0.05",
|
|
"s_value": 0.05,
|
|
"speed_of_light_km_ms": 200,
|
|
"ssd_ns": 100000,
|
|
"ssd_ns_value": 100000,
|
|
"ssd_vs_l1": 100000,
|
|
"t_days_value": 11.128917378917379,
|
|
"t_minutes_value": 16025.641025641025,
|
|
"t_seconds_value": 961538.4615384615,
|
|
"throughput_value": 124800000000000.0,
|
|
"total_flops_value": 1.2e+20,
|
|
"total_large_time_ms_value": 810.0,
|
|
"tpuv5_bw": "2.76",
|
|
"tpuv5_bw_value": 2.76,
|
|
"tpuv5_cap": 95,
|
|
"tpuv5_cap_value": 95,
|
|
"tpuv5_flops": 459,
|
|
"tpuv5_flops_value": 459,
|
|
"tpuv5_ici": 1600,
|
|
"tpuv5_ici_value": 1600,
|
|
"tpuv5_l2_mb": 100,
|
|
"tpuv5_l2_mb_value": 100,
|
|
"tx_time_s_value": 8e-07,
|
|
"tx_time_us_str": "0.8",
|
|
"tx_time_us_value": 0.7999999999999999,
|
|
"u_mfu_pct_str": "40",
|
|
"u_mfu_value": 0.4,
|
|
"w_latency_ms_str": "50",
|
|
"w_latency_s_str": "0.05",
|
|
"w_latency_s_value": 0.05,
|
|
"x_flops_value": 312000000000000.0
|
|
},
|
|
"benchmarking": {},
|
|
"conclusion": {},
|
|
"data_engineering": {},
|
|
"data_selection": {},
|
|
"nn_computation": {},
|
|
"nn_architectures": {},
|
|
"frameworks": {},
|
|
"hw_acceleration": {},
|
|
"introduction": {},
|
|
"ml_systems": {},
|
|
"model_compression": {},
|
|
"ml_ops": {},
|
|
"responsible_engr": {},
|
|
"model_serving": {},
|
|
"training": {},
|
|
"ml_workflow": {}
|
|
} |