{ "appendix_algorithm": { "ALLREDUCE_FACTOR": 2, "ANOMALY_MODEL_AUC": 0.86, "DAYS_PER_MONTH": 30, "DAYS_PER_YEAR": 365, "DLRM_EMBEDDING_DIM": 128, "DLRM_EMBEDDING_ENTRIES": 25000000000.0, "FP32_BITS": 32, "GMAIL_EMAILS_PER_DAY": 121000000000.0, "GOOGLE_SEARCHES_PER_DAY": 8500000000.0, "GPT2_HIDDEN_DIM": 1600, "GPT2_LAYERS": 48, "GPT3_TRAINING_DAYS_REF": 25, "GPT4_TRAINING_GPU_DAYS": 2500000.0, "HOURS_PER_DAY": 24, "HOURS_PER_YEAR": 8760, "INT8_BITS": 8, "KS_TEST_COEFFICIENT": 1.36, "MINUTES_PER_HOUR": 60, "MNIST_IMAGE_HEIGHT": 28, "MNIST_IMAGE_WIDTH": 28, "SECONDS_PER_MINUTE": 60, "SIMD_REGISTER_BITS": 512, "SYSTOLIC_ARRAY_DIM": 128, "TPU_POD_CHIPS": 4096, "TRANSFORMER_HEADS_EXAMPLE": 12, "TRANSFORMER_HIDDEN_DIM_EXAMPLE": 768, "TRANSFORMER_SEQ_LEN_EXAMPLE": 512, "VIDEO_1080P_HEIGHT": 1080, "VIDEO_1080P_WIDTH": 1920, "a100_ridge_str": "153", "a100_ridge_value": 153, "bytes_fp16_str": "2", "bytes_fp32_str": "4", "bytes_int32_str": "4", "bytes_per_fp16_value": 2, "bytes_per_fp32_value": 4, "bytes_per_int32_value": 4, "dense_bytes_value": 4000000000, "dense_gb_str": "4", "dense_gb_value": 4.0, "embed_dim_str": "10,000", "embed_dim_value": 10000, "n_small_str": "64", "n_small_value": 64, "nonzeros_str": "10", "nonzeros_value": 10000000, "optimizer_overhead_str": "8\u201312", "reduction_factor_str": "50", "reduction_factor_value": 50, "small_efficiency_pct_str": "13", "small_efficiency_pct_value": 13.943355119825707, "small_intensity_str": "21", "small_intensity_value": 21.333333333333332, "sparse_bytes_value": 80000000, "sparse_mb_str": "80", "sparse_mb_value": 80.0, "sparsity_pct_str": "1", "sparsity_pct_value": 1, "total_elements_str": "1", "total_elements_value": 1000000000, "vocab_size_str": "100,000", "vocab_size_value": 100000 }, "appendix_assumptions": {}, "appendix_dam": { "ALLREDUCE_FACTOR": 2, "ANOMALY_MODEL_AUC": 0.86, "DAYS_PER_MONTH": 30, "DAYS_PER_YEAR": 365, "DLRM_EMBEDDING_DIM": 128, "DLRM_EMBEDDING_ENTRIES": 25000000000.0, "FP32_BITS": 32, "GMAIL_EMAILS_PER_DAY": 121000000000.0, "GOOGLE_SEARCHES_PER_DAY": 8500000000.0, "GPT2_HIDDEN_DIM": 1600, "GPT2_LAYERS": 48, "GPT3_TRAINING_DAYS_REF": 25, "GPT4_TRAINING_GPU_DAYS": 2500000.0, "HOURS_PER_DAY": 24, "HOURS_PER_YEAR": 8760, "INT8_BITS": 8, "KS_TEST_COEFFICIENT": 1.36, "MINUTES_PER_HOUR": 60, "MNIST_IMAGE_HEIGHT": 28, "MNIST_IMAGE_WIDTH": 28, "SECONDS_PER_MINUTE": 60, "SIMD_REGISTER_BITS": 512, "SYSTOLIC_ARRAY_DIM": 128, "TPU_POD_CHIPS": 4096, "TRANSFORMER_HEADS_EXAMPLE": 12, "TRANSFORMER_HIDDEN_DIM_EXAMPLE": 768, "TRANSFORMER_SEQ_LEN_EXAMPLE": 512, "VIDEO_1080P_HEIGHT": 1080, "VIDEO_1080P_WIDTH": 1920, "ex1_disk_sat_pct": 100, "ex1_disk_sat_str": "100", "ex1_gpu_util_pct": 25, "ex1_gpu_util_str": "25", "ex2_achieved_str": "0.28", "ex2_achieved_tflops_val": 0.27999999999999997, "ex2_bytes_per_param": 2, "ex2_flops_per_param": 2, "ex2_flops_per_pass_str": "0.014", "ex2_flops_per_pass_val": 0.014, "ex2_latency_ms_str": "50", "ex2_latency_s": 0.05, "ex2_model_size_gb_str": "14", "ex2_model_size_gb_val": 14.0, "ex2_params": 7000000000.0, "ex2_params_str": "7B", "ex2_util_str": "0.03", "ex2_util_val": 0.02831142568250758, "ex3_chin_pred_pct": 15, "ex3_chin_pred_str": "15", "ex3_imp_pct": 6.666666666666672, "ex3_imp_str": "6.7", "ex3_loss_end": 0.42, "ex3_loss_start": 0.45, "ex3_params_end_str": "1B", "ex3_params_start_str": "125M", "ex3_scale_factor": 8, "ex4_cost_k": 200, "ex4_cost_str": "$200K", "ex4_gpu_new_n": 8, "ex4_gpu_new_str": "8\u00d7 H100", "ex4_gpu_new_type": "H100", "ex4_gpu_old_n": 4, "ex4_gpu_old_str": "4\u00d7 A100", "ex4_gpu_old_type": "A100", "h100_fp16_tflops_str": "989", "h100_fp16_tflops_val": 989 }, "appendix_data": { "bw_100g_value": 100000000000.0, "bw_10g_value": 10000000000.0, "bw_1g_value": 1000000000.0, "csv_cycles": 100, "csv_cycles_str": "~100", "csv_speed_mb": 100, "csv_speed_str": "~100", "int_size_bytes": 4, "join_network_tb": 2, "join_network_tb_str": "~2", "join_table_tb": 1, "join_table_tb_str": "1", "logit_val": 100, "n_requests_value": 100, "n_unhappy": 10000.0, "n_unhappy_str": "10,000", "n_users": 1000000, "n_users_str": "1M", "p99_ratio": 0.01, "p_all_fast_str": "0.366", "p_all_fast_value": 0.3660323412732292, "p_fast_value": 0.99, "p_slow_pct_str": "63.4", "p_slow_value": 0.6339676587267709, "parquet_cycles": 10, "parquet_cycles_str": "~10", "parquet_speed_mb": 1000, "parquet_speed_str": "> 1,000", "pb_value": 1000000000000000.0, "proto_cycles": 200, "proto_cycles_str": "~200", "proto_speed_mb": 300, "proto_speed_str": "~300", "psi_threshold": 0.2, "row_size_kb": 1, "t_100tb_100g": "2.2 Hours", "t_100tb_100g_value": 8000.0, "t_100tb_10g": "22.2 Hours", "t_100tb_10g_value": 80000.0, "t_100tb_1g": "9 Days", "t_100tb_1g_value": 800000.0, "t_1pb_100g": "22.2 Hours", "t_1pb_100g_value": 80000.0, "t_1pb_10g": "9 Days", "t_1pb_10g_value": 800000.0, "t_1pb_1g": "3 Months", "t_1pb_1g_value": 8000000.0, "t_1tb_100g": "1 Minutes", "t_1tb_100g_value": 80.0, "t_1tb_10g": "13 Minutes", "t_1tb_10g_value": 800.0, "t_1tb_1g": "2.2 Hours", "t_1tb_1g_value": 8000.0, "tb_value": 1000000000000.0, "waste_pct": 99.609375, "waste_pct_str": "99.6" }, "appendix_machine": { "ALLREDUCE_FACTOR": 2, "ANOMALY_MODEL_AUC": 0.86, "DAYS_PER_MONTH": 30, "DAYS_PER_YEAR": 365, "DLRM_EMBEDDING_DIM": 128, "DLRM_EMBEDDING_ENTRIES": 25000000000.0, "FP32_BITS": 32, "GMAIL_EMAILS_PER_DAY": 121000000000.0, "GOOGLE_SEARCHES_PER_DAY": 8500000000.0, "GPT2_HIDDEN_DIM": 1600, "GPT2_LAYERS": 48, "GPT3_TRAINING_DAYS_REF": 25, "GPT4_TRAINING_GPU_DAYS": 2500000.0, "HOURS_PER_DAY": 24, "HOURS_PER_YEAR": 8760, "INT8_BITS": 8, "KS_TEST_COEFFICIENT": 1.36, "L_concurrent_str": "50", "MINUTES_PER_HOUR": 60, "MNIST_IMAGE_HEIGHT": 28, "MNIST_IMAGE_WIDTH": 28, "SECONDS_PER_MINUTE": 60, "SIMD_REGISTER_BITS": 512, "SYSTOLIC_ARRAY_DIM": 128, "TPU_POD_CHIPS": 4096, "TRANSFORMER_HEADS_EXAMPLE": 12, "TRANSFORMER_HIDDEN_DIM_EXAMPLE": 768, "TRANSFORMER_SEQ_LEN_EXAMPLE": 512, "T_days_str": "11", "T_minutes_str": "16026", "T_seconds_str": "961538", "VIDEO_1080P_HEIGHT": 1080, "VIDEO_1080P_WIDTH": 1920, "a100_bw_raw_value": 2.039, "a100_bw_tb": "2.0", "a100_fp16": "312", "a100_fp16_raw_value": 312.0, "amdahl_8_str": "5.9", "amdahl_8_value": 5.925925925925926, "amdahl_inf_str": "20", "amdahl_inf_value": 20.0, "bw_dram": 50, "bw_gbps_str": "10", "bw_gbps_value": 10000000000.0, "bw_hbm_h100": "3.4", "bw_nvme": "3.5", "bw_pcie5": 64, "d_tokens_str": "20B", "d_tokens_value": 20000000000.0, "data_kb_str": "1", "data_kb_value": 1000.0, "dc_mobile_ratio": 28, "dram_pj": 640, "dram_pj_value": 640, "dram_vs_compute": 581, "energy_ratio_str": "581", "energy_ratio_value": 581, "flop_pj": "1", "flop_pj_value": 1.1, "flops_a100_fp16": 312, "flops_h100_fp16": 989, "flops_h100_fp8": 1979, "flops_mobile_int8": 35, "fp32_vs_fp16": 3.4, "fp32_vs_int8": 18, "gemm_intensity": 1365, "gemm_intensity_value": 1365, "gpu_bw_vs_pcie": 52, "gpu_mem_gb_str": "24", "gpu_mem_gb_value": 24, "gustafson_1000_str": "950", "gustafson_1000_value": 950.05, "gustafson_8_serial": "0.35", "gustafson_8_serial_value": 0.35000000000000003, "gustafson_8_str": "7.65", "gustafson_8_value": 7.65, "h100_bw": "3.35", "h100_bw_value": 3.35, "h100_cap": 80, "h100_cap_value": 80, "h100_flops": 989, "h100_flops_value": 989, "h100_l2_mb": 50, "h100_l2_mb_value": 50, "h100_nvlink": 900, "h100_nvlink_value": 900, "hbm_ns": 300, "hbm_ns_value": 300, "hbm_vs_l1": 300, "ib_ns": 5000, "ib_ns_value": 5000, "l1_ns": 1, "l1_ns_value": 1, "l1_vs_reg": 50, "l2_ns": 4, "l2_ns_value": 4, "l_concurrent_value": 50.0, "lambda_qps_raw_str": "1000", "lambda_qps_str": "1,000", "lambda_qps_value": 1000, "large_data_bits_value": 8000000000.0, "large_data_gb_str": "1", "large_data_gb_value": 1, "large_tx_time_s_value": 0.8, "lat_hbm_ns": 300, "lat_ib_ns": 5000, "lat_l1_ns": 1, "lat_l2_ns": 4, "lat_pcie_ns": 1000, "lat_ssd_ns": 100000, "max_concurrent_str": "24", "max_concurrent_value": 24, "max_throughput_value": 480.0, "mem_per_req_gb_str": "1", "mem_per_req_gb_value": 1, "n_1000_minus_1_str": "999", "n_1000_str": "1000", "n_1000_value": 1000, "n_8_g_minus_1_str": "7", "n_8_g_str": "8", "n_8_g_value": 8, "n_8_str": "8", "n_8_value": 8, "n_gemm": 4096, "n_gemm_value": 4096, "n_gpus_str": "1", "n_gpus_value": 1, "network_vs_local": 16, "nvlink_ns": 500, "nvlink_ns_value": 500, "p_params_str": "1B", "p_params_value": 1000000000.0, "p_pct_str": "95", "p_str": "0.95", "p_value": 0.95, "pcie_ns": 1000, "pcie_ns_value": 1000, "ping_ms_str": "10", "ping_ms_value": 10, "relu_achieved_tflops_value": 0.50975, "relu_intensity": 0.25, "relu_intensity_value": 0.25, "relu_util_str": "0.16", "relu_utilization_value": 0.1633814102564103, "ridge_a100": 153, "ridge_h100": 295, "ridge_point": 153, "ridge_point_value": 153, "s_g_pct_str": "5", "s_g_str": "0.05", "s_g_value": 0.05, "s_pct_str": "5", "s_str": "0.05", "s_value": 0.05, "speed_of_light_km_ms": 200, "ssd_ns": 100000, "ssd_ns_value": 100000, "ssd_vs_l1": 100000, "t_days_value": 11.128917378917379, "t_minutes_value": 16025.641025641025, "t_seconds_value": 961538.4615384615, "throughput_value": 124800000000000.0, "total_flops_value": 1.2e+20, "total_large_time_ms_value": 810.0, "tpuv5_bw": "2.76", "tpuv5_bw_value": 2.76, "tpuv5_cap": 95, "tpuv5_cap_value": 95, "tpuv5_flops": 459, "tpuv5_flops_value": 459, "tpuv5_ici": 1600, "tpuv5_ici_value": 1600, "tpuv5_l2_mb": 100, "tpuv5_l2_mb_value": 100, "tx_time_s_value": 8e-07, "tx_time_us_str": "0.8", "tx_time_us_value": 0.7999999999999999, "u_mfu_pct_str": "40", "u_mfu_value": 0.4, "w_latency_ms_str": "50", "w_latency_s_str": "0.05", "w_latency_s_value": 0.05, "x_flops_value": 312000000000000.0 }, "benchmarking": {}, "conclusion": {}, "data_engineering": {}, "data_selection": {}, "nn_computation": {}, "nn_architectures": {}, "frameworks": {}, "hw_acceleration": {}, "introduction": {}, "ml_systems": {}, "model_compression": {}, "ml_ops": {}, "responsible_engr": {}, "model_serving": {}, "training": {}, "ml_workflow": {} }