mcdp {
    """Matrix Multiplication with bf16 type (2 bytes per element)"""
    # Resource model for a tiled matrix multiplication.
    # The functionality/resource ports (dim_*, num_tiles_*, cache_size,
    # memory_movement, compute) are declared by the interface below, which
    # is defined outside this file.
    implements `matmul_interface

    # Element counts of the A (m x k), B (n x k) and C (m x n) operands.
    # NOTE(review): dim_m/dim_n/dim_k look like per-tile dimensions, since
    # they are multiplied by the num_tiles_* counts further down -- confirm
    # against the interface definition.
    a_matrix_size = (provided dim_m * provided dim_k)
    b_matrix_size = (provided dim_n * provided dim_k)
    c_matrix_size = (provided dim_m * provided dim_n)

    # m * n * k product for one set of dimensions, and the total number of
    # tiles across the three tiling axes.
    tile_compute = (provided dim_m) * (provided dim_n) * (provided dim_k)
    num_tiles = (provided num_tiles_m) * (provided num_tiles_k) * (provided num_tiles_n)

    # Elements moved per operand. A and B are scaled by the tile counts of
    # all three axes; C is scaled only by the n and m tile counts.
    # NOTE(review): presumably C is accumulated in cache across the k axis,
    # which would explain the missing num_tiles_k factor -- confirm.
    a_matrix_movement = (a_matrix_size * (provided num_tiles_m) * (provided num_tiles_k)) * (provided num_tiles_n)
    b_matrix_movement = (b_matrix_size * (provided num_tiles_k) * (provided num_tiles_n)) * (provided num_tiles_m)
    c_matrix_movement = (c_matrix_size * (provided num_tiles_n) * (provided num_tiles_m))

    # Cache must hold one A, B and C block at 2 bytes per element (bf16).
    required cache_size >= (2 B) * (a_matrix_size + b_matrix_size + c_matrix_size)

    # Total bytes moved, again at 2 bytes per element.
    required memory_movement >= (2 B) * (a_matrix_movement + b_matrix_movement + c_matrix_movement)

    # 2 ops per m*n*k element, over all tiles.
    # NOTE(review): the factor 2 presumably counts one multiply plus one
    # add -- confirm.
    required compute >= ((2 ops) * tile_compute * num_tiles)
}