# int8
--reset
--cfg=s8s8s32,s8s8s8,s8s8u8,u8s8s32,u8s8s8,u8s8u8
--dir=FWD_B,FWD_D

# Pass 1: per_oc output scale, post-op chain ending in relu.
# set_gpu runs with --mb=2, shapes_0d_gpu with --mb=0.
--attr-oscale=per_oc:2.25
--attr-post-ops='sum:0.5;add:f32;add:u8:per_dim_01;linear:0.5:1.5:2.0;mul:f32:per_dim_0;add:s8:per_oc;add:f32:per_dim_01;relu:0.5'
--mb=2
--batch=set_gpu
--mb=0
--batch=shapes_0d_gpu

# Pass 2: common output scale, chain with a negative-alpha linear and a final
# square. The --mb=0 set for shapes_0d_gpu is still in effect for shapes_1d.
--attr-oscale=common:2.25
--attr-post-ops='sum:0.5;add:f32;add:u8:per_dim_01;linear:-0.5:0.5:0.5;mul:f32:per_dim_0;add:s8:per_oc;add:f32:per_dim_01;square'
--batch=shapes_1d

# f32: FWD_B, BWD_D and BWD_WB on set_gpu; no --attr-* options are set here
--reset
--dir=FWD_B,BWD_D,BWD_WB
--cfg=f32
--mb=2 --batch=set_gpu

# f32 + post_ops
--reset
--dir=FWD_B,FWD_D
--cfg=f32

# sum + relu chain: set_gpu with --mb=2, shapes_0d_gpu with --mb=0.
--attr-post-ops='sum:2.0;relu:3.0'
--mb=2
--batch=set_gpu
--mb=0
--batch=shapes_0d_gpu

# Comma-separated list of three chains for shapes_1d (--mb=0 still applies).
# The list is split over two lines with a trailing backslash.
--attr-post-ops='sum:2','linear:3:2',\
                'add:f32;add:u8:per_dim_01;mul:f32:per_dim_0;add:s8:per_oc;add:f32:per_dim_01;linear:0.5:1.5:2'
--batch=shapes_1d

# bf16: FWD_B, BWD_D and BWD_WB on shapes_1d with the all-bf16 config
--reset
--dir=FWD_B,BWD_D,BWD_WB
--cfg=bf16bf16bf16
--batch=shapes_1d

# f16: forward directions on shapes_1d, for each --mb value and each of the
# two post-op chains listed below.
--reset
--mb=0,2
--cfg=f16
--dir=FWD_B,FWD_D
--attr-post-ops='linear:1:2','add:f32;add:u8:per_dim_01;mul:f32:per_dim_0;add:s8:per_oc;add:f32:per_dim_01;linear:2:3:0.5'
--batch=shapes_1d
# Clear the post-ops again so they do not leak into any later section that
# does not begin with its own --reset. The other options set in this section
# (e.g. --mb=0,2) are NOT cleared here.
--attr-post-ops=

# bf16 + f32: one mixed bf16/f32 config per direction, all on shapes_1d.
# Start from a clean state like the other sections instead of depending on
# what the preceding section left behind. --mb=0,2 is spelled out because it
# is the value this section previously inherited from the f16 section.
--reset
--mb=0,2

--dir=FWD_B
--cfg=bf16bf16f32
--batch=shapes_1d

--dir=BWD_D
--cfg=f32bf16bf16
--batch=shapes_1d

--dir=BWD_WB
--cfg=bf16f32bf16
--batch=shapes_1d

# bf16 + post_ops: FWD_B on shapes_1d for two bf16 configs and a
# comma-separated list of three post-op chains.
# --reset makes the section independent of earlier state, matching the
# sections at the top of the file; every option it needs is set right below.
--reset
--mb=0
--dir=FWD_B
--cfg=bf16bf16bf16,bf16bf16f32
# The chain list is continued onto a second line with a trailing backslash.
--attr-post-ops='sum:0.5;add:f32;add:u8:per_dim_01;linear:0.5:1.5:2.0;mul:f32:per_dim_0;add:s8:per_oc;add:f32:per_dim_01;relu:0.5',\
                'sum:0.5;tanh','clip:0.5:1.5:2'
--batch=shapes_1d

# diff mem tags: memory-tag cases, kept in a separate harness file.
# NOTE(review): the --mb, --dir, --cfg and --attr-post-ops values set by the
# section above are still active when this batch is read. Presumably the
# harness starts with its own --reset; confirm in harness_ip_tag_gpu.
--batch=harness_ip_tag_gpu
