/* TARGET VEGA10 */ {HSA_VEGA10_ID, "", "gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET VEGA10_HBCC */ {HSA_VEGA10_HBCC_ID, "", "gfx901", "gfx901", "gfx901", 4, 16, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET RAVEN */ {HSA_RAVEN_ID, "", "gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET VEGA12 */ {HSA_VEGA12_ID, "", "gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET VEGA20 */ {HSA_VEGA20_ID, "", "gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET ARIEL */ {HSA_ARIEL_ID, "", "gfx1000", "gfx1000", "gfx1000", 2, 32, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET NAVI10 */ {HSA_NAVI10_ID, "", "gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET MI100 */ {HSA_MI100_ID, "", "gfx908", "gfx908", "gfx908", 4, 16, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET NAVI12 */ {HSA_NAVI12_ID, "", "gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 0, 0},
/* TARGET NAVI14 */ {HSA_NAVI14_ID, "", "gfx1012", "gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 0, 0}
uint simdPerCU_; //!< Number of SIMDs per CU
uint simdWidth_; //!< Number of workitems processed per SIMD
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
uint memChannelBankWidth_; //!< Memory channel bank width
uint localMemSizePerCU_; //!< Local memory size per CU
uint localMemBanks_; //!< Number of banks of local memory
uint gfxipVersion_; //!< The core engine GFXIP version
uint pciDeviceId_; //!< PCIe device id
In each table row, after the HSA code-name ID, the empty string, and the three gfx### strings:
The first number is the number of SIMDs per CU: Vega cards have 4 SIMDs per CU, while Ariel has 2 SIMDs per CU.
The next number is the number of work items processed per SIMD: Vega cards process 16 per SIMD, while Ariel processes 32, much like the Navi cards.
So a little math shows that, although these are different architectures, the number of work items processed per CU is the same, just reached through a different configuration (4 × 16 = 2 × 32 = 64). I therefore think the difference comes from elsewhere, such as fab optimizations and a configuration better suited to the target.