← Back to Architecture
GQA
Architecture
Used in
245 PRs
Best BPB
0.0235
Avg BPB
1.0737
Submissions
PR #31by JackYoung27
1.2663PR #178by timowhite88
1.1667PR #195by chasewebb
1.2355PR #201by machdragon
1.1551PR #252by greqone
1.1554PR #298by MrINVISO
1.2271PR #330by bopmite
1.1609PR #367by ksang123
1.1770PR #388by ElliotSlusky
1.1231PR #420by leofeasby
1.1454PR #426by aniketio-ctrl
1.2026PR #486by ndokutovich
1.1101PR #488by pkim02
1.3267PR #498by newjordan
1.1478PR #516by Asukabot0
1.1428PR #531by pragnyanramtha
1.1324PR #614by bigbag
0.6864PR #637by bryjudy
1.1477PR #669by amabito
1.4942PR #685by andrewbaggio1
1.0366PR #692by EthanYangTW
1.1186PR #693by EthanYangTW
1.1186PR #715by Asukabot0
1.0337PR #731by pentxayc
1.0400PR #761by Asukabot0
0.9581PR #769by MatoTeziTanka
0.8508PR #778by raahilshah
0.9605PR #798by travispchen
0.5466PR #805by zeytx
1.1807PR #810by Idan3011
0.9393PR #826by himanshudongre
0.2951PR #828by bigbag
0.9076PR #840by quietsmile
0.2873PR #864by aryanbhosale
0.2841PR #865by aryanbhosale
0.2841PR #870by simon-marcus
0.0935PR #871by greqone
0.8004PR #874by fielding
1.6070PR #876by Bortlesboat
0.5863PR #889by anthony-maio
0.9642PR #896by MVPandey
1.1896PR #900by Robby955
0.1156PR #904by anthony-maio
1.2734PR #905by anthony-maio
1.8587PR #907by resouer
0.0960PR #912by Bortlesboat
0.3461PR #913by RoyiRa
0.0887PR #914by mkenney2
1.1873PR #915by anthony-maio
0.9642PR #916by Bortlesboat
0.3461PR #920by CiprianFlorin-Ifrim
1.1539PR #921by TimPietrusky
0.0939PR #922by greqone
0.0972PR #923by CiprianFlorin-Ifrim
1.1090PR #924by THUQiXuan
0.0280PR #926by NandhuRajRK
0.8705PR #927by Tonyy1977
1.1696PR #932by anthony-maio
1.1580PR #940by antaloaalonso
0.9581PR #948by dentity007
0.1156PR #961by callithyia
0.0881PR #963by sunnypatneedi
0.8609PR #968by dentity007
0.1154PR #972by Idan3011
0.3922PR #975by Abhishek8108
1.1216PR #978by AnirudhRahul
1.5134PR #979by 0xadvait
1.1387PR #981by BurguerJohn
1.4893PR #982by haikosys
0.0638PR #989by alexanderaperry-arch
1.1402PR #992by TimS-ml
1.4054PR #994by singhaikshitijjain
1.4315PR #997by randy06122001-boop
1.4182PR #1006by NewyorkDev
1.1085PR #1007by dillon-blake
1.2252PR #1016by ADIITJ
1.1269PR #1027by Syed-M-Zeeshan
1.3036PR #1029by fielding
1.1520PR #1033by Naazimsnh02
0.4311PR #1037by TimPietruskyRunPod
1.1179PR #1044by greqone
1.8989PR #1046by Jayteare
1.2174PR #1055by sanyalsunny111
0.9693PR #1062by yaowubarbara
1.4508PR #1065by rithunkp
1.1536PR #1066by adityakm24
1.1259PR #1068by LappyG
1.1510PR #1071by AbhayAnandUCSD
1.1455PR #1072by vimeto
1.1170PR #1074by ldh-at
1.3288PR #1080by ciach
1.1228PR #1085by adityasasidhar
1.2831PR #1086by Omrigotlieb
1.1349PR #1095by vimeto
0.0905PR #1097by danielxmed
1.3355PR #1098by adityakm24
1.1187PR #1101by amrayach
1.1290PR #1107by mradassaad
1.5633PR #1108by DbBested
1.1502PR #1110by gowtham0992
1.2249PR #1112by dillon-blake
1.2252PR #1113by gowtham0992
1.3705PR #1114by minh-stakc
0.0235PR #1117by adityakm24
1.1187PR #1118by adityakm24
1.1187PR #1125by jainpranjal97
1.1946PR #1126by AnirudhRahul
1.1091PR #1129by EthanYangTW
1.1174PR #1141by ivanontech
1.1801PR #1144by inFaaa
1.3572PR #1150by sahiee-dev
1.1151PR #1169by Bortlesboat
1.1126PR #1170by Christopher-Lee-McClendon
1.1199PR #1174by Okropniak
1.3069PR #1176by bigbag
1.0962PR #1180by estesryan
1.0577PR #1182by adityakm24
1.1227PR #1209by andrewbaggio1
1.1064PR #1226by Wolfie8935
1.1428PR #1227by himanshudongre
1.4841PR #1232by Christopher-Lee-McClendon
1.0929PR #1235by maksblu
1.3527PR #1239by tmancino
1.5918PR #1240by andrewbaggio1
1.1064PR #1245by mkenney2
1.1470PR #1246by deborahnelson8788726
0.9650PR #1247by fahmitech
1.2208PR #1248by ibarrajo
1.1264PR #1249by ibarrajo
1.1240PR #1253by Okropniak
1.2326PR #1254by Elarwei001
1.1070PR #1263by xexyz
0.9354PR #1268by samquiring
1.1875PR #1279by dexhunter
1.0924PR #1284by tyrel-beede
1.1207PR #1287by dentity007
1.1048PR #1291by dentity007
1.0925PR #1293by 5en5e1
1.2409PR #1300by Ribin545
1.8184PR #1302by vlivashkin
1.1078PR #1303by anthony-maio
0.9462PR #1305by DariusFeher
1.2070PR #1307by amrayach
1.1101PR #1310by cadenmcmann
1.1177PR #1312by adi-suresh01
1.3299PR #1313by anthony-maio
0.8637PR #1318by renqianluo
1.0095PR #1319by canivel
0.6951PR #1321by anthony-maio
0.7406PR #1322by newjordan
1.0854PR #1324by yahya010
0.8275PR #1330by luciobaiocchi
1.4617PR #1335by WeijieChen2017
1.1948PR #1337by sergimichi
1.2079PR #1351by resouer
1.0807PR #1359by LucasErcolano
0.4188PR #1361by jorge-asenjo
1.1220PR #1364by stukenov
1.1025PR #1367by ieuko
1.3319PR #1368by JKSNS
0.8503PR #1371by aarjunsrinivasan
1.4709PR #1376by stukenov
0.7094PR #1379by LucasErcolano
0.4162PR #1385by korentomas
1.4465PR #1388by CiprianFlorin-Ifrim
1.5390PR #1399by AnubhavBharadwaaj
1.0898PR #1400by tmancino
1.1035PR #1401by teerthsharma
1.1100PR #1405by anthony-maio
1.0856PR #1411by Blakethefn
1.5568PR #1414by Abhishek8108
0.7093PR #1417by BruhTheMomentum
1.3039PR #1421by X-Abhishek-X
1.0925PR #1425by dentity007
1.4479PR #1428by ntwari-bruce
1.2370PR #1431by Idan3011
1.1266PR #1434by ranausmanai
1.5207PR #1435by AbhayAnandUCSD
1.0980PR #1436by DevWizard-Vandan
1.5546PR #1445by X-Abhishek-X
1.0889PR #1449by codeprakhar25
1.3680PR #1457by DilpreetBansi
1.1454PR #1458by newjordan
1.1057PR #1463by tsubasagit
1.2774PR #1471by X-Abhishek-X
1.0866PR #1473by AVINASH0052
1.1156PR #1479by andrewbaggio1
1.1450PR #1484by AlirezaAlampour
1.6656PR #1485by ndokutovich
1.0679PR #1486by AlirezaAlampour
1.6656PR #1488by ndokutovich
0.8265PR #1489by joshkmartinez
1.0736PR #1492by bigbag
1.0810PR #1494by G3sparky
1.1220PR #1499by dippatel1994
1.6323PR #1501by SPThole
1.1159PR #1502by SPThole
1.1147PR #1507by ChideraIbe123
0.2282PR #1508by jpfeiffe
1.1135PR #1515by dexhunter
1.0872PR #1517by RulinShao
1.0632PR #1539by translatingthename
1.0587PR #1543by PavelPaha
1.3286PR #1546by SPThole
1.0850PR #1548by dljr-github
1.3220PR #1555by andrewbaggio1
1.0764PR #1559by adityasasidhar
1.2498PR #1561by EthanYangTW
1.0783PR #1565by Idan3011
1.1036PR #1568by yuitokyouni
1.1639PR #1579by Tonyy1977
1.1372PR #1580by liveyourday
1.2286PR #1581by aiejvn
1.2321PR #1582by He-Wenhao
1.3428PR #1600by sayujshah
1.2781PR #1601by SPThole
1.1190PR #1606by AlirezaAlampour
1.3969PR #1607by inin-zou
1.4765PR #1616by Vickyrrrrrr
1.4100PR #1617by adityasasidhar
1.2192PR #1619by AVINASH0052
1.1156PR #1621by mrbese
1.1531PR #1623by divagr18
1.1942PR #1632by Hkoyuer
1.0274PR #1640by thestbobo
1.1412PR #1643by mradassaad
1.1473PR #1655by himanalot
1.1135PR #1658by AVINASH0052
1.0810PR #1665by mrbese
1.3571PR #1666by mrbese
1.1531PR #1667by MarioPaerle
1.0714PR #1679by ChideraIbe123
0.7625PR #1683by yunoshev
1.1280PR #1691by AVINASH0052
1.2244PR #1699by lsb
1.4831PR #1714by Anakintano
1.0857PR #1722by deborahnelson8788726
0.6580PR #1724by Unwindology
1.1803PR #1732by Victory963
1.0785PR #1747by swapp1990
1.0820PR #1749by gracebml
1.0996PR #1750by teslaeco
1.0809PR #1753by Abhishek-Dalvi410
1.2917PR #1754by upascal
1.0881PR #1757by aiejvn
1.5194
Hyperparameters Across PRs
| pr_number | parameters |
|---|---|
| 31 | {"query_heads":12,"kv_heads":6} |
| 178 | {"heads":8,"kv_heads":4} |
| 195 | {"heads":8,"kv_heads":4} |
| 201 | {"heads":8,"kv_heads":4} |
| 252 | — |
| 298 | {"q_heads":8,"kv_heads":2} |
| 330 | {"query_heads":8,"kv_heads":4} |
| 367 | {"heads":12,"kv_heads":6} |
| 388 | {"heads":8,"kv_heads":4} |
| 420 | {"num_heads":16,"num_kv_heads":8} |
| 426 | {"kv_heads":4} |
| 486 | {"layers":11,"heads":8,"kv_heads":4} |
| 488 | {"num_heads":8,"num_kv_heads":4} |
| 498 | {"heads":10,"kv_heads":5,"head_dim":64} |
| 516 | {"heads":8,"kv_heads":4} |
| 531 | {"heads":8,"kv_heads":4} |
| 614 | {"query_heads":8,"kv_heads":4} |
| 637 | {"heads":8,"kv_heads":4} |
| 669 | {"heads":8,"kv_heads":4} |
| 685 | {"kv_heads":4} |
| 692 | {"heads":8,"kv_heads":4} |
| 693 | {"heads":8,"kv_heads":4} |
| 715 | {"heads":8,"kv_heads":4} |
| 731 | {"query_heads":8,"kv_heads":4} |
| 761 | {"heads":8,"kv_heads":4} |
| 769 | {"heads":8,"kv_heads":4} |
| 778 | {"heads":8,"kv_heads":4} |
| 798 | {"kv_heads":8,"query_heads":8} |
| 805 | {"query_heads":8,"kv_heads":4} |
| 810 | {"heads":8,"kv_heads":4} |
| 826 | {"query_heads":8,"kv_heads":4} |
| 828 | {"heads":8,"kv_heads":4} |
| 840 | {"heads":"8/4"} |
| 864 | {"heads":8,"kv_heads":4} |
| 865 | {"heads":8,"kv_heads":4} |
| 870 | {"layers":11,"dimensions":512,"kv_heads":4,"query_heads":8} |
| 871 | {"heads":8,"kv_heads":4} |
| 874 | {"heads":8,"kv_heads":4} |
| 876 | {"heads":8,"kv_heads":4} |
| 889 | {"heads":8,"kv_heads":4} |
| 896 | {"heads":8,"kv_heads":4} |
| 900 | {"heads":8,"kv_heads":4} |
| 904 | {"heads":4,"kv_heads":2} |
| 905 | {"heads":4,"kv_heads":2} |
| 907 | {"layers":11,"dimensions":512,"heads":"8/4"} |
| 912 | {"heads":8,"kv_heads":4} |
| 913 | {"heads":4,"kv_heads":2} |
| 914 | {"heads":8,"kv_heads":4} |
| 915 | {"heads":8,"kv_heads":4} |
| 916 | {"heads":8,"kv_heads":4} |
| 920 | {"num_heads":8,"num_kv_heads":4} |
| 921 | {"heads":8,"kv_heads":4} |
| 922 | {"query_heads":8,"kv_heads":8} |
| 923 | {"heads":8,"kv_heads":4} |
| 924 | {"query_heads":8,"kv_heads":4} |
| 926 | {"attention_heads":8,"kv_heads":4} |
| 927 | {"heads":32,"kv_heads":8} |
| 932 | {"layers":11,"d_model":512,"heads":8,"kv_heads":4} |
| 940 | {"heads":8,"kv_heads":4} |
| 948 | {"heads":8,"kv_heads":4} |
| 961 | {"query_heads":8,"kv_heads":4} |
| 963 | {"heads":8,"kv_heads":4} |
| 968 | {"kv_heads":4} |
| 972 | {"heads":8,"kv_heads":4} |
| 975 | {"query_heads":8,"kv_heads":4} |
| 978 | {"layers":10,"dimensions":512,"heads":8,"kv_heads":4} |
| 979 | {"heads":8,"kv_heads":4} |
| 981 | {"num_heads":8,"num_kv_heads":4} |
| 982 | {"heads":4,"kv_heads":2} |
| 989 | {"heads":8,"kv_heads":4} |
| 992 | — |
| 994 | {"heads":8,"kv_heads":2} |
| 997 | {"heads":8,"kv_heads":4,"dimension":512} |
| 1006 | {"heads":8,"kv_heads":4} |
| 1007 | {"heads":8,"kv_heads":4} |
| 1016 | {"kv_heads":4} |
| 1027 | {"kv_heads":4,"heads":8} |
| 1029 | {"heads":8,"kv_heads":4} |
| 1033 | {"heads":"8/4"} |
| 1037 | {"heads":8,"kv_heads":4} |
| 1044 | {"heads":8,"kv_heads":4} |
| 1046 | {"heads":8,"kv_heads":4,"layers":11,"dim":512} |
| 1055 | {"layers":9,"dimensions":512,"heads":8,"kv_heads":4} |
| 1062 | — |
| 1065 | {"layers":10,"num_heads":8,"num_kv_heads":4,"model_dim":512,"mlp_hidden":1536} |
| 1066 | {"num_heads":8,"num_kv_heads":4} |
| 1068 | {"num_heads":8,"num_kv_heads":4} |
| 1071 | {"heads":8,"kv_heads":4} |
| 1072 | {"query_heads":8,"kv_heads":4} |
| 1074 | {"heads":8,"kv_heads":4} |
| 1080 | {"num_heads":8,"num_kv_heads":4} |
| 1085 | {"query_heads":8,"kv_heads":4} |
| 1086 | {"query_heads":8,"kv_heads":4,"head_dim":64} |
| 1095 | {"heads":8,"kv_heads":4} |
| 1097 | {"heads":8,"kv_heads":4} |
| 1098 | {"num_heads":8,"num_kv_heads":4} |
| 1101 | {"heads":8,"kv_heads":4} |
| 1107 | {"heads":8,"kv_heads":4} |
| 1108 | {"heads":8,"kv_heads":4} |
| 1110 | {"heads":8,"kv_heads":4} |
| 1112 | {"heads":8,"kv_heads":4} |
| 1113 | {"heads":8,"kv_heads":4} |
| 1114 | {"layers":11,"dimensions":512,"kv_ratio":"8/4"} |
| 1117 | {"num_heads":8,"num_kv_heads":4} |
| 1118 | {"heads":8,"kv_heads":4} |
| 1125 | {"heads":8,"kv_heads":4} |
| 1126 | {"query_heads":8,"kv_heads":4} |
| 1129 | {"layers":11,"kv_heads":4,"query_heads":8} |
| 1141 | {"heads":8,"kv_heads":4} |
| 1144 | {"heads":8,"kv_heads":4} |
| 1150 | {"heads":8,"kv_heads":4} |
| 1169 | {"heads":8,"kv_heads":4} |
| 1170 | {"heads":8,"kv_heads":4} |
| 1174 | {"num_heads":8,"num_kv_heads":4} |
| 1176 | {"heads":8,"kv_heads":4} |
| 1180 | {"heads":8,"kv_heads":4} |
| 1182 | {"heads":8,"kv_heads":4} |
| 1209 | {"kv_heads":4,"query_groups":8} |
| 1226 | {"heads":8,"kv_heads":4} |
| 1227 | {"heads":6} |
| 1232 | {"query_heads":8,"kv_heads":4} |
| 1235 | {"num_heads":8,"num_kv_heads":2} |
| 1239 | {"heads":12,"kv_grouping":"6:1","head_dim":64} |
| 1240 | {"kv_heads":4} |
| 1245 | {"heads":8,"kv_heads":4} |
| 1246 | {"heads":8,"kv_heads":4} |
| 1247 | {"kv_heads":4} |
| 1248 | {"kv_heads":4} |
| 1249 | — |
| 1253 | {"num_heads":8,"num_kv_heads":4} |
| 1254 | {"attention_heads":8,"kv_heads":4,"d_model":416} |
| 1263 | {"heads":8,"kv_heads":4} |
| 1268 | {"layers":2} |
| 1279 | {"heads":8,"kv_heads":4} |
| 1284 | {"heads":8,"kv_heads":4} |
| 1287 | {"heads":8,"kv_heads":4} |
| 1291 | {"layers":11,"d_model":512,"q_heads":8,"kv_heads":4} |
| 1293 | {"heads":8,"kv_heads":4} |
| 1300 | {"num_heads":8,"num_kv_heads":4} |
| 1302 | {"heads":8,"kv_heads":4} |
| 1303 | {"heads":8,"kv_heads":4} |
| 1305 | {"kv_heads":4} |
| 1307 | {"attention_heads":8,"kv_heads":4} |
| 1310 | {"heads":8,"kv_heads":4} |
| 1312 | {"query_heads":8,"kv_heads":4} |
| 1313 | {"heads":8,"kv_heads":4} |
| 1318 | {"heads":"8/4"} |
| 1319 | {"heads":8,"kv_heads":4} |
| 1321 | {"heads":8,"kv_heads":4} |
| 1322 | {"layers":11,"dim":512,"heads":8,"kv_heads":4} |
| 1324 | {"heads":8,"kv_heads":4} |
| 1330 | {"query_heads":8,"kv_heads":4} |
| 1335 | {"heads":8,"kv_heads":4} |
| 1337 | {"heads":8,"kv_heads":4} |
| 1351 | — |
| 1359 | {"heads":8,"kv_heads":4} |
| 1361 | {"query_heads":8,"kv_heads":4} |
| 1364 | {"layers":11,"heads":8,"kv_heads":4} |
| 1367 | {"kv_heads":4} |
| 1368 | {"query_heads":8,"kv_heads":4} |
| 1371 | {"q_heads":8,"kv_heads":4} |
| 1376 | {"heads":8,"kv_heads":4} |
| 1379 | {"heads":8,"kv_heads":4} |
| 1385 | {"heads":14,"kv_heads":2} |
| 1388 | {"heads":8,"kv_heads":4} |
| 1399 | {"heads":8,"kv_heads":4} |
| 1400 | {"attention_heads":8,"kv_heads":4} |
| 1401 | {"layers":11,"dimensions":512,"heads":8,"kv_heads":4} |
| 1405 | {"heads":8,"kv_heads":4} |
| 1411 | {"heads":8,"kv_heads":4} |
| 1414 | {"num_heads":8,"num_kv_heads":4} |
| 1417 | {"heads":8,"kv_heads":4} |
| 1421 | {"heads":8,"kv_heads":4} |
| 1425 | {"heads":8,"kv_heads":4} |
| 1428 | {"num_heads":8,"num_kv_heads":4} |
| 1431 | {"heads":8,"kv_heads":4} |
| 1434 | {"num_kv_heads":2} |
| 1435 | {"heads":8,"kv_heads":4} |
| 1436 | {"num_heads":8,"num_kv_heads":4} |
| 1445 | {"heads":8,"kv_heads":4} |
| 1449 | — |
| 1457 | {"heads":8,"kv_heads":4} |
| 1458 | {"query_heads":8,"kv_heads":4} |
| 1463 | {"heads":8,"kv_heads":4} |
| 1471 | {"heads":8,"kv_heads":4} |
| 1473 | {"heads":8,"kv_heads":4} |
| 1479 | {"q_heads":8,"kv_heads":4} |
| 1484 | {"layers":9,"dim":512,"heads":8,"kv_heads":4} |
| 1485 | {"heads":8,"kv_heads":4} |
| 1486 | {"layers":9,"heads":8,"kv_heads":4,"dim":512} |
| 1488 | {"heads":"8/4"} |
| 1489 | {"heads":8,"kv_heads":4} |
| 1492 | {"heads":8,"kv_heads":4} |
| 1494 | {"layers":11,"heads":8,"kv_heads":4} |
| 1499 | {"query_heads":8,"kv_heads":4} |
| 1501 | {"q_heads":8,"kv_heads":4} |
| 1502 | {"q_heads":8,"kv_heads":4} |
| 1507 | {"heads":8,"kv_heads":4} |
| 1508 | {"heads":8,"kv_heads":4} |
| 1515 | {"heads":8,"kv_heads":4} |
| 1517 | {"heads":8,"kv_heads":4} |
| 1539 | {"heads":8,"kv_heads":4} |
| 1543 | {"heads":8,"kv_heads":4} |
| 1546 | {"heads":8,"kv_heads":4} |
| 1548 | {"heads":8,"kv_heads":4} |
| 1555 | {"heads":8,"kv_heads":4} |
| 1559 | {"num_heads":8,"num_kv_heads":4} |
| 1561 | {"q_heads":8,"kv_heads":4,"head_dim":64} |
| 1565 | {"query_heads":8,"kv_heads":4} |
| 1568 | {"heads":8,"kv_heads":4} |
| 1579 | {"heads":16,"kv_heads":8} |
| 1580 | {"layers":9,"width":512,"heads":8,"kv_heads":4} |
| 1581 | {"query_heads":8,"kv_heads":4} |
| 1582 | {"heads":8,"kv_groups":4} |
| 1600 | {"num_heads":8,"num_kv_heads":4} |
| 1601 | {"q_heads":8,"kv_heads":4} |
| 1606 | {"heads":8,"kv_heads":4} |
| 1607 | {"heads":8,"kv_heads":4} |
| 1616 | {"qk_gain":5.5} |
| 1617 | {"query_heads":8,"kv_heads":4} |
| 1619 | {"query_heads":8,"kv_heads":4} |
| 1621 | {"heads":8,"kv_heads":4} |
| 1623 | {"query_heads":8,"kv_heads":4} |
| 1632 | {"heads":8,"kv_heads":4} |
| 1640 | {"num_heads":8,"kv_heads":4} |
| 1643 | {"heads":8,"kv_heads":4} |
| 1655 | {"layers":14,"dimensions":512,"query_heads":8,"kv_heads":4} |
| 1658 | {"query_heads":8,"kv_heads":4} |
| 1665 | {"num_heads":8,"num_kv_heads":4} |
| 1666 | {"heads":8,"kv_heads":4} |
| 1667 | {"heads":8,"kv_heads":4} |
| 1679 | {"kv_heads":4} |
| 1683 | {"layers":13,"d_model":512,"heads":8,"kv":4} |
| 1691 | {"heads":8,"kv_heads":4} |
| 1699 | {"layers":8,"dim":576,"heads":8,"kv_heads":4} |
| 1714 | {"heads":8,"kv_heads":4} |
| 1722 | {"heads":8,"kv_heads":4} |
| 1724 | {"heads":8,"kv_heads":4} |
| 1732 | {"num_heads":8,"num_kv_heads":4} |
| 1747 | {"heads":8,"kv_heads":4} |
| 1749 | {"heads":8,"kv_heads":4} |
| 1750 | {"qk_gain":5.25} |
| 1753 | {"num_heads":8,"num_kv_heads":4} |
| 1754 | {"heads":8,"kv_heads":4} |
| 1757 | {"kv_heads":4} |