← Back to Architecture
KV head count
Architecture
Used in
245 PRs
Best BPB
0.0180
Avg BPB
1.1776
Submissions
PR #30by JackYoung27
1.2663PR #34by ChenLiu-1996
1.2244PR #39by nanlliuRECORD
1.2139PR #41by kiankyars
1.2296PR #46by vavo
1.2697PR #48by MajdiZamim
1.2381PR #49by spokane-wayRECORD
1.2058PR #50by mattqlf
1.1925PR #53by kshitizz36
1.1888PR #56by cschubiner
1.8440PR #65by aquariouseworkmanRECORD
1.1556PR #69by TevBenji
1.1708PR #70by jfprincz
1.1659PR #71by AntDX316
1.3509PR #74by takhir-iota
1.1884PR #75by takhir-iota
1.1768PR #76by unixmadtoonslab
1.1433PR #79by Marvbuster
1.8698PR #81by polarizedfortnite-cpu
1.1670PR #86by aruniyerRECORD
1.1502PR #94by aamodbhatt
1.3486PR #111by aamodbhatt
1.2540PR #114by saml212
1.1574PR #125by akshai0296
1.3797PR #126by Athenox14
1.7510PR #136by ibarrajo
1.2101PR #139by ksang123
1.2029PR #143by Julz19
1.1779PR #144by DJLougen
1.3517PR #146by swapp1990
1.2987PR #147by ankitmaloo
1.1631PR #148by iverbovoy
1.2196PR #150by yahya010
1.1478PR #156by dexhunter
1.1602PR #157by santosh5541
1.1957PR #159by santosh5541
1.1957PR #160by ChaseWNorton
1.1623PR #161by santosh5541
1.1957PR #163by Focus2321
1.2091PR #168by spokane-way
1.0217PR #169by beee003
1.1973PR #173by tamoghnokandar
1.1532PR #180by thwu1RECORD
1.1428PR #185by dttdrv
1.3043PR #190by newjordan
1.1725PR #191by chris-buckley
1.1598PR #192by baudrillardsgh0st
1.1502PR #193by KHUCHAN
1.2917PR #194by baudrillardsgh0st
1.1480PR #196by sicauzxl
1.3825PR #204by Akasxh
1.2320PR #206by dexhunter
1.1507PR #211by dubthecat
1.1719PR #212by mrdavtan
1.1329PR #215by JayCheng113
1.1548PR #217by kshitizz36
1.1753PR #219by alertcat
1.1541PR #223by 0xjaishy
1.1326PR #231by lenguyen1807
1.2036PR #236by saml212
1.1400PR #237by takoyakisoft
1.8389PR #238by kellyvv
1.5164PR #240by riatzukiza
1.6660PR #247by riatzukiza
1.6114PR #248by riatzukiza
1.6231PR #249by kvmukilan
1.1704PR #251by kshitizz36
1.1596PR #256by IvGolovach
1.1779PR #258by riatzukiza
1.6572PR #264by stukenov
1.1455PR #267by andrewgcodes
1.1374PR #271by xexyz
1.3003PR #273by dentity007
1.1575PR #276by riatzukiza
1.6577PR #278by nicolasdickenmann
1.0365PR #281by charmquark1984
1.1381PR #294by sseanliu
1.1645PR #295by gowtham0992
1.1477PR #296by sseanliu
1.1645PR #304by Bortlesboat
1.4245PR #305by Naazimsnh02
1.1672PR #306by xuafeng
1.1448PR #309by NewyorkDev
1.1914PR #312by chanwoo-park-official
1.1668PR #317by chris-buckley
1.1442PR #319by Arth-Singh
1.2716PR #325by Aum08Desai
1.1462PR #327by Ananddna
1.1450PR #331by Rhodrium
1.1487PR #334by nathon-lee
1.2207PR #339by sheeki03
1.1364PR #343by joeynyc
1.2459PR #349by Mapika
1.1399PR #351by sp00mm
1.1659PR #352by sp00mm
1.1659PR #355by josusanmartin
1.1929PR #362by mkenney2
1.1497PR #366by shivnarainms22
1.1574PR #369by signalrush
1.1328PR #372by HyperPotatoNeo
1.1361PR #373by JoeProAI
1.1634PR #374by unnirRECORD
1.1246PR #376by anthony-maio
1.1399PR #379by dannywillowliu-uchi
1.1257PR #383by joelnishanth
1.1320PR #385by dentity007
1.1488PR #386by Sambhav242005
1.4061PR #390by newjordan
1.1295PR #391by NishantDahal
1.2374PR #393by CrimsonSithria
1.2417PR #395by NishantDahal
1.2374PR #398by felipe-parodi
1.1213PR #400by chanwoo-park-official
1.1296PR #401by newjordan
1.1243PR #406by dentity007
1.1287PR #410by EthanYangTW
1.1216PR #415by EthanYangTW
1.1216PR #417by EthanYangTW
1.1227PR #418by yashverms
1.1715PR #422by albertorkive
1.1396PR #433by Robby955
1.3441PR #434by parinzee
1.1370PR #436by CrimsonSithria
1.2392PR #442by sjp611
1.1027PR #443by CREVIOS
1.1431PR #444by AymanMahfuz27
1.4536PR #446by sofiabod
1.1933PR #447by CREVIOS
1.1431PR #448by handemanai
1.2006PR #450by zachgoldfine44
1.1466PR #451by harborglowvintage-oss
1.1464PR #455by kasimte
1.1299PR #456by Christopher-Lee-McClendon
1.1532PR #462by JoeProAI
1.0672PR #465by LoquiAuris
1.1508PR #466by simonbissonnette
1.1354PR #467by ADIITJ
1.1428PR #469by cmcdnd
1.1418PR #470by leofeasby
1.1454PR #474by joshuaswarren
1.1690PR #477by harsha-gouru
1.1522PR #483by tmustier
1.1346PR #485by harsha-gouru
1.1522PR #493by parinzee
1.1309PR #503by EthanYangTW
1.1195PR #508by newjordan
1.1215PR #512by MatoTeziTanka
0.9512PR #525by hypery11
1.1160PR #528by EthanYangTW
1.1195PR #529by EthanYangTW
1.1195PR #532by NotADevIAmaMeatPopsicle
1.0487PR #545by EthanYangTW
1.1179PR #548by LoquiAuris
1.0865PR #549by abaybektursunRECORD
1.1194PR #563by instax-dutta
1.1428PR #568by MatoTeziTanka
0.7853PR #580by micoverde
1.2623PR #585by EthanYangTW
1.1179PR #588by andyluo22
1.4120PR #595by LoquiAuris
1.1100PR #596by AriaAnima
0.6430PR #622by Upsalla
1.0941PR #634by raahilshah
1.1171PR #649by pall23-mech
1.2073PR #661by andrewbaggio1
1.1175PR #664by tsbiosky
1.2982PR #665by harborglowvintage-oss
1.1464PR #666by chrislovescoding
1.1932PR #678by SPThole
1.3525PR #684by DeepReinforce
1.0574PR #691by xexyz
1.0988PR #694by Bortlesboat
1.1507PR #700by RoyiRa
1.0541PR #709by StolbaJ
1.1478PR #710by Dhruba531
1.1240PR #716by SHN2004
1.4239PR #728by abaybektursun
1.1142PR #734by Robby955
1.1198PR #738by gowtham0992
1.0970PR #741by andrewbaggio1
0.9850PR #746by C0neF
1.3556PR #749by FyeJordy
1.3684PR #755by dcrow85
1.0321PR #757by fielding
1.1124PR #760by erikqu
1.2185PR #773by siddhantparadox
1.1532PR #777by Robby955
0.9623PR #793by pall23-mech
1.2500PR #796by Robby955
0.6567PR #799by yuvraajbains
1.2005PR #802by Bortlesboat
0.9123PR #807by connectwithprakash
1.0116PR #809by AayushBaniya2006
0.2952PR #811by quietsmile
0.4377PR #820by mtybadger
1.6252PR #835by iverbovoy
1.1980PR #838by aryanbhosale
1.1215PR #841by someone114514
1.1157PR #854by ivanontech
1.4530PR #856by iverbovoy
1.1454PR #858by nickferrantelive
1.2135PR #920by CiprianFlorin-Ifrim
1.1539PR #941by aptsalt
1.3620PR #945by TimPietrusky
0.0274PR #953by dexhunter
1.0722PR #967by dexhunter
1.0450PR #995by dexhunter
1.0362PR #1025by Zagot-byte
1.3579PR #1036by ivanontech
1.1974PR #1056by sofiabod
0.0180PR #1059by edidisheng
1.1996PR #1104by DariusFeher
1.3595PR #1127by dentity007
1.1311PR #1139by ivanontech
1.1801PR #1140by newjordan
1.1874PR #1142by ymrohit
1.1493PR #1198by ymrohit
1.5992PR #1234by ibarrajo
1.1461PR #1240by andrewbaggio1
1.1064PR #1245by mkenney2
1.1470PR #1250by ibarrajo
1.2094PR #1258by jorge-asenjo
1.3874PR #1264by andrewmouldon
1.2225PR #1273by DushyantChetiwal
1.2196PR #1311by htrung1105
1.1303PR #1352by alientony
1.2450PR #1387by Muhammad-Ahmed-Rayyan
1.2919PR #1396by erichroepke
1.1067PR #1440by Mertyandimata
1.1026PR #1460by resouer
1.0827PR #1490by wisebreadloaf
1.6110PR #1505by Rohan-Abhilash
1.1791PR #1514by dexhunter
1.0798PR #1527by alphastar1111
1.2026PR #1556by sidhanth97
1.4352PR #1570by yufang67
1.0970PR #1602by SPThole
1.0744PR #1617by adityasasidhar
1.2192PR #1628by yu314-coder
1.1921PR #1646by sergeevii123
1.0909PR #1661by anderamondarainh-stack
1.1444PR #1691by AVINASH0052
1.2244PR #1723by SlavH
0.5116PR #1744by MuhammedErinArchitecture
1.0889PR #1748by elad-simbalista
1.2098
Hyperparameters Across PRs
| pr_number | parameters |
|---|---|
| 30 | {"heads":12,"kv_heads":6} |
| 34 | {"num_heads":8,"num_kv_heads":4} |
| 39 | {"kv_heads":4} |
| 41 | {"num_heads":8,"num_kv_heads":4} |
| 46 | {"layers":9,"dim":432,"heads":8,"kv_heads":2,"mlp_mult":2} |
| 48 | {"num_heads":8,"num_kv_heads":4,"layers":9,"model_dim":512} |
| 49 | {"num_heads":8,"num_kv_heads":4} |
| 50 | {"num_heads":8,"num_kv_heads":4} |
| 53 | {"num_heads":8,"num_kv_heads":4} |
| 56 | {"layers":14,"model_dim":416,"num_heads":8,"num_kv_heads":2,"mlp_mult":2} |
| 65 | {"heads":8,"kv_heads":4} |
| 69 | {"heads":8,"kv_heads":4} |
| 70 | {"num_heads":8,"num_kv_heads":4} |
| 71 | {"num_heads":8,"num_kv_heads":4} |
| 74 | {"num_heads":8,"num_kv_heads":4} |
| 75 | {"num_heads":8,"num_kv_heads":4} |
| 76 | {"num_heads":8,"num_kv_heads":4} |
| 79 | {"heads":24,"kv_heads":12} |
| 81 | {"heads":8,"kv_heads":4} |
| 86 | {"attention_heads":8,"kv_heads":4} |
| 94 | {"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_mult":2} |
| 111 | {"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_mult":2,"vocab_size":1024} |
| 114 | {"layers":9,"dim":512,"heads":8,"kv_heads":4} |
| 125 | {"num_heads":8,"num_kv_heads":4} |
| 126 | {"heads":8,"kv_heads":4} |
| 136 | {"heads":8,"kv_heads":4} |
| 139 | {"heads":12,"kv_heads":6} |
| 143 | {"num_heads":8,"num_kv_heads":4} |
| 144 | {"heads":8,"kv_heads":4} |
| 146 | {"num_heads":8,"num_kv_heads":4} |
| 147 | {"num_heads":8,"num_kv_heads":4} |
| 148 | {"heads":8,"kv_heads":4} |
| 150 | {"heads":8,"kv_heads":4} |
| 156 | {"layers":9,"model_dim":512,"attention_heads":8,"kv_heads":4} |
| 157 | {"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4} |
| 159 | {"layers":9,"model_dim":512,"heads":8,"kv_heads":4} |
| 160 | {"num_heads":8,"num_kv_heads":4} |
| 161 | {"layers":9,"dimensions":512,"heads":8,"kv_heads":4} |
| 163 | {"num_heads":8,"num_kv_heads":4} |
| 168 | {"layers":7,"dim":384,"heads":6,"kv_heads":3} |
| 169 | {"heads":8,"kv_heads":4} |
| 173 | {"heads":8,"kv_heads":4,"layers":9,"dim":512} |
| 180 | {"heads":8,"kv_heads":4} |
| 185 | {"heads":12,"kv_heads":4} |
| 190 | {"num_heads":8,"num_kv_heads":4} |
| 191 | {"num_heads":8,"num_kv_heads":4} |
| 192 | {"heads":8,"kv_heads":4} |
| 193 | {"kv_heads":4} |
| 194 | {"heads":8,"kv_heads":4} |
| 196 | {"num_heads":12,"num_kv_heads":4} |
| 204 | {"layers":10,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_hidden":1088} |
| 206 | {"heads":8,"kv_heads":4} |
| 211 | {"layers":12,"heads":8,"kv_heads":4,"dim":512} |
| 212 | {"num_heads":8,"num_kv_heads":4} |
| 215 | {"kv_heads":4} |
| 217 | {"heads":8,"kv_heads":4} |
| 219 | {"heads":8,"kv_heads":4} |
| 223 | {"heads":8,"kv_heads":4} |
| 231 | {"num_heads":8,"num_kv_heads":4} |
| 236 | {"heads":8,"kv_heads":4} |
| 237 | {"num_heads":8,"num_kv_heads":4} |
| 238 | {"num_heads":8,"num_kv_heads":4} |
| 240 | {"num_heads":8,"num_kv_heads":2} |
| 247 | {"layers":8,"model_dim":512,"num_heads":8,"num_kv_heads":4} |
| 248 | {"num_heads":8,"num_kv_heads":4} |
| 249 | — |
| 251 | {"heads":8,"kv_heads":4} |
| 256 | {"num_heads":8,"num_kv_heads":4} |
| 258 | {"layers":7,"model_dim":512,"num_heads":8,"num_kv_heads":4} |
| 264 | {"heads":8,"kv_heads":4} |
| 267 | {"kv_heads":4,"heads":8} |
| 271 | {"num_kv_heads":2} |
| 273 | {"heads":8,"kv_heads":4} |
| 276 | {"num_heads":8,"num_kv_heads":4} |
| 278 | {"heads":8,"kv_heads":4} |
| 281 | {"heads":8,"kv_heads":4} |
| 294 | {"heads":12,"kv_heads":6} |
| 295 | {"heads":8,"kv_heads":4} |
| 296 | {"kv_heads":4,"attention_heads":8} |
| 304 | {"layers":10,"dim":512,"heads":8,"kv_heads":4} |
| 305 | {"heads":8,"kv_heads":4} |
| 306 | {"heads":8,"kv_heads":4} |
| 309 | {"heads":8,"kv_heads":4} |
| 312 | {"num_heads":8,"num_kv_heads":4} |
| 317 | {"heads":8,"kv_heads":4} |
| 319 | {"heads":8,"kv_heads":4} |
| 325 | {"num_heads":10,"num_kv_heads":5} |
| 327 | {"heads":8,"kv_heads":4} |
| 331 | {"heads":8,"kv_heads":4} |
| 334 | {"heads":8,"kv_heads":4} |
| 339 | {"heads":8,"kv_heads":4} |
| 343 | {"heads":4,"kv_heads":2} |
| 349 | {"heads":8,"kv_heads":4} |
| 351 | {"attention_heads":8,"kv_heads":4} |
| 352 | {"attention_heads":8,"kv_heads":4} |
| 355 | {"num_heads":8,"num_kv_heads":4} |
| 362 | {"num_heads":8,"num_kv_heads":4} |
| 366 | {"heads":8,"kv_heads":4} |
| 369 | {"heads":8,"kv_heads":4} |
| 372 | {"num_kv_heads":4} |
| 373 | {"layers":10,"dim":512,"heads":8,"kv_heads":4} |
| 374 | {"heads":8,"kv_heads":4} |
| 376 | {"heads":8,"kv_heads":4} |
| 379 | {"heads":8,"kv_heads":4} |
| 383 | {"heads":8,"kv_heads":4} |
| 385 | {"heads":8,"kv_heads":4} |
| 386 | {"num_heads":12,"num_kv_heads":4,"model_dim":768} |
| 390 | {"heads":8,"kv_heads":4} |
| 391 | {"kv_heads":8} |
| 393 | {"heads":8,"kv_heads":4} |
| 395 | {"heads":8,"kv_heads":8} |
| 398 | {"heads":8,"kv_heads":4} |
| 400 | {"heads":8,"kv_heads":4} |
| 401 | {"heads":8,"kv_heads":4} |
| 406 | {"heads":8,"kv_heads":4} |
| 410 | {"heads":8,"kv_heads":4} |
| 415 | {"heads":8,"kv_heads":4} |
| 417 | {"heads":8,"kv_heads":4} |
| 418 | {"heads":8,"kv_heads":4} |
| 422 | {"kv_heads":4} |
| 433 | {"q_heads":16,"kv_heads":4} |
| 434 | {"heads":8,"kv_heads":4} |
| 436 | {"heads":8,"kv_heads":4} |
| 442 | {"heads":8,"kv_heads":4} |
| 443 | {"heads":8,"kv_heads":4} |
| 444 | {"num_heads":8,"num_kv_heads":4} |
| 446 | {"attention_heads":8,"kv_heads":4} |
| 447 | {"heads":8,"kv_heads":4} |
| 448 | {"num_heads":8,"num_kv_heads":4} |
| 450 | {"heads":8,"kv_heads":4} |
| 451 | {"heads":8,"kv_heads":4} |
| 455 | {"heads":8,"kv_heads":4} |
| 456 | {"heads":8,"kv_heads":4} |
| 462 | {"heads":8,"kv_heads":8} |
| 465 | {"attention_heads":8,"kv_heads":4} |
| 466 | {"heads":8,"kv_heads":4} |
| 467 | {"heads":8,"kv_heads":4} |
| 469 | {"heads":9,"kv_heads":3} |
| 470 | {"num_heads":16,"num_kv_heads":8} |
| 474 | {"heads":8,"kv_heads":4} |
| 477 | {"heads":8,"kv_heads":4} |
| 483 | {"heads":8,"kv_heads":4} |
| 485 | {"heads":8,"kv_heads":4} |
| 493 | {"heads":8,"kv_heads":4} |
| 503 | {"heads":8,"kv_heads":4} |
| 508 | {"kv_heads":4,"heads":8} |
| 512 | {"heads":8,"kv_heads":4} |
| 525 | {"heads":8,"kv_heads":4} |
| 528 | {"heads":8,"kv_heads":4} |
| 529 | {"heads":8,"kv_heads":4} |
| 532 | {"heads":8,"kv_heads":4} |
| 545 | {"heads":8,"kv_heads":8} |
| 548 | {"heads":8,"kv_heads":4} |
| 549 | {"heads":8,"kv_heads":4} |
| 563 | {"kv_heads":4,"attention_heads":8,"model_dim":512,"layers":10} |
| 568 | {"heads":8,"kv_heads":4} |
| 580 | {"NUM_KV_HEADS":4,"NUM_HEADS":8} |
| 585 | {"heads":8,"kv_heads":8} |
| 588 | {"NUM_KV_HEADS":2} |
| 595 | {"heads":8,"kv_heads":4} |
| 596 | {"attention_heads":8,"kv_heads":4} |
| 622 | {"heads":8,"kv_heads":4} |
| 634 | {"heads":8,"kv_heads":4} |
| 649 | {"attention_heads":8,"kv_heads":4} |
| 661 | {"kv_heads":8,"heads":8,"layers":11,"dim":512} |
| 664 | {"attention_heads":8,"kv_heads":4} |
| 665 | {"heads":8,"kv_heads":4} |
| 666 | {"layers":12,"dimensions":768,"heads":12,"kv_heads":6} |
| 678 | {"num_heads":8,"num_kv_heads":4} |
| 684 | {"heads":8,"kv_heads":4} |
| 691 | {"heads":8,"kv_heads":4} |
| 694 | {"layers":10,"heads":8,"kv_heads":4,"d_model":512} |
| 700 | {"layers":11,"hidden_size":512,"heads":8,"kv_heads":8} |
| 709 | {"heads":8,"kv_heads":4,"layers":10,"dim":512} |
| 710 | {"heads":8,"kv_heads":4} |
| 716 | {"heads":8,"kv_heads":4} |
| 728 | {"gqa_heads":8,"kv_heads":4} |
| 734 | {"heads":8,"kv_heads":4} |
| 738 | {"heads":8,"kv_heads":4} |
| 741 | {"kv_heads":4} |
| 746 | {"heads":8,"kv_heads":4} |
| 749 | {"num_heads":8,"num_kv_heads":4} |
| 755 | {"heads":6,"kv_heads":2} |
| 757 | {"heads":8,"kv_heads":4} |
| 760 | {"heads":8,"kv_heads":4} |
| 773 | {"num_heads":10,"num_kv_heads":5} |
| 777 | {"heads":8,"kv_heads":4} |
| 793 | {"attention_heads":8,"kv_heads":4} |
| 796 | {"kv_heads":4,"attention_heads":8} |
| 799 | {"num_heads":8,"num_kv_heads":4} |
| 802 | {"heads":8,"kv_heads":4} |
| 807 | {"heads":8,"kv_heads":4} |
| 809 | {"query_heads":8,"kv_heads":4} |
| 811 | {"heads":8,"kv_heads":4} |
| 820 | {"num_heads":8,"num_kv_heads":4} |
| 835 | {"heads":8,"kv_heads":4} |
| 838 | {"heads":8,"kv_heads":4} |
| 841 | {"heads":8,"kv_heads":4} |
| 854 | {"heads":8,"kv_heads":4} |
| 856 | {"heads":8,"kv_heads":4} |
| 858 | {"heads":8,"kv_heads":4} |
| 920 | {"num_kv_heads":4} |
| 941 | {"heads":8,"kv_heads":4} |
| 945 | {"heads":8,"kv_heads":8} |
| 953 | {"heads":8,"kv_heads":8} |
| 967 | {"kv_heads":8} |
| 995 | {"heads":8,"kv_heads":8} |
| 1025 | {"heads":8,"kv_heads":8} |
| 1036 | {"heads":8,"kv_heads":8} |
| 1056 | {"kv_heads":8} |
| 1059 | {"kv_heads":4} |
| 1104 | {"byte260":4,"sp1024":2} |
| 1127 | {"heads":4} |
| 1139 | {"heads":5,"kv_heads":5} |
| 1140 | {"heads":8,"kv_heads":4} |
| 1142 | {"num_heads":8,"num_kv_heads":4} |
| 1198 | {"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4} |
| 1234 | {"heads":8,"kv_heads":8} |
| 1240 | {"heads":4} |
| 1245 | {"kv_heads":4} |
| 1250 | {"kv_heads":8} |
| 1258 | {"heads":8,"kv_heads":4} |
| 1264 | {"kv_heads":4,"head_dim":64} |
| 1273 | {"kv_heads":8} |
| 1311 | {"heads":8,"kv_heads":4} |
| 1352 | {"heads":6,"kv_heads":2} |
| 1387 | {"num_heads":16,"num_kv_heads":8} |
| 1396 | {"kv_heads":4} |
| 1440 | {"attention_heads":8,"kv_heads":4} |
| 1460 | {"heads":8,"kv_heads":4} |
| 1490 | {"num_heads":4,"num_kv_heads":2} |
| 1505 | {"num_heads":8,"num_kv_heads":4} |
| 1514 | {"kv_heads":4} |
| 1527 | {"heads":4,"kv_heads":4} |
| 1556 | {"kv_heads":1} |
| 1570 | {"heads":8,"kv_heads":4} |
| 1602 | {"kv_heads":8} |
| 1617 | {"query_heads":8,"kv_heads":4} |
| 1628 | {"heads":8,"kv_heads":4} |
| 1646 | {"heads":8,"kv_heads":4} |
| 1661 | {"num_heads":8,"num_kv_heads":4} |
| 1691 | {"kv_heads":2} |
| 1723 | {"heads":8,"kv_heads":4} |
| 1744 | {"layers":11,"dimensions":512,"heads":8,"kv_heads":4} |
| 1748 | {"num_heads":8,"num_kv_heads":4} |