← Back to Architecture
XSA
Architecture
Used in
479 PRs
Best BPB
0.0180
Avg BPB
1.0589
Submissions
PR #64by yesbhautik
1.1250PR #175by anthony-maio
1.1229PR #186by mahsumaktas
1.1565PR #187by Idan3011
1.1629PR #218by bopmite
1.1248PR #265by unnir
1.1307PR #267by andrewgcodes
1.1374PR #287by jfprinczRECORD
1.1271PR #290by ibarrajo
1.1354PR #302by JackYoung27
1.1520PR #303by sseanliu
1.1436PR #307by dennisimoo
1.1357PR #315by jfprincz
1.1248PR #317by chris-buckley
1.1442PR #318by sseanliu
1.1284PR #325by Aum08Desai
1.1462PR #330by bopmite
1.1609PR #332by saml212
1.1320PR #333by mahsumaktas
1.1565PR #338by alertcat
1.1254PR #344by aryanbhosale
1.1330PR #349by Mapika
1.1399PR #356by sjp611
1.8338PR #359by tmustier
1.1345PR #369by signalrush
1.1328PR #371by mrdavtan
1.1401PR #372by HyperPotatoNeo
1.1361PR #374by unnirRECORD
1.1246PR #375by charmquark1984
1.1257PR #376by anthony-maio
1.1399PR #379by dannywillowliu-uchi
1.1257PR #383by joelnishanth
1.1320PR #389by trasnake87
1.1466PR #394by greqone
1.1247PR #397by translatingthename
1.1364PR #399by abaybektursun
1.1247PR #400by chanwoo-park-official
1.1296PR #406by dentity007
1.1287PR #410by EthanYangTW
1.1216PR #413by anantdgoel
1.4525PR #414by signalrush
1.1233PR #415by EthanYangTW
1.1216PR #417by EthanYangTW
1.1227PR #418by yashverms
1.1715PR #422by albertorkive
1.1396PR #429by AbhisekBasu1
1.1231PR #430by sahiee-dev
1.1428PR #434by parinzee
1.1370PR #445by newjordan
1.1236PR #450by zachgoldfine44
1.1466PR #452by ofirkris
1.1366PR #453by Divyesh-Thirukonda
1.1248PR #455by kasimte
1.1299PR #456by Christopher-Lee-McClendon
1.1532PR #457by carlesonielfa
1.1839PR #458by ofirkris
1.1365PR #461by Christopher-Lee-McClendon
1.1446PR #462by JoeProAI
1.0672PR #469by cmcdnd
1.1418PR #473by abaybektursun
1.1214PR #477by harsha-gouru
1.1522PR #478by gowtham0992
1.1268PR #482by harsha-gouru
1.1522PR #483by tmustier
1.1346PR #484by Robby955
1.1185PR #485by harsha-gouru
1.1522PR #487by anantdgoel
1.1720PR #492by Divyesh-Thirukonda
1.1591PR #493by parinzee
1.1309PR #498by newjordan
1.1478PR #499by newjordan
1.1478PR #503by EthanYangTW
1.1195PR #507by skarakulak
1.1558PR #508by newjordan
1.1215PR #516by Asukabot0
1.1428PR #518by sofiabod
1.0622PR #526by Christopher-Lee-McClendon
1.1425PR #528by EthanYangTW
1.1195PR #529by EthanYangTW
1.1195PR #532by NotADevIAmaMeatPopsicle
1.0487PR #533by newjordan
1.1207PR #534by rarce
1.1804PR #537by Christopher-Lee-McClendon
1.1387PR #543by rarce
1.1804PR #544by EthanYangTW
1.1179PR #545by EthanYangTW
1.1179PR #549by abaybektursunRECORD
1.1194PR #554by chrisnkuno
1.4612PR #562by bigbag
1.1354PR #564by sadeghja1070
1.1270PR #573by Sarimsaljook
1.0523PR #576by cmcdnd
1.1164PR #577by newjordan
1.1207PR #585by EthanYangTW
1.1179PR #586by EaCognitive
1.1365PR #587by newjordan
1.1208PR #589by RoyiRa
1.1178PR #592by Skytuhua
1.1476PR #593by abaybektursun
1.1163PR #598by Christopher-Lee-McClendon
1.1334PR #601by anantdgoel
1.1418PR #606by EthanYangTW
1.1162PR #609by saml212
1.1154PR #612by Christopher-Lee-McClendon
1.1079PR #628by Christopher-Lee-McClendon
1.0983PR #633by MatoTeziTanka
1.1526PR #634by raahilshah
1.1171PR #636by NewyorkDev
1.1234PR #638by Asukabot0
1.1164PR #639by Robby955
1.1158PR #642by minh-stakc
0.8173PR #644by Christopher-Lee-McClendon
1.0944PR #645by FlynnCruse
1.8990PR #649by pall23-mech
1.2073PR #656by newjordan
1.1190PR #659by deanbrr
1.0920PR #667by suchitj2702
1.1352PR #670by abaybektursun
1.1171PR #674by newjordan
1.0461PR #681by Alfaxad
1.4775PR #688by RoyiRa
1.0745PR #690by EthanYangTW
1.1186PR #691by xexyz
1.0988PR #692by EthanYangTW
1.1186PR #693by EthanYangTW
1.1186PR #695by 0xNoramiya
1.1360PR #698by hesong0222-dev
1.1642PR #700by RoyiRa
1.0541PR #702by lukacf
1.0244PR #703by Gusanidas
1.1176PR #706by newjordan
1.0461PR #710by Dhruba531
1.1240PR #714by Upsalla
1.1187PR #715by Asukabot0
1.0337PR #720by agalimova
1.1078PR #726by DeepReinforce
1.1147PR #727by Asukabot0
0.9674PR #728by abaybektursun
1.1142PR #733by stukenov
1.0278PR #734by Robby955
1.1198PR #738by gowtham0992
1.0970PR #740by resouer
1.0909PR #741by andrewbaggio1
0.9850PR #745by stukenov
1.0222PR #752by Naazimsnh02
1.1182PR #753by newjordan
0.9625PR #754by aryanbhosale
1.1253PR #757by fielding
1.1124PR #758by hypery11
1.0465PR #760by erikqu
1.2185PR #761by Asukabot0
0.9581PR #763by hypery11
0.9917PR #767by RichiiiTV
0.9209PR #768by mradassaad
1.1201PR #770by minh-stakc
0.6672PR #771by sunnypatneedi
1.0705PR #773by siddhantparadox
1.1532PR #774by travispchen
0.9370PR #776by agalimova
0.9258PR #777by Robby955
0.9623PR #778by raahilshah
0.9605PR #779by deanbrr
0.6683PR #784by iverbovoy
1.2065PR #785by SirSaltySalmon
1.5364PR #786by shinegami-2002
0.8128PR #788by hypery11
0.9059PR #790by danialht
1.1172PR #792by xexyz
1.0340PR #794by jeremyschied
1.3346PR #796by Robby955
0.6567PR #797by armantsaturian
0.8960PR #798by travispchen
0.5466PR #802by Bortlesboat
0.9123PR #803by pentxayc
0.4416PR #808by Naazimsnh02
0.6364PR #809by AayushBaniya2006
0.2952PR #810by Idan3011
0.9393PR #811by quietsmile
0.4377PR #813by hypery11
0.6671PR #816by jimliu741523
1.1194PR #826by himanshudongre
0.2951PR #827by Programmerryoki
1.3999PR #828by bigbag
0.9076PR #831by sseanliu
1.1284PR #832by jfprincz
1.1903PR #835by iverbovoy
1.1980PR #836by autocode-rayes
1.1219PR #838by aryanbhosale
1.1215PR #841by someone114514
1.1157PR #843by quietsmile
0.2834PR #849by dttdrv
1.1105PR #850by callithyia
0.3212PR #851by RoyiRa
0.2071PR #857by aruniyer
1.1093PR #861by JoeProAI
1.1326PR #862by grim-hitman0XX
1.3036PR #865by aryanbhosale
0.2841PR #871by greqone
0.8004PR #872by gowtham0992
1.0467PR #876by Bortlesboat
0.5863PR #880by RoyiRa
0.1003PR #887by anthony-maio
0.9642PR #889by anthony-maio
0.9642PR #890by sofiabod
0.4405PR #891by robbiebusinessacc
1.1428PR #892by robbiebusinessacc
1.1428PR #893by aryanbhosale
0.1310PR #894by albertorkive
1.1821PR #896by MVPandey
1.1896PR #900by Robby955
0.1156PR #908by albertorkive
1.1734PR #909by sunnypatneedi
0.8609PR #912by Bortlesboat
0.3461PR #915by anthony-maio
0.9642PR #916by Bortlesboat
0.3461PR #918by haikosys
0.1653PR #922by greqone
0.0972PR #924by THUQiXuan
0.0280PR #925by THUQiXuan
0.0281PR #926by NandhuRajRK
0.8705PR #927by Tonyy1977
1.1696PR #928by autocode-rayes
1.1211PR #932by anthony-maio
1.1580PR #933by haikosys
0.0804PR #940by antaloaalonso
0.9581PR #945by TimPietrusky
0.0274PR #948by dentity007
0.1156PR #952by FlashyFlash3011
1.1144PR #953by dexhunter
1.0722PR #958by shouryamaanjain
1.1382PR #961by callithyia
0.0881PR #963by sunnypatneedi
0.8609PR #964by vivekvar-dl
1.3900PR #965by Adam-Jacuch
1.1184PR #967by dexhunter
1.0450PR #968by dentity007
0.1154PR #972by Idan3011
0.3922PR #974by anthony-maio
1.6542PR #975by Abhishek8108
1.1216PR #978by AnirudhRahul
1.5134PR #988by ymrohit
1.0857PR #991by ibarrajo
1.1145PR #993by aerosta
0.9631PR #995by dexhunter
1.0362PR #996by Idan3011
1.1478PR #999by aamodbhatt
1.1179PR #1002by SoHarshh
1.1650PR #1004by ibarrajo
1.1182PR #1005by OnlyJundong
1.0853PR #1006by NewyorkDev
1.1085PR #1007by dillon-blake
1.2252PR #1008by monkeyKingProgrammer
1.1538PR #1009by SoHarshh
1.1574PR #1013by himanshudongre
1.1682PR #1015by shram86
1.2115PR #1019by abaybektursunRECORD
1.1147PR #1026by danielxmed
1.0945PR #1029by fielding
1.1520PR #1033by Naazimsnh02
0.4311PR #1037by TimPietruskyRunPod
1.1179PR #1039by yufengli-oai
1.1184PR #1043by okezue
1.1261PR #1045by Hilo-Hilo
1.1509PR #1051by tejas-goyal
1.2826PR #1056by sofiabod
0.0180PR #1057by Programmerryoki
1.2201PR #1060by dexhunter
1.1123PR #1062by yaowubarbara
1.4508PR #1066by adityakm24
1.1259PR #1069by manfromnowhere143
1.1190PR #1070by manfromnowhere143
1.1190PR #1072by vimeto
1.1170PR #1077by malc3om
1.1130PR #1080by ciach
1.1228PR #1081by michaelwinczuk
1.1220PR #1084by AnubhavBharadwaaj
1.1185PR #1085by adityasasidhar
1.2831PR #1086by Omrigotlieb
1.1349PR #1090by swapp1990
1.1573PR #1092by teddyoweh
1.1219PR #1094by michaelwinczuk
0.4027PR #1098by adityakm24
1.1187PR #1099by Bortlesboat
1.1133PR #1101by amrayach
1.1290PR #1105by abaybektursun
1.2208PR #1108by DbBested
1.1502PR #1112by dillon-blake
1.2252PR #1113by gowtham0992
1.3705PR #1114by minh-stakc
0.0235PR #1117by adityakm24
1.1187PR #1118by adityakm24
1.1187PR #1120by newjordan
1.1099PR #1122by icryo
1.1146PR #1123by sisegod
1.1986PR #1124by NewyorkDev
1.1194PR #1125by jainpranjal97
1.1946PR #1126by AnirudhRahul
1.1091PR #1127by dentity007
1.1311PR #1128by AnubhavBharadwaaj
1.1154PR #1129by EthanYangTW
1.1174PR #1130by Gusanidas
1.1140PR #1135by barneywohl
1.1116PR #1140by newjordan
1.1874PR #1142by ymrohit
1.1493PR #1145by AnirudhRahul
1.1109PR #1148by aamodbhatt
1.1179PR #1150by sahiee-dev
1.1151PR #1152by ericdatum
1.7942PR #1166by Christopher-Lee-McClendon
1.1347PR #1170by Christopher-Lee-McClendon
1.1199PR #1171by EthanYangTW
1.1145PR #1172by dexhunter
1.1015PR #1176by bigbag
1.0962PR #1182by adityakm24
1.1227PR #1183by akaiHuang
1.5080PR #1184by icryo
0.9485PR #1185by skoustav35
0.9641PR #1186by andrewbaggio1
0.9850PR #1209by andrewbaggio1
1.1064PR #1212by Gusanidas
1.1108PR #1215by turbo-indubitable
1.1601PR #1216by SoHarshh
1.1574PR #1218by clarkkevRECORD
1.0978PR #1221by amabito
1.1915PR #1224by vermissa0ss
1.1129PR #1228by meinlebenswerk
1.1527PR #1230by nestamidavaine
1.1163PR #1231by nestamidavaine
1.1163PR #1232by Christopher-Lee-McClendon
1.0929PR #1233by ibarrajo
1.1460PR #1234by ibarrajo
1.1461PR #1236by ibarrajo
1.1179PR #1237by ibarrajo
1.1198PR #1240by andrewbaggio1
1.1064PR #1244by monkeyKingProgrammer
1.1443PR #1247by fahmitech
1.2208PR #1252by ahmetdenizyilmaz
1.0713PR #1253by Okropniak
1.2326PR #1254by Elarwei001
1.1070PR #1255by akaiHuang
1.5080PR #1259by himanshudongre
1.1533PR #1260by dexhunter
1.0929PR #1263by xexyz
0.9354PR #1269by Jtss-ux
1.1194PR #1271by andrewbaggio1
1.1289PR #1272by andrewbaggio1
1.1100PR #1273by DushyantChetiwal
1.2196PR #1274by MatoTeziTanka
1.0876PR #1275by ranausmanai
1.1492PR #1276by BiggerDABOSS
1.1100PR #1278by GitGeeks
1.1147PR #1280by aamodbhatt
1.1156PR #1282by newjordan
1.1035PR #1287by dentity007
1.1048PR #1289by MatoTeziTanka
1.0819PR #1290by aryanbhosale
1.1104PR #1291by dentity007
1.0925PR #1296by aryanbhosale
1.0926PR #1297by Omrigotlieb
1.1043PR #1298by Omrigotlieb
1.1043PR #1302by vlivashkin
1.1078PR #1303by anthony-maio
0.9462PR #1306by resouer
1.0846PR #1308by newjordan
1.1364PR #1309by cadenmcmann
1.1143PR #1310by cadenmcmann
1.1177PR #1311by htrung1105
1.1303PR #1313by anthony-maio
0.8637PR #1318by renqianluo
1.0095PR #1319by canivel
0.6951PR #1321by anthony-maio
0.7406PR #1322by newjordan
1.0854PR #1323by sohv
1.1247PR #1324by yahya010
0.8275PR #1325by monisha-max
1.3868PR #1327by mrbese
1.1276PR #1328by renqianluo
0.6361PR #1329by renqianluo
0.6361PR #1332by Omrigotlieb
1.0959PR #1351by resouer
1.0807PR #1353by Rtx09x
1.1547PR #1361by jorge-asenjo
1.1220PR #1364by stukenov
1.1025PR #1366by yunoshev
1.1371PR #1368by JKSNS
0.8503PR #1376by stukenov
0.7094PR #1378by Rajat123456789
1.1711PR #1383by nirmathur
1.3151PR #1384by iverbovoy
1.1441PR #1386by Buld1n
1.1452PR #1389by Rome-1
1.7270PR #1392by Its-Just-Crump
1.1020PR #1395by dttdrv
1.0924PR #1396by erichroepke
1.1067PR #1399by AnubhavBharadwaaj
1.0898PR #1400by tmancino
1.1035PR #1401by teerthsharma
1.1100PR #1405by anthony-maio
1.0856PR #1406by aamodbhatt
1.0887PR #1407by OnlyJundong
1.0960PR #1408by aamodbhatt
1.0800PR #1410by izlley
1.1158PR #1414by Abhishek8108
0.7093PR #1416by erichroepke
1.0795PR #1422by swapp1990
1.1172PR #1424by OnlyJundong
1.0858PR #1425by dentity007
1.4479PR #1427by kjahan
1.2092PR #1431by Idan3011
1.1266PR #1435by AbhayAnandUCSD
1.0980PR #1438by sabdulmajid
1.2029PR #1440by Mertyandimata
1.1026PR #1442by akaiHuang
1.1854PR #1444by hypnoastic
1.3081PR #1446by LauraGomezjurado
1.0960PR #1447by shram86
1.1834PR #1448by shram86
1.1834PR #1450by andrewbaggio1
1.0848PR #1452by bsisduck
0.3509PR #1453by iverbovoy
1.1324PR #1454by bsisduck
0.3509PR #1456by sisegod
1.1465PR #1457by DilpreetBansi
1.1454PR #1458by newjordan
1.1057PR #1460by resouer
1.0827PR #1467by PhamPhuHoa-23
1.1056PR #1471by X-Abhishek-X
1.0866PR #1472by trhgbao
1.2066PR #1473by AVINASH0052
1.1156PR #1474by shram86
1.1434PR #1475by Jaksenc
1.1307PR #1488by ndokutovich
0.8265PR #1494by G3sparky
1.1220PR #1495by shram86
1.1077PR #1496by shram86
1.1920PR #1499by dippatel1994
1.6323PR #1501by SPThole
1.1159PR #1502by SPThole
1.1147PR #1507by ChideraIbe123
0.2282PR #1508by jpfeiffe
1.1135PR #1517by RulinShao
1.0632PR #1528by xiehuanyi
1.1104PR #1538by davie2009kh
1.1180PR #1539by translatingthename
1.0587PR #1548by dljr-github
1.3220PR #1549by dljr-github
1.3220PR #1550by translatingthename
1.0587PR #1558by Subramanyam6
1.4500PR #1559by adityasasidhar
1.2498PR #1561by EthanYangTW
1.0783PR #1563by joshkmartinez
1.0205PR #1565by Idan3011
1.1036PR #1568by yuitokyouni
1.1639PR #1573by shivangbaveja
1.1464PR #1579by Tonyy1977
1.1372PR #1600by sayujshah
1.2781PR #1601by SPThole
1.1190PR #1602by SPThole
1.0744PR #1617by adityasasidhar
1.2192PR #1619by AVINASH0052
1.1156PR #1621by mrbese
1.1531PR #1629by channyzf6
1.0829PR #1630by KevinChunye
1.1412PR #1634by arsenis-cmd
1.1335PR #1639by kunwar-vikrant
1.0832PR #1645by scottcui-georgian
1.1131PR #1646by sergeevii123
1.0909PR #1649by joyceyan
1.1271PR #1650by Jaredcastorena
1.4233PR #1658by AVINASH0052
1.0810PR #1661by anderamondarainh-stack
1.1444PR #1666by mrbese
1.1531PR #1675by jayzuccarelli
1.1451PR #1679by ChideraIbe123
0.7625PR #1694by Rtx09x
1.1136PR #1696by kings-crown
1.1224PR #1709by Bananakin1
1.1470PR #1722by deborahnelson8788726
0.6580PR #1760by BrandtChristian
1.1863
Hyperparameters Across PRs
| pr_number | parameters |
|---|---|
| 64 | — |
| 175 | — |
| 186 | {"layers":4} |
| 187 | {"last_n_layers":4} |
| 218 | {"last_n_layers":4} |
| 265 | {"layers":3,"total_layers":11,"head_count":8,"kv_heads":4} |
| 267 | {"layers":3} |
| 287 | {"layers":4} |
| 290 | {"layers":3} |
| 302 | {"layers":3} |
| 303 | {"last_n_layers":4} |
| 307 | {"layers":4} |
| 315 | {"layers":4} |
| 317 | {"layers":4} |
| 318 | {"layers":4} |
| 325 | {"last_n":4} |
| 330 | {"layers":4} |
| 332 | {"layers":4} |
| 333 | {"layers":4} |
| 338 | {"layers":4} |
| 344 | {"layers":11} |
| 349 | {"layers":4,"total_layers":11} |
| 356 | {"layers":4} |
| 359 | {"last_n_layers":4} |
| 369 | {"layers":4} |
| 371 | {"layers":4} |
| 372 | {"layers":4} |
| 374 | {"layers":4} |
| 375 | — |
| 376 | {"layers":4} |
| 379 | {"last_n_layers":4} |
| 383 | {"layers":4} |
| 389 | {"layers":5} |
| 394 | {"layers":11,"xsa_last_n":4} |
| 397 | {"layers":4} |
| 399 | {"last_n":4} |
| 400 | {"layers":4} |
| 406 | {"layers":4} |
| 410 | {"layers":4} |
| 413 | — |
| 414 | {"layers":4} |
| 415 | {"layers":4} |
| 417 | {"layers":4} |
| 418 | {"layers":6} |
| 422 | {"layers":4} |
| 429 | {"last_n":4} |
| 430 | {"layers":4} |
| 434 | {"layers":4} |
| 445 | {"variant":"XSA4"} |
| 450 | {"layers":4} |
| 452 | {"layers":4} |
| 453 | {"last_layers":4} |
| 455 | {"layers":4} |
| 456 | {"layers":3} |
| 457 | {"layers":4} |
| 458 | {"layers":4} |
| 461 | {"last_n_layers":4} |
| 462 | {"layers":4} |
| 469 | {"layers":4} |
| 473 | {"layers":4} |
| 477 | {"layers":4} |
| 478 | {"layers":11} |
| 482 | {"layers":4} |
| 483 | {"last_n":4} |
| 484 | {"layers":4} |
| 485 | {"layers":4} |
| 487 | {"layers":4} |
| 492 | {"layers":4} |
| 493 | {"layers":4} |
| 498 | {"layers":2} |
| 499 | {"last_blocks":2} |
| 503 | {"layers":11} |
| 507 | {"layers":4} |
| 508 | {"layers":4} |
| 516 | {"variant":4} |
| 518 | {"layers":4} |
| 526 | {"layers":4} |
| 528 | {"layers":11} |
| 529 | {"layers":11} |
| 532 | {"layers":4} |
| 533 | {"layers":4} |
| 534 | {"layers":4} |
| 537 | {"layers":4} |
| 543 | {"layers":4} |
| 544 | {"layers":11} |
| 545 | {"layers":11} |
| 549 | {"layers":4} |
| 554 | {"layers":4} |
| 562 | {"layers":4} |
| 564 | {"layers":4} |
| 573 | {"layers":4} |
| 576 | {"layers":"all"} |
| 577 | {"layers":4} |
| 585 | {"layers":11} |
| 586 | {"layers":4} |
| 587 | {"layers":11} |
| 589 | {"layers":4} |
| 592 | {"layers":4,"layer_indices":[8,9,10,11]} |
| 593 | {"last_n_layers":4} |
| 598 | {"layers":4} |
| 601 | {"layers":4} |
| 606 | {"layers":11} |
| 609 | {"layers":11} |
| 612 | — |
| 628 | — |
| 633 | {"layers":4} |
| 634 | {"layers":11} |
| 636 | {"layers":4} |
| 638 | {"layers":11} |
| 639 | {"layers":11} |
| 642 | {"layers":4} |
| 644 | {"layers":4} |
| 645 | — |
| 649 | {"layers":4} |
| 656 | {"layers":4} |
| 659 | {"layers":4} |
| 667 | {"layers":5} |
| 670 | {"layers":11} |
| 674 | {"last_n":4} |
| 681 | {"layers":4} |
| 688 | {"layers":11,"window_size":8} |
| 690 | {"layers":11} |
| 691 | {"layers":4} |
| 692 | {"layers":11} |
| 693 | {"layers":[7,8,9,10]} |
| 695 | {"layers":6} |
| 698 | {"layers":4} |
| 700 | {"layers":11,"ws":8} |
| 702 | {"layers":11} |
| 703 | {"layers":4} |
| 706 | {"last_n":4} |
| 710 | {"layers":4} |
| 714 | {"last_n_layers":4} |
| 715 | {"layers":11} |
| 720 | {"layers":6} |
| 726 | {"layers":4} |
| 727 | {"last_n":11} |
| 728 | {"layers":11} |
| 733 | {"layers":11} |
| 734 | {"layers":4} |
| 738 | {"layers":11} |
| 740 | {"layers":9} |
| 741 | {"version":"XSA4"} |
| 745 | {"layers":13} |
| 752 | {"last_n_layers":4} |
| 753 | {"last_n":4} |
| 754 | {"layers":4} |
| 757 | {"layers":4} |
| 758 | {"layers":11} |
| 760 | {"layers":4} |
| 761 | {"layers":11} |
| 763 | {"layers":11} |
| 767 | {"last_n":4} |
| 768 | {"layers":4} |
| 770 | {"layers":4} |
| 771 | {"layers":4} |
| 773 | {"last_n_layers":2} |
| 774 | {"layers":11} |
| 776 | {"XSA_LAST_N":6} |
| 777 | {"layers":11} |
| 778 | {"layers":11} |
| 779 | {"layers":11} |
| 784 | {"layers":4} |
| 785 | {"last_n":4} |
| 786 | {"layers":4} |
| 788 | {"layers":11} |
| 790 | {"layers":"all"} |
| 792 | {"layers":11} |
| 794 | {"layers":4} |
| 796 | {"layers":11} |
| 797 | {"layers":11} |
| 798 | {"layers":11} |
| 802 | {"layers":4} |
| 803 | {"variant":4,"layers":11} |
| 808 | {"last_n_layers":4} |
| 809 | {"layers":4} |
| 810 | {"layers":4} |
| 811 | {"layers":4} |
| 813 | {"layers":11,"dim":512,"heads":"8/8 full MHA"} |
| 816 | {"layers":4} |
| 826 | {"layers":4} |
| 827 | {"layers":4} |
| 828 | {"layers":4} |
| 831 | {"last_n":4} |
| 832 | {"last_layers":4} |
| 835 | {"layers":4} |
| 836 | {"layers":4} |
| 838 | {"layers":4} |
| 841 | {"layers":11} |
| 843 | {"variant":4} |
| 849 | {"layers":11} |
| 850 | {"layers":4} |
| 851 | {"layers":11,"window_size":8} |
| 857 | {"last_layers":4} |
| 861 | {"layers":11} |
| 862 | {"layers":4} |
| 865 | {"variant":4} |
| 871 | {"layers":4} |
| 872 | {"layers":11} |
| 876 | {"layers":4} |
| 880 | {"layers":11,"ws":8} |
| 887 | — |
| 889 | {"variant":4} |
| 890 | {"layers":11} |
| 891 | {"layers":4} |
| 892 | {"layers":4} |
| 893 | {"variant":"XSA4"} |
| 894 | {"layers":4} |
| 896 | {"layers":4} |
| 900 | {"layers":11} |
| 908 | {"layers":4} |
| 909 | {"layers":11} |
| 912 | {"layers":4} |
| 915 | — |
| 916 | {"layers":4} |
| 918 | {"layers":4} |
| 922 | {"layers":11} |
| 924 | {"layers":11} |
| 925 | {"layers":11} |
| 926 | {"layers":"late"} |
| 927 | {"last_n":4} |
| 928 | {"layers":11} |
| 932 | — |
| 933 | {"layers":4} |
| 940 | {"layers":11} |
| 945 | — |
| 948 | {"layers":11} |
| 952 | {"last_n_layers":4} |
| 953 | {"layers":11} |
| 958 | {"layers":4} |
| 961 | {"variant":4} |
| 963 | {"layers":11} |
| 964 | {"version":4} |
| 965 | {"last_n":4} |
| 967 | {"layers":11} |
| 968 | {"layers":11} |
| 972 | {"layers":4} |
| 974 | — |
| 975 | {"layers":4} |
| 978 | {"layers":4} |
| 988 | {"layers":4} |
| 991 | {"layers":11} |
| 993 | {"layers":11,"hidden_dim":512,"q_heads":8,"kv_heads":4} |
| 995 | {"layers":11} |
| 996 | {"layers":4} |
| 999 | {"last_n":4} |
| 1002 | {"last_n_layers":4} |
| 1004 | {"layers":11} |
| 1005 | {"last_layers":4} |
| 1006 | {"layers":11} |
| 1007 | {"layers":[7,10]} |
| 1008 | {"last_n_layers":4} |
| 1009 | {"layers":4} |
| 1013 | {"layers":9} |
| 1015 | {"layers":2,"mode":"gated"} |
| 1019 | {"layers":11} |
| 1026 | {"layers":4} |
| 1029 | {"last_layers":4} |
| 1033 | {"last_layers":4} |
| 1037 | {"layers":4} |
| 1039 | {"last_n_layers":4} |
| 1043 | {"layers":4} |
| 1045 | {"layers":11} |
| 1051 | — |
| 1056 | {"layers":11} |
| 1057 | {"layers":4} |
| 1060 | {"layers":11} |
| 1062 | {"layers":4} |
| 1066 | {"layers":4} |
| 1069 | — |
| 1070 | {"layers":4} |
| 1072 | {"layers":11} |
| 1077 | {"layers":4} |
| 1080 | {"layers":[7,8,9,10]} |
| 1081 | {"layers":4} |
| 1084 | {"last_n":4} |
| 1085 | {"layers":4} |
| 1086 | {"layers":4} |
| 1090 | {"layers":4} |
| 1092 | {"layers":11} |
| 1094 | {"layers":4} |
| 1098 | {"layers":4} |
| 1099 | {"layers":11} |
| 1101 | {"layers":4} |
| 1105 | {"layers":11} |
| 1108 | {"layers":4} |
| 1112 | {"layers":[7,10]} |
| 1113 | {"layers":11} |
| 1114 | {"variant":4} |
| 1117 | {"layers":4} |
| 1118 | {"layers":4} |
| 1120 | — |
| 1122 | {"layers":11} |
| 1123 | {"layers":11} |
| 1124 | {"last_n":11} |
| 1125 | {"layers":"all"} |
| 1126 | {"layers":11} |
| 1127 | {"layers":4} |
| 1128 | {"last_n":4} |
| 1129 | {"layers":11} |
| 1130 | {"layers":7} |
| 1135 | {"layers":11} |
| 1140 | {"layers":4,"loops":3} |
| 1142 | {"last_n":4} |
| 1145 | {"last_n":11,"bigram_vocab_size":2816,"bigram_dim":112} |
| 1148 | {"last_n_layers":4} |
| 1150 | {"layers":4} |
| 1152 | {"layers":3,"heads":4,"dim":128} |
| 1166 | {"layers":4} |
| 1170 | {"layers":11} |
| 1171 | {"layers":11} |
| 1172 | {"layers":11} |
| 1176 | {"layers":11} |
| 1182 | {"layers":7} |
| 1183 | {"layers":4} |
| 1184 | {"layers":11} |
| 1185 | {"layers":[6,7,8,9,10]} |
| 1186 | {"layers":11} |
| 1209 | — |
| 1212 | — |
| 1215 | {"layers":12,"learned_per_head_alpha":true} |
| 1216 | {"layers":"all"} |
| 1218 | — |
| 1221 | {"layers":11} |
| 1224 | — |
| 1228 | {"layers":4} |
| 1230 | {"last_n_layers":4} |
| 1231 | {"layers":4} |
| 1232 | {"layers":11} |
| 1233 | — |
| 1234 | {"layers":11} |
| 1236 | — |
| 1237 | {"layers":11} |
| 1240 | — |
| 1244 | {"last_n_layers":4} |
| 1247 | {"layers":4} |
| 1252 | {"layers":11} |
| 1253 | {"last_n_layers":4} |
| 1254 | {"layers":11} |
| 1255 | {"layers":4} |
| 1259 | — |
| 1260 | {"pattern":"all-11"} |
| 1263 | {"layers":11} |
| 1269 | {"last_n_layers":4} |
| 1271 | {"last_n":11} |
| 1272 | {"layers":"all"} |
| 1273 | — |
| 1274 | {"layers":4} |
| 1275 | — |
| 1276 | {"layers":11} |
| 1278 | — |
| 1280 | {"last_n_layers":4} |
| 1282 | {"layers":11} |
| 1287 | {"layers":11} |
| 1289 | {"last_layers":4} |
| 1290 | {"layers":11} |
| 1291 | {"layers":11} |
| 1296 | — |
| 1297 | {"layers":11} |
| 1298 | {"layers":11} |
| 1302 | {"layers":11} |
| 1303 | {"layers":11} |
| 1306 | — |
| 1308 | — |
| 1309 | {"layers":11} |
| 1310 | {"layers":4} |
| 1311 | {"layers":4} |
| 1313 | {"layers":11} |
| 1318 | {"layers":11} |
| 1319 | {"layers":11} |
| 1321 | {"layers":11} |
| 1322 | {"layers":11} |
| 1323 | — |
| 1324 | {"layers":11} |
| 1325 | {"last_layers":4} |
| 1327 | {"layers":11} |
| 1328 | {"layers":11} |
| 1329 | {"layers":11} |
| 1332 | {"layers":11} |
| 1351 | — |
| 1353 | {"layers":11,"scope":"all"} |
| 1361 | {"layers":11} |
| 1364 | {"layers":11} |
| 1366 | {"layers":4} |
| 1368 | — |
| 1376 | {"layers":"all"} |
| 1378 | {"layers":11} |
| 1383 | {"last_n":4} |
| 1384 | {"layers":4} |
| 1386 | {"layers":4} |
| 1389 | {"layers":11} |
| 1392 | {"layers":11} |
| 1395 | {"layers":11} |
| 1396 | {"layers":11} |
| 1399 | {"layers":11} |
| 1400 | {"layers":11} |
| 1401 | {"layers":11} |
| 1405 | {"layers":11} |
| 1406 | {"last_n_layers":4} |
| 1407 | {"layers":4} |
| 1408 | {"layers":11} |
| 1410 | {"layers":11} |
| 1414 | {"layers":4} |
| 1416 | {"layers":"all"} |
| 1422 | {"blocks":7} |
| 1424 | {"last_n_layers":4} |
| 1425 | {"layers":11} |
| 1427 | {"layers":4} |
| 1431 | {"layers":4} |
| 1435 | {"layers":11} |
| 1438 | {"layers":4} |
| 1440 | {"layers":11} |
| 1442 | {"layers":11} |
| 1444 | {"layers":4} |
| 1446 | {"layers":11} |
| 1447 | {"layers":5,"gated_last_layer":true} |
| 1448 | {"layers":5,"gated_layers":1} |
| 1450 | — |
| 1452 | {"layers":4} |
| 1453 | {"last_n":4} |
| 1454 | {"layers":4} |
| 1456 | — |
| 1457 | {"layers":11} |
| 1458 | {"layers":11} |
| 1460 | {"layers":11} |
| 1467 | {"layers":11} |
| 1471 | {"layers":11} |
| 1472 | — |
| 1473 | {"layers":11} |
| 1474 | {"layers":5,"gated_layers":1} |
| 1475 | {"layers":11} |
| 1488 | — |
| 1494 | {"layers":11} |
| 1495 | {"layers":5,"last_gated":1} |
| 1496 | {"layers":5,"last_gated":true} |
| 1499 | {"layers":4} |
| 1501 | {"layers":11} |
| 1502 | {"layers":11} |
| 1507 | — |
| 1508 | {"layers":11} |
| 1517 | {"layers":"all"} |
| 1528 | {"last_n":11} |
| 1538 | {"layers":11} |
| 1539 | {"layers":11} |
| 1548 | — |
| 1549 | — |
| 1550 | {"layers":11} |
| 1558 | {"layers":11} |
| 1559 | {"layers":2} |
| 1561 | {"layers":11} |
| 1563 | — |
| 1565 | — |
| 1568 | {"blocks":6} |
| 1573 | {"layers":4} |
| 1579 | {"last_n":4} |
| 1600 | — |
| 1601 | {"blocks":11} |
| 1602 | — |
| 1617 | {"xsa_last_n":2} |
| 1619 | {"layers":11} |
| 1621 | {"layers":4} |
| 1629 | {"layers":11} |
| 1630 | {"layers":12} |
| 1634 | — |
| 1639 | — |
| 1645 | — |
| 1646 | — |
| 1649 | {"layers":4} |
| 1650 | {"layers":3,"heads":4} |
| 1658 | {"layers":11} |
| 1661 | {"layers":4} |
| 1666 | {"layers":4} |
| 1675 | {"layers":11} |
| 1679 | — |
| 1694 | {"layers":11,"scope":"all"} |
| 1696 | {"layers":11} |
| 1709 | {"layers":11} |
| 1722 | {"layers":11} |
| 1760 | {"last_n_layers":4} |