← Back to Architecture

XSA

Architecture
Used in
479 PRs
Best BPB
0.0180
Avg BPB
1.0589

Submissions

PR #64by yesbhautik
1.1250
PR #175by anthony-maio
1.1229
PR #186by mahsumaktas
1.1565
PR #187by Idan3011
1.1629
PR #218by bopmite
1.1248
PR #265by unnir
1.1307
PR #267by andrewgcodes
1.1374
PR #287by jfprinczRECORD
1.1271
PR #290by ibarrajo
1.1354
PR #302by JackYoung27
1.1520
PR #303by sseanliu
1.1436
PR #307by dennisimoo
1.1357
PR #315by jfprincz
1.1248
PR #317by chris-buckley
1.1442
PR #318by sseanliu
1.1284
PR #325by Aum08Desai
1.1462
PR #330by bopmite
1.1609
PR #332by saml212
1.1320
PR #333by mahsumaktas
1.1565
PR #338by alertcat
1.1254
PR #344by aryanbhosale
1.1330
PR #349by Mapika
1.1399
PR #356by sjp611
1.8338
PR #359by tmustier
1.1345
PR #369by signalrush
1.1328
PR #371by mrdavtan
1.1401
PR #372by HyperPotatoNeo
1.1361
PR #374by unnirRECORD
1.1246
PR #375by charmquark1984
1.1257
PR #376by anthony-maio
1.1399
PR #379by dannywillowliu-uchi
1.1257
PR #383by joelnishanth
1.1320
PR #389by trasnake87
1.1466
PR #394by greqone
1.1247
PR #397by translatingthename
1.1364
PR #399by abaybektursun
1.1247
PR #400by chanwoo-park-official
1.1296
PR #406by dentity007
1.1287
PR #410by EthanYangTW
1.1216
PR #413by anantdgoel
1.4525
PR #414by signalrush
1.1233
PR #415by EthanYangTW
1.1216
PR #417by EthanYangTW
1.1227
PR #418by yashverms
1.1715
PR #422by albertorkive
1.1396
PR #429by AbhisekBasu1
1.1231
PR #430by sahiee-dev
1.1428
PR #434by parinzee
1.1370
PR #445by newjordan
1.1236
PR #450by zachgoldfine44
1.1466
PR #452by ofirkris
1.1366
PR #453by Divyesh-Thirukonda
1.1248
PR #455by kasimte
1.1299
PR #456by Christopher-Lee-McClendon
1.1532
PR #457by carlesonielfa
1.1839
PR #458by ofirkris
1.1365
PR #461by Christopher-Lee-McClendon
1.1446
PR #462by JoeProAI
1.0672
PR #469by cmcdnd
1.1418
PR #473by abaybektursun
1.1214
PR #477by harsha-gouru
1.1522
PR #478by gowtham0992
1.1268
PR #482by harsha-gouru
1.1522
PR #483by tmustier
1.1346
PR #484by Robby955
1.1185
PR #485by harsha-gouru
1.1522
PR #487by anantdgoel
1.1720
PR #492by Divyesh-Thirukonda
1.1591
PR #493by parinzee
1.1309
PR #498by newjordan
1.1478
PR #499by newjordan
1.1478
PR #503by EthanYangTW
1.1195
PR #507by skarakulak
1.1558
PR #508by newjordan
1.1215
PR #516by Asukabot0
1.1428
PR #518by sofiabod
1.0622
PR #526by Christopher-Lee-McClendon
1.1425
PR #528by EthanYangTW
1.1195
PR #529by EthanYangTW
1.1195
PR #532by NotADevIAmaMeatPopsicle
1.0487
PR #533by newjordan
1.1207
PR #534by rarce
1.1804
PR #537by Christopher-Lee-McClendon
1.1387
PR #543by rarce
1.1804
PR #544by EthanYangTW
1.1179
PR #545by EthanYangTW
1.1179
PR #549by abaybektursunRECORD
1.1194
PR #554by chrisnkuno
1.4612
PR #562by bigbag
1.1354
PR #564by sadeghja1070
1.1270
PR #573by Sarimsaljook
1.0523
PR #576by cmcdnd
1.1164
PR #577by newjordan
1.1207
PR #585by EthanYangTW
1.1179
PR #586by EaCognitive
1.1365
PR #587by newjordan
1.1208
PR #589by RoyiRa
1.1178
PR #592by Skytuhua
1.1476
PR #593by abaybektursun
1.1163
PR #598by Christopher-Lee-McClendon
1.1334
PR #601by anantdgoel
1.1418
PR #606by EthanYangTW
1.1162
PR #609by saml212
1.1154
PR #612by Christopher-Lee-McClendon
1.1079
PR #628by Christopher-Lee-McClendon
1.0983
PR #633by MatoTeziTanka
1.1526
PR #634by raahilshah
1.1171
PR #636by NewyorkDev
1.1234
PR #638by Asukabot0
1.1164
PR #639by Robby955
1.1158
PR #642by minh-stakc
0.8173
PR #644by Christopher-Lee-McClendon
1.0944
PR #645by FlynnCruse
1.8990
PR #649by pall23-mech
1.2073
PR #656by newjordan
1.1190
PR #659by deanbrr
1.0920
PR #667by suchitj2702
1.1352
PR #670by abaybektursun
1.1171
PR #674by newjordan
1.0461
PR #681by Alfaxad
1.4775
PR #688by RoyiRa
1.0745
PR #690by EthanYangTW
1.1186
PR #691by xexyz
1.0988
PR #692by EthanYangTW
1.1186
PR #693by EthanYangTW
1.1186
PR #695by 0xNoramiya
1.1360
PR #698by hesong0222-dev
1.1642
PR #700by RoyiRa
1.0541
PR #702by lukacf
1.0244
PR #703by Gusanidas
1.1176
PR #706by newjordan
1.0461
PR #710by Dhruba531
1.1240
PR #714by Upsalla
1.1187
PR #715by Asukabot0
1.0337
PR #720by agalimova
1.1078
PR #726by DeepReinforce
1.1147
PR #727by Asukabot0
0.9674
PR #728by abaybektursun
1.1142
PR #733by stukenov
1.0278
PR #734by Robby955
1.1198
PR #738by gowtham0992
1.0970
PR #740by resouer
1.0909
PR #741by andrewbaggio1
0.9850
PR #745by stukenov
1.0222
PR #752by Naazimsnh02
1.1182
PR #753by newjordan
0.9625
PR #754by aryanbhosale
1.1253
PR #757by fielding
1.1124
PR #758by hypery11
1.0465
PR #760by erikqu
1.2185
PR #761by Asukabot0
0.9581
PR #763by hypery11
0.9917
PR #767by RichiiiTV
0.9209
PR #768by mradassaad
1.1201
PR #770by minh-stakc
0.6672
PR #771by sunnypatneedi
1.0705
PR #773by siddhantparadox
1.1532
PR #774by travispchen
0.9370
PR #776by agalimova
0.9258
PR #777by Robby955
0.9623
PR #778by raahilshah
0.9605
PR #779by deanbrr
0.6683
PR #784by iverbovoy
1.2065
PR #785by SirSaltySalmon
1.5364
PR #786by shinegami-2002
0.8128
PR #788by hypery11
0.9059
PR #790by danialht
1.1172
PR #792by xexyz
1.0340
PR #794by jeremyschied
1.3346
PR #796by Robby955
0.6567
PR #797by armantsaturian
0.8960
PR #798by travispchen
0.5466
PR #802by Bortlesboat
0.9123
PR #803by pentxayc
0.4416
PR #808by Naazimsnh02
0.6364
PR #809by AayushBaniya2006
0.2952
PR #810by Idan3011
0.9393
PR #811by quietsmile
0.4377
PR #813by hypery11
0.6671
PR #816by jimliu741523
1.1194
PR #826by himanshudongre
0.2951
PR #827by Programmerryoki
1.3999
PR #828by bigbag
0.9076
PR #831by sseanliu
1.1284
PR #832by jfprincz
1.1903
PR #835by iverbovoy
1.1980
PR #836by autocode-rayes
1.1219
PR #838by aryanbhosale
1.1215
PR #841by someone114514
1.1157
PR #843by quietsmile
0.2834
PR #849by dttdrv
1.1105
PR #850by callithyia
0.3212
PR #851by RoyiRa
0.2071
PR #857by aruniyer
1.1093
PR #861by JoeProAI
1.1326
PR #862by grim-hitman0XX
1.3036
PR #865by aryanbhosale
0.2841
PR #871by greqone
0.8004
PR #872by gowtham0992
1.0467
PR #876by Bortlesboat
0.5863
PR #880by RoyiRa
0.1003
PR #887by anthony-maio
0.9642
PR #889by anthony-maio
0.9642
PR #890by sofiabod
0.4405
PR #891by robbiebusinessacc
1.1428
PR #892by robbiebusinessacc
1.1428
PR #893by aryanbhosale
0.1310
PR #894by albertorkive
1.1821
PR #896by MVPandey
1.1896
PR #900by Robby955
0.1156
PR #908by albertorkive
1.1734
PR #909by sunnypatneedi
0.8609
PR #912by Bortlesboat
0.3461
PR #915by anthony-maio
0.9642
PR #916by Bortlesboat
0.3461
PR #918by haikosys
0.1653
PR #922by greqone
0.0972
PR #924by THUQiXuan
0.0280
PR #925by THUQiXuan
0.0281
PR #926by NandhuRajRK
0.8705
PR #927by Tonyy1977
1.1696
PR #928by autocode-rayes
1.1211
PR #932by anthony-maio
1.1580
PR #933by haikosys
0.0804
PR #940by antaloaalonso
0.9581
PR #945by TimPietrusky
0.0274
PR #948by dentity007
0.1156
PR #952by FlashyFlash3011
1.1144
PR #953by dexhunter
1.0722
PR #958by shouryamaanjain
1.1382
PR #961by callithyia
0.0881
PR #963by sunnypatneedi
0.8609
PR #964by vivekvar-dl
1.3900
PR #965by Adam-Jacuch
1.1184
PR #967by dexhunter
1.0450
PR #968by dentity007
0.1154
PR #972by Idan3011
0.3922
PR #974by anthony-maio
1.6542
PR #975by Abhishek8108
1.1216
PR #978by AnirudhRahul
1.5134
PR #988by ymrohit
1.0857
PR #991by ibarrajo
1.1145
PR #993by aerosta
0.9631
PR #995by dexhunter
1.0362
PR #996by Idan3011
1.1478
PR #999by aamodbhatt
1.1179
PR #1002by SoHarshh
1.1650
PR #1004by ibarrajo
1.1182
PR #1005by OnlyJundong
1.0853
PR #1006by NewyorkDev
1.1085
PR #1007by dillon-blake
1.2252
PR #1008by monkeyKingProgrammer
1.1538
PR #1009by SoHarshh
1.1574
PR #1013by himanshudongre
1.1682
PR #1015by shram86
1.2115
PR #1019by abaybektursunRECORD
1.1147
PR #1026by danielxmed
1.0945
PR #1029by fielding
1.1520
PR #1033by Naazimsnh02
0.4311
PR #1037by TimPietruskyRunPod
1.1179
PR #1039by yufengli-oai
1.1184
PR #1043by okezue
1.1261
PR #1045by Hilo-Hilo
1.1509
PR #1051by tejas-goyal
1.2826
PR #1056by sofiabod
0.0180
PR #1057by Programmerryoki
1.2201
PR #1060by dexhunter
1.1123
PR #1062by yaowubarbara
1.4508
PR #1066by adityakm24
1.1259
PR #1069by manfromnowhere143
1.1190
PR #1070by manfromnowhere143
1.1190
PR #1072by vimeto
1.1170
PR #1077by malc3om
1.1130
PR #1080by ciach
1.1228
PR #1081by michaelwinczuk
1.1220
PR #1084by AnubhavBharadwaaj
1.1185
PR #1085by adityasasidhar
1.2831
PR #1086by Omrigotlieb
1.1349
PR #1090by swapp1990
1.1573
PR #1092by teddyoweh
1.1219
PR #1094by michaelwinczuk
0.4027
PR #1098by adityakm24
1.1187
PR #1099by Bortlesboat
1.1133
PR #1101by amrayach
1.1290
PR #1105by abaybektursun
1.2208
PR #1108by DbBested
1.1502
PR #1112by dillon-blake
1.2252
PR #1113by gowtham0992
1.3705
PR #1114by minh-stakc
0.0235
PR #1117by adityakm24
1.1187
PR #1118by adityakm24
1.1187
PR #1120by newjordan
1.1099
PR #1122by icryo
1.1146
PR #1123by sisegod
1.1986
PR #1124by NewyorkDev
1.1194
PR #1125by jainpranjal97
1.1946
PR #1126by AnirudhRahul
1.1091
PR #1127by dentity007
1.1311
PR #1128by AnubhavBharadwaaj
1.1154
PR #1129by EthanYangTW
1.1174
PR #1130by Gusanidas
1.1140
PR #1135by barneywohl
1.1116
PR #1140by newjordan
1.1874
PR #1142by ymrohit
1.1493
PR #1145by AnirudhRahul
1.1109
PR #1148by aamodbhatt
1.1179
PR #1150by sahiee-dev
1.1151
PR #1152by ericdatum
1.7942
PR #1166by Christopher-Lee-McClendon
1.1347
PR #1170by Christopher-Lee-McClendon
1.1199
PR #1171by EthanYangTW
1.1145
PR #1172by dexhunter
1.1015
PR #1176by bigbag
1.0962
PR #1182by adityakm24
1.1227
PR #1183by akaiHuang
1.5080
PR #1184by icryo
0.9485
PR #1185by skoustav35
0.9641
PR #1186by andrewbaggio1
0.9850
PR #1209by andrewbaggio1
1.1064
PR #1212by Gusanidas
1.1108
PR #1215by turbo-indubitable
1.1601
PR #1216by SoHarshh
1.1574
PR #1218by clarkkevRECORD
1.0978
PR #1221by amabito
1.1915
PR #1224by vermissa0ss
1.1129
PR #1228by meinlebenswerk
1.1527
PR #1230by nestamidavaine
1.1163
PR #1231by nestamidavaine
1.1163
PR #1232by Christopher-Lee-McClendon
1.0929
PR #1233by ibarrajo
1.1460
PR #1234by ibarrajo
1.1461
PR #1236by ibarrajo
1.1179
PR #1237by ibarrajo
1.1198
PR #1240by andrewbaggio1
1.1064
PR #1244by monkeyKingProgrammer
1.1443
PR #1247by fahmitech
1.2208
PR #1252by ahmetdenizyilmaz
1.0713
PR #1253by Okropniak
1.2326
PR #1254by Elarwei001
1.1070
PR #1255by akaiHuang
1.5080
PR #1259by himanshudongre
1.1533
PR #1260by dexhunter
1.0929
PR #1263by xexyz
0.9354
PR #1269by Jtss-ux
1.1194
PR #1271by andrewbaggio1
1.1289
PR #1272by andrewbaggio1
1.1100
PR #1273by DushyantChetiwal
1.2196
PR #1274by MatoTeziTanka
1.0876
PR #1275by ranausmanai
1.1492
PR #1276by BiggerDABOSS
1.1100
PR #1278by GitGeeks
1.1147
PR #1280by aamodbhatt
1.1156
PR #1282by newjordan
1.1035
PR #1287by dentity007
1.1048
PR #1289by MatoTeziTanka
1.0819
PR #1290by aryanbhosale
1.1104
PR #1291by dentity007
1.0925
PR #1296by aryanbhosale
1.0926
PR #1297by Omrigotlieb
1.1043
PR #1298by Omrigotlieb
1.1043
PR #1302by vlivashkin
1.1078
PR #1303by anthony-maio
0.9462
PR #1306by resouer
1.0846
PR #1308by newjordan
1.1364
PR #1309by cadenmcmann
1.1143
PR #1310by cadenmcmann
1.1177
PR #1311by htrung1105
1.1303
PR #1313by anthony-maio
0.8637
PR #1318by renqianluo
1.0095
PR #1319by canivel
0.6951
PR #1321by anthony-maio
0.7406
PR #1322by newjordan
1.0854
PR #1323by sohv
1.1247
PR #1324by yahya010
0.8275
PR #1325by monisha-max
1.3868
PR #1327by mrbese
1.1276
PR #1328by renqianluo
0.6361
PR #1329by renqianluo
0.6361
PR #1332by Omrigotlieb
1.0959
PR #1351by resouer
1.0807
PR #1353by Rtx09x
1.1547
PR #1361by jorge-asenjo
1.1220
PR #1364by stukenov
1.1025
PR #1366by yunoshev
1.1371
PR #1368by JKSNS
0.8503
PR #1376by stukenov
0.7094
PR #1378by Rajat123456789
1.1711
PR #1383by nirmathur
1.3151
PR #1384by iverbovoy
1.1441
PR #1386by Buld1n
1.1452
PR #1389by Rome-1
1.7270
PR #1392by Its-Just-Crump
1.1020
PR #1395by dttdrv
1.0924
PR #1396by erichroepke
1.1067
PR #1399by AnubhavBharadwaaj
1.0898
PR #1400by tmancino
1.1035
PR #1401by teerthsharma
1.1100
PR #1405by anthony-maio
1.0856
PR #1406by aamodbhatt
1.0887
PR #1407by OnlyJundong
1.0960
PR #1408by aamodbhatt
1.0800
PR #1410by izlley
1.1158
PR #1414by Abhishek8108
0.7093
PR #1416by erichroepke
1.0795
PR #1422by swapp1990
1.1172
PR #1424by OnlyJundong
1.0858
PR #1425by dentity007
1.4479
PR #1427by kjahan
1.2092
PR #1431by Idan3011
1.1266
PR #1435by AbhayAnandUCSD
1.0980
PR #1438by sabdulmajid
1.2029
PR #1440by Mertyandimata
1.1026
PR #1442by akaiHuang
1.1854
PR #1444by hypnoastic
1.3081
PR #1446by LauraGomezjurado
1.0960
PR #1447by shram86
1.1834
PR #1448by shram86
1.1834
PR #1450by andrewbaggio1
1.0848
PR #1452by bsisduck
0.3509
PR #1453by iverbovoy
1.1324
PR #1454by bsisduck
0.3509
PR #1456by sisegod
1.1465
PR #1457by DilpreetBansi
1.1454
PR #1458by newjordan
1.1057
PR #1460by resouer
1.0827
PR #1467by PhamPhuHoa-23
1.1056
PR #1471by X-Abhishek-X
1.0866
PR #1472by trhgbao
1.2066
PR #1473by AVINASH0052
1.1156
PR #1474by shram86
1.1434
PR #1475by Jaksenc
1.1307
PR #1488by ndokutovich
0.8265
PR #1494by G3sparky
1.1220
PR #1495by shram86
1.1077
PR #1496by shram86
1.1920
PR #1499by dippatel1994
1.6323
PR #1501by SPThole
1.1159
PR #1502by SPThole
1.1147
PR #1507by ChideraIbe123
0.2282
PR #1508by jpfeiffe
1.1135
PR #1517by RulinShao
1.0632
PR #1528by xiehuanyi
1.1104
PR #1538by davie2009kh
1.1180
PR #1539by translatingthename
1.0587
PR #1548by dljr-github
1.3220
PR #1549by dljr-github
1.3220
PR #1550by translatingthename
1.0587
PR #1558by Subramanyam6
1.4500
PR #1559by adityasasidhar
1.2498
PR #1561by EthanYangTW
1.0783
PR #1563by joshkmartinez
1.0205
PR #1565by Idan3011
1.1036
PR #1568by yuitokyouni
1.1639
PR #1573by shivangbaveja
1.1464
PR #1579by Tonyy1977
1.1372
PR #1600by sayujshah
1.2781
PR #1601by SPThole
1.1190
PR #1602by SPThole
1.0744
PR #1617by adityasasidhar
1.2192
PR #1619by AVINASH0052
1.1156
PR #1621by mrbese
1.1531
PR #1629by channyzf6
1.0829
PR #1630by KevinChunye
1.1412
PR #1634by arsenis-cmd
1.1335
PR #1639by kunwar-vikrant
1.0832
PR #1645by scottcui-georgian
1.1131
PR #1646by sergeevii123
1.0909
PR #1649by joyceyan
1.1271
PR #1650by Jaredcastorena
1.4233
PR #1658by AVINASH0052
1.0810
PR #1661by anderamondarainh-stack
1.1444
PR #1666by mrbese
1.1531
PR #1675by jayzuccarelli
1.1451
PR #1679by ChideraIbe123
0.7625
PR #1694by Rtx09x
1.1136
PR #1696by kings-crown
1.1224
PR #1709by Bananakin1
1.1470
PR #1722by deborahnelson8788726
0.6580
PR #1760by BrandtChristian
1.1863

Hyperparameters Across PRs

pr_numberparameters
64
175
186{"layers":4}
187{"last_n_layers":4}
218{"last_n_layers":4}
265{"layers":3,"total_layers":11,"head_count":8,"kv_heads":4}
267{"layers":3}
287{"layers":4}
290{"layers":3}
302{"layers":3}
303{"last_n_layers":4}
307{"layers":4}
315{"layers":4}
317{"layers":4}
318{"layers":4}
325{"last_n":4}
330{"layers":4}
332{"layers":4}
333{"layers":4}
338{"layers":4}
344{"layers":11}
349{"layers":4,"total_layers":11}
356{"layers":4}
359{"last_n_layers":4}
369{"layers":4}
371{"layers":4}
372{"layers":4}
374{"layers":4}
375
376{"layers":4}
379{"last_n_layers":4}
383{"layers":4}
389{"layers":5}
394{"layers":11,"xsa_last_n":4}
397{"layers":4}
399{"last_n":4}
400{"layers":4}
406{"layers":4}
410{"layers":4}
413
414{"layers":4}
415{"layers":4}
417{"layers":4}
418{"layers":6}
422{"layers":4}
429{"last_n":4}
430{"layers":4}
434{"layers":4}
445{"variant":"XSA4"}
450{"layers":4}
452{"layers":4}
453{"last_layers":4}
455{"layers":4}
456{"layers":3}
457{"layers":4}
458{"layers":4}
461{"last_n_layers":4}
462{"layers":4}
469{"layers":4}
473{"layers":4}
477{"layers":4}
478{"layers":11}
482{"layers":4}
483{"last_n":4}
484{"layers":4}
485{"layers":4}
487{"layers":4}
492{"layers":4}
493{"layers":4}
498{"layers":2}
499{"last_blocks":2}
503{"layers":11}
507{"layers":4}
508{"layers":4}
516{"variant":4}
518{"layers":4}
526{"layers":4}
528{"layers":11}
529{"layers":11}
532{"layers":4}
533{"layers":4}
534{"layers":4}
537{"layers":4}
543{"layers":4}
544{"layers":11}
545{"layers":11}
549{"layers":4}
554{"layers":4}
562{"layers":4}
564{"layers":4}
573{"layers":4}
576{"layers":"all"}
577{"layers":4}
585{"layers":11}
586{"layers":4}
587{"layers":11}
589{"layers":4}
592{"layers":4,"layer_indices":[8,9,10,11]}
593{"last_n_layers":4}
598{"layers":4}
601{"layers":4}
606{"layers":11}
609{"layers":11}
612
628
633{"layers":4}
634{"layers":11}
636{"layers":4}
638{"layers":11}
639{"layers":11}
642{"layers":4}
644{"layers":4}
645
649{"layers":4}
656{"layers":4}
659{"layers":4}
667{"layers":5}
670{"layers":11}
674{"last_n":4}
681{"layers":4}
688{"layers":11,"window_size":8}
690{"layers":11}
691{"layers":4}
692{"layers":11}
693{"layers":[7,8,9,10]}
695{"layers":6}
698{"layers":4}
700{"layers":11,"ws":8}
702{"layers":11}
703{"layers":4}
706{"last_n":4}
710{"layers":4}
714{"last_n_layers":4}
715{"layers":11}
720{"layers":6}
726{"layers":4}
727{"last_n":11}
728{"layers":11}
733{"layers":11}
734{"layers":4}
738{"layers":11}
740{"layers":9}
741{"version":"XSA4"}
745{"layers":13}
752{"last_n_layers":4}
753{"last_n":4}
754{"layers":4}
757{"layers":4}
758{"layers":11}
760{"layers":4}
761{"layers":11}
763{"layers":11}
767{"last_n":4}
768{"layers":4}
770{"layers":4}
771{"layers":4}
773{"last_n_layers":2}
774{"layers":11}
776{"XSA_LAST_N":6}
777{"layers":11}
778{"layers":11}
779{"layers":11}
784{"layers":4}
785{"last_n":4}
786{"layers":4}
788{"layers":11}
790{"layers":"all"}
792{"layers":11}
794{"layers":4}
796{"layers":11}
797{"layers":11}
798{"layers":11}
802{"layers":4}
803{"variant":4,"layers":11}
808{"last_n_layers":4}
809{"layers":4}
810{"layers":4}
811{"layers":4}
813{"layers":11,"dim":512,"heads":"8/8 full MHA"}
816{"layers":4}
826{"layers":4}
827{"layers":4}
828{"layers":4}
831{"last_n":4}
832{"last_layers":4}
835{"layers":4}
836{"layers":4}
838{"layers":4}
841{"layers":11}
843{"variant":4}
849{"layers":11}
850{"layers":4}
851{"layers":11,"window_size":8}
857{"last_layers":4}
861{"layers":11}
862{"layers":4}
865{"variant":4}
871{"layers":4}
872{"layers":11}
876{"layers":4}
880{"layers":11,"ws":8}
887
889{"variant":4}
890{"layers":11}
891{"layers":4}
892{"layers":4}
893{"variant":"XSA4"}
894{"layers":4}
896{"layers":4}
900{"layers":11}
908{"layers":4}
909{"layers":11}
912{"layers":4}
915
916{"layers":4}
918{"layers":4}
922{"layers":11}
924{"layers":11}
925{"layers":11}
926{"layers":"late"}
927{"last_n":4}
928{"layers":11}
932
933{"layers":4}
940{"layers":11}
945
948{"layers":11}
952{"last_n_layers":4}
953{"layers":11}
958{"layers":4}
961{"variant":4}
963{"layers":11}
964{"version":4}
965{"last_n":4}
967{"layers":11}
968{"layers":11}
972{"layers":4}
974
975{"layers":4}
978{"layers":4}
988{"layers":4}
991{"layers":11}
993{"layers":11,"hidden_dim":512,"q_heads":8,"kv_heads":4}
995{"layers":11}
996{"layers":4}
999{"last_n":4}
1002{"last_n_layers":4}
1004{"layers":11}
1005{"last_layers":4}
1006{"layers":11}
1007{"layers":[7,10]}
1008{"last_n_layers":4}
1009{"layers":4}
1013{"layers":9}
1015{"layers":2,"mode":"gated"}
1019{"layers":11}
1026{"layers":4}
1029{"last_layers":4}
1033{"last_layers":4}
1037{"layers":4}
1039{"last_n_layers":4}
1043{"layers":4}
1045{"layers":11}
1051
1056{"layers":11}
1057{"layers":4}
1060{"layers":11}
1062{"layers":4}
1066{"layers":4}
1069
1070{"layers":4}
1072{"layers":11}
1077{"layers":4}
1080{"layers":[7,8,9,10]}
1081{"layers":4}
1084{"last_n":4}
1085{"layers":4}
1086{"layers":4}
1090{"layers":4}
1092{"layers":11}
1094{"layers":4}
1098{"layers":4}
1099{"layers":11}
1101{"layers":4}
1105{"layers":11}
1108{"layers":4}
1112{"layers":[7,10]}
1113{"layers":11}
1114{"variant":4}
1117{"layers":4}
1118{"layers":4}
1120
1122{"layers":11}
1123{"layers":11}
1124{"last_n":11}
1125{"layers":"all"}
1126{"layers":11}
1127{"layers":4}
1128{"last_n":4}
1129{"layers":11}
1130{"layers":7}
1135{"layers":11}
1140{"layers":4,"loops":3}
1142{"last_n":4}
1145{"last_n":11,"bigram_vocab_size":2816,"bigram_dim":112}
1148{"last_n_layers":4}
1150{"layers":4}
1152{"layers":3,"heads":4,"dim":128}
1166{"layers":4}
1170{"layers":11}
1171{"layers":11}
1172{"layers":11}
1176{"layers":11}
1182{"layers":7}
1183{"layers":4}
1184{"layers":11}
1185{"layers":[6,7,8,9,10]}
1186{"layers":11}
1209
1212
1215{"layers":12,"learned_per_head_alpha":true}
1216{"layers":"all"}
1218
1221{"layers":11}
1224
1228{"layers":4}
1230{"last_n_layers":4}
1231{"layers":4}
1232{"layers":11}
1233
1234{"layers":11}
1236
1237{"layers":11}
1240
1244{"last_n_layers":4}
1247{"layers":4}
1252{"layers":11}
1253{"last_n_layers":4}
1254{"layers":11}
1255{"layers":4}
1259
1260{"pattern":"all-11"}
1263{"layers":11}
1269{"last_n_layers":4}
1271{"last_n":11}
1272{"layers":"all"}
1273
1274{"layers":4}
1275
1276{"layers":11}
1278
1280{"last_n_layers":4}
1282{"layers":11}
1287{"layers":11}
1289{"last_layers":4}
1290{"layers":11}
1291{"layers":11}
1296
1297{"layers":11}
1298{"layers":11}
1302{"layers":11}
1303{"layers":11}
1306
1308
1309{"layers":11}
1310{"layers":4}
1311{"layers":4}
1313{"layers":11}
1318{"layers":11}
1319{"layers":11}
1321{"layers":11}
1322{"layers":11}
1323
1324{"layers":11}
1325{"last_layers":4}
1327{"layers":11}
1328{"layers":11}
1329{"layers":11}
1332{"layers":11}
1351
1353{"layers":11,"scope":"all"}
1361{"layers":11}
1364{"layers":11}
1366{"layers":4}
1368
1376{"layers":"all"}
1378{"layers":11}
1383{"last_n":4}
1384{"layers":4}
1386{"layers":4}
1389{"layers":11}
1392{"layers":11}
1395{"layers":11}
1396{"layers":11}
1399{"layers":11}
1400{"layers":11}
1401{"layers":11}
1405{"layers":11}
1406{"last_n_layers":4}
1407{"layers":4}
1408{"layers":11}
1410{"layers":11}
1414{"layers":4}
1416{"layers":"all"}
1422{"blocks":7}
1424{"last_n_layers":4}
1425{"layers":11}
1427{"layers":4}
1431{"layers":4}
1435{"layers":11}
1438{"layers":4}
1440{"layers":11}
1442{"layers":11}
1444{"layers":4}
1446{"layers":11}
1447{"layers":5,"gated_last_layer":true}
1448{"layers":5,"gated_layers":1}
1450
1452{"layers":4}
1453{"last_n":4}
1454{"layers":4}
1456
1457{"layers":11}
1458{"layers":11}
1460{"layers":11}
1467{"layers":11}
1471{"layers":11}
1472
1473{"layers":11}
1474{"layers":5,"gated_layers":1}
1475{"layers":11}
1488
1494{"layers":11}
1495{"layers":5,"last_gated":1}
1496{"layers":5,"last_gated":true}
1499{"layers":4}
1501{"layers":11}
1502{"layers":11}
1507
1508{"layers":11}
1517{"layers":"all"}
1528{"last_n":11}
1538{"layers":11}
1539{"layers":11}
1548
1549
1550{"layers":11}
1558{"layers":11}
1559{"layers":2}
1561{"layers":11}
1563
1565
1568{"blocks":6}
1573{"layers":4}
1579{"last_n":4}
1600
1601{"blocks":11}
1602
1617{"xsa_last_n":2}
1619{"layers":11}
1621{"layers":4}
1629{"layers":11}
1630{"layers":12}
1634
1639
1645
1646
1649{"layers":4}
1650{"layers":3,"heads":4}
1658{"layers":11}
1661{"layers":4}
1666{"layers":4}
1675{"layers":11}
1679
1694{"layers":11,"scope":"all"}
1696{"layers":11}
1709{"layers":11}
1722{"layers":11}
1760{"last_n_layers":4}