← Back to Architecture

MLP3x

Architecture
Used in: 382 PRs
Best BPB: 0.0274
Avg BPB: 1.0994

Submissions

PR #63 by yahya010 (RECORD)
1.1598
PR #65 by aquariouseworkman (RECORD)
1.1556
PR #66by arjun-krishna1
1.1632
PR #69by TevBenji
1.1708
PR #70by jfprincz
1.1659
PR #76by unixmadtoonslab
1.1433
PR #81by polarizedfortnite-cpu
1.1670
PR #86 by aruniyer (RECORD)
1.1502
PR #88by seanward
1.1605
PR #89by vmfunc
1.1622
PR #99by takhir-iota
1.1605
PR #102by unnir
1.1618
PR #107by m0at
1.1648
PR #108by kellyvv
1.4370
PR #110by mr-ashish-panday
1.2244
PR #114by saml212
1.1574
PR #116by abhishekgahlot2
1.1666
PR #117by trovatochris
1.1702
PR #120by andrewgcodes
0.9588
PR #122by mtybadger
1.1603
PR #123by saikrishnarallabandi
1.1642
PR #128by rsavitt
1.1594
PR #135by unnir
1.1539
PR #137by abhishekgahlot2
1.1666
PR #139by ksang123
1.2029
PR #150by yahya010
1.1478
PR #156by dexhunter
1.1602
PR #160by ChaseWNorton
1.1623
PR #162 by raahilshah (RECORD)
1.1458
PR #164by jfprincz
1.1524
PR #172by GMaN1911
1.1812
PR #173by tamoghnokandar
1.1532
PR #175by anthony-maio
1.1229
PR #178by timowhite88
1.1667
PR #180 by thwu1 (RECORD)
1.1428
PR #181by manfromnowhere143
1.2194
PR #187by Idan3011
1.1629
PR #190by newjordan
1.1725
PR #191by chris-buckley
1.1598
PR #192by baudrillardsgh0st
1.1502
PR #194by baudrillardsgh0st
1.1480
PR #196by sicauzxl
1.3825
PR #198 by jfprincz (RECORD)
1.1318
PR #201by machdragon
1.1551
PR #206by dexhunter
1.1507
PR #207by ajkpersonal
1.1568
PR #208by ajkpersonal
1.1568
PR #212by mrdavtan
1.1329
PR #215by JayCheng113
1.1548
PR #218by bopmite
1.1248
PR #219by alertcat
1.1541
PR #222by ansh-deriv
1.1601
PR #223by 0xjaishy
1.1326
PR #225by dibdabo
1.2089
PR #230by MatthewHRockwell
1.1541
PR #232by kellyvv
1.4370
PR #236by saml212
1.1400
PR #238by kellyvv
1.5164
PR #243by kvmukilan
1.1704
PR #246by kvmukilan
1.1704
PR #249by kvmukilan
1.1704
PR #251by kshitizz36
1.1596
PR #254by timowhite88
1.1303
PR #264by stukenov
1.1455
PR #265by unnir
1.1307
PR #267by andrewgcodes
1.1374
PR #273by dentity007
1.1575
PR #274by haikosys
1.1403
PR #278by nicolasdickenmann
1.0365
PR #281by charmquark1984
1.1381
PR #287 by jfprincz (RECORD)
1.1271
PR #289by integrate-your-mind
1.1518
PR #294by sseanliu
1.1645
PR #295by gowtham0992
1.1477
PR #296by sseanliu
1.1645
PR #297by davidpuertolas
1.1629
PR #302by JackYoung27
1.1520
PR #303by sseanliu
1.1436
PR #304by Bortlesboat
1.4245
PR #305by Naazimsnh02
1.1672
PR #306by xuafeng
1.1448
PR #307by dennisimoo
1.1357
PR #312by chanwoo-park-official
1.1668
PR #316by SkywardSyntax
1.2035
PR #317by chris-buckley
1.1442
PR #324by crony-io
1.1702
PR #326by crony-io
1.2890
PR #327by Ananddna
1.1450
PR #330by bopmite
1.1609
PR #331by Rhodrium
1.1487
PR #333by mahsumaktas
1.1565
PR #334by nathon-lee
1.2207
PR #338by alertcat
1.1254
PR #339by sheeki03
1.1364
PR #351by sp00mm
1.1659
PR #352by sp00mm
1.1659
PR #359by tmustier
1.1345
PR #361by adityagupta26
1.1400
PR #362by mkenney2
1.1497
PR #364by shikhar1729
1.1497
PR #366by shivnarainms22
1.1574
PR #368by MatoTeziTanka
1.2037
PR #369by signalrush
1.1328
PR #370by SergheiBrinza
1.2421
PR #374 by unnir (RECORD)
1.1246
PR #375by charmquark1984
1.1257
PR #376by anthony-maio
1.1399
PR #379by dannywillowliu-uchi
1.1257
PR #383by joelnishanth
1.1320
PR #385by dentity007
1.1488
PR #388by ElliotSlusky
1.1231
PR #389by trasnake87
1.1466
PR #390by newjordan
1.1295
PR #398by felipe-parodi
1.1213
PR #400by chanwoo-park-official
1.1296
PR #401by newjordan
1.1243
PR #403by malc3om
1.1388
PR #406by dentity007
1.1287
PR #410by EthanYangTW
1.1216
PR #414by signalrush
1.1233
PR #415by EthanYangTW
1.1216
PR #418by yashverms
1.1715
PR #422by albertorkive
1.1396
PR #424by someone114514
1.1725
PR #433by Robby955
1.3441
PR #442by sjp611
1.1027
PR #443by CREVIOS
1.1431
PR #444by AymanMahfuz27
1.4536
PR #445by newjordan
1.1236
PR #446by sofiabod
1.1933
PR #447by CREVIOS
1.1431
PR #448by handemanai
1.2006
PR #450by zachgoldfine44
1.1466
PR #453by Divyesh-Thirukonda
1.1248
PR #454by nalediym
1.2055
PR #455by kasimte
1.1299
PR #456by Christopher-Lee-McClendon
1.1532
PR #458by ofirkris
1.1365
PR #461by Christopher-Lee-McClendon
1.1446
PR #465by LoquiAuris
1.1508
PR #466by simonbissonnette
1.1354
PR #467by ADIITJ
1.1428
PR #469by cmcdnd
1.1418
PR #473by abaybektursun
1.1214
PR #474by joshuaswarren
1.1690
PR #481by mrdavtan
1.0970
PR #483by tmustier
1.1346
PR #485by harsha-gouru
1.1522
PR #486by ndokutovich
1.1101
PR #487by anantdgoel
1.1720
PR #488by pkim02
1.3267
PR #489by sofiabod
1.1327
PR #499by newjordan
1.1478
PR #508by newjordan
1.1215
PR #510by SelfAnush
1.1989
PR #512by MatoTeziTanka
0.9512
PR #515by keshav55
1.1807
PR #516by Asukabot0
1.1428
PR #518by sofiabod
1.0622
PR #525by hypery11
1.1160
PR #528by EthanYangTW
1.1195
PR #529by EthanYangTW
1.1195
PR #533by newjordan
1.1207
PR #546by shajalahamedcse
1.1752
PR #547by shajalahamedcse
1.1752
PR #548by LoquiAuris
1.0865
PR #549 by abaybektursun (RECORD)
1.1194
PR #550by haimianbaobao007
1.1890
PR #555by ymrohit
1.0916
PR #560by Rohan5commit
1.1935
PR #563by instax-dutta
1.1428
PR #564by sadeghja1070
1.1270
PR #568by MatoTeziTanka
0.7853
PR #573by Sarimsaljook
1.0523
PR #580by micoverde
1.2623
PR #583by suchihype
1.1489
PR #585by EthanYangTW
1.1179
PR #586by EaCognitive
1.1365
PR #589by RoyiRa
1.1178
PR #593by abaybektursun
1.1163
PR #595by LoquiAuris
1.1100
PR #596by AriaAnima
0.6430
PR #598by Christopher-Lee-McClendon
1.1334
PR #605by bigbag
0.7227
PR #612by Christopher-Lee-McClendon
1.1079
PR #614by bigbag
0.6864
PR #618by 0xtigerclaw
1.4702
PR #619by zeal175
1.4222
PR #622by Upsalla
1.0941
PR #633by MatoTeziTanka
1.1526
PR #642by minh-stakc
0.8173
PR #644by Christopher-Lee-McClendon
1.0944
PR #645by FlynnCruse
1.8990
PR #659by deanbrr
1.0920
PR #666by chrislovescoding
1.1932
PR #671by keshav55
1.1807
PR #678by SPThole
1.3525
PR #681by Alfaxad
1.4775
PR #682by gthgomez
1.1233
PR #684by DeepReinforce
1.0574
PR #685by andrewbaggio1
1.0366
PR #688by RoyiRa
1.0745
PR #690by EthanYangTW
1.1186
PR #692by EthanYangTW
1.1186
PR #693by EthanYangTW
1.1186
PR #698by hesong0222-dev
1.1642
PR #700by RoyiRa
1.0541
PR #705by seanward
1.2151
PR #709by StolbaJ
1.1478
PR #710by Dhruba531
1.1240
PR #713by hypery11
1.1180
PR #714by Upsalla
1.1187
PR #715by Asukabot0
1.0337
PR #728by abaybektursun
1.1142
PR #730by janwww
1.1570
PR #734by Robby955
1.1198
PR #736by Git-Aarya
1.2168
PR #738by gowtham0992
1.0970
PR #741by andrewbaggio1
0.9850
PR #744by ShihChunHao
1.2824
PR #752by Naazimsnh02
1.1182
PR #754by aryanbhosale
1.1253
PR #755by dcrow85
1.0321
PR #757by fielding
1.1124
PR #760by erikqu
1.2185
PR #761by Asukabot0
0.9581
PR #764by ndokutovich
0.9633
PR #768by mradassaad
1.1201
PR #770by minh-stakc
0.6672
PR #771by sunnypatneedi
1.0705
PR #774by travispchen
0.9370
PR #777by Robby955
0.9623
PR #778by raahilshah
0.9605
PR #779by deanbrr
0.6683
PR #786by shinegami-2002
0.8128
PR #791by ShihChunHao
1.2824
PR #794by jeremyschied
1.3346
PR #795by hypery11
0.8881
PR #796by Robby955
0.6567
PR #797by armantsaturian
0.8960
PR #798by travispchen
0.5466
PR #799by yuvraajbains
1.2005
PR #805by zeytx
1.1807
PR #806by ibarrajo
0.6678
PR #807by connectwithprakash
1.0116
PR #808by Naazimsnh02
0.6364
PR #809by AayushBaniya2006
0.2952
PR #810by Idan3011
0.9393
PR #811by quietsmile
0.4377
PR #827by Programmerryoki
1.3999
PR #828by bigbag
0.9076
PR #836by autocode-rayes
1.1219
PR #838by aryanbhosale
1.1215
PR #840by quietsmile
0.2873
PR #841by someone114514
1.1157
PR #843by quietsmile
0.2834
PR #849by dttdrv
1.1105
PR #850by callithyia
0.3212
PR #851by RoyiRa
0.2071
PR #854by ivanontech
1.4530
PR #874by fielding
1.6070
PR #880by RoyiRa
0.1003
PR #891by robbiebusinessacc
1.1428
PR #892by robbiebusinessacc
1.1428
PR #896by MVPandey
1.1896
PR #900by Robby955
0.1156
PR #907by resouer
0.0960
PR #915by anthony-maio
0.9642
PR #924by THUQiXuan
0.0280
PR #925by THUQiXuan
0.0281
PR #926by NandhuRajRK
0.8705
PR #929by andreanjos
1.1653
PR #933by haikosys
0.0804
PR #940by antaloaalonso
0.9581
PR #941by aptsalt
1.3620
PR #945by TimPietrusky
0.0274
PR #947by aamodbhatt
1.1576
PR #953by dexhunter
1.0722
PR #967by dexhunter
1.0450
PR #972by Idan3011
0.3922
PR #978by AnirudhRahul
1.5134
PR #979by 0xadvait
1.1387
PR #982by haikosys
0.0638
PR #988by ymrohit
1.0857
PR #989by alexanderaperry-arch
1.1402
PR #993by aerosta
0.9631
PR #995by dexhunter
1.0362
PR #996by Idan3011
1.1478
PR #997by randy06122001-boop
1.4182
PR #999by aamodbhatt
1.1179
PR #1002by SoHarshh
1.1650
PR #1005by OnlyJundong
1.0853
PR #1007by dillon-blake
1.2252
PR #1009by SoHarshh
1.1574
PR #1025by Zagot-byte
1.3579
PR #1026by danielxmed
1.0945
PR #1048by mrdavtan
1.1724
PR #1051by tejas-goyal
1.2826
PR #1059by edidisheng
1.1996
PR #1062by yaowubarbara
1.4508
PR #1068by LappyG
1.1510
PR #1071by AbhayAnandUCSD
1.1455
PR #1080by ciach
1.1228
PR #1085by adityasasidhar
1.2831
PR #1086by Omrigotlieb
1.1349
PR #1088by serdardoesml
1.2542
PR #1095by vimeto
0.0905
PR #1105by abaybektursun
1.2208
PR #1106by agalimova
1.1465
PR #1108by DbBested
1.1502
PR #1112by dillon-blake
1.2252
PR #1125by jainpranjal97
1.1946
PR #1127by dentity007
1.1311
PR #1130by Gusanidas
1.1140
PR #1139by ivanontech
1.1801
PR #1141by ivanontech
1.1801
PR #1142by ymrohit
1.1493
PR #1144by inFaaa
1.3572
PR #1148by aamodbhatt
1.1179
PR #1167by Durlabhkumarjha
1.3736
PR #1170by Christopher-Lee-McClendon
1.1199
PR #1174by Okropniak
1.3069
PR #1183by akaiHuang
1.5080
PR #1185by skoustav35
0.9641
PR #1186by andrewbaggio1
0.9850
PR #1190by Durlabhkumarjha
1.3365
PR #1202by VirajDeshwal
1.1412
PR #1209by andrewbaggio1
1.1064
PR #1216by SoHarshh
1.1574
PR #1218 by clarkkev (RECORD)
1.0978
PR #1226by Wolfie8935
1.1428
PR #1227by himanshudongre
1.4841
PR #1228by meinlebenswerk
1.1527
PR #1240by andrewbaggio1
1.1064
PR #1254by Elarwei001
1.1070
PR #1256by oidebrett
1.1444
PR #1276by BiggerDABOSS
1.1100
PR #1280by aamodbhatt
1.1156
PR #1298by Omrigotlieb
1.1043
PR #1312by adi-suresh01
1.3299
PR #1313by anthony-maio
0.8637
PR #1321by anthony-maio
0.7406
PR #1337by sergimichi
1.2079
PR #1359by LucasErcolano
0.4188
PR #1361by jorge-asenjo
1.1220
PR #1366by yunoshev
1.1371
PR #1368by JKSNS
0.8503
PR #1378by Rajat123456789
1.1711
PR #1379by LucasErcolano
0.4162
PR #1411by Blakethefn
1.5568
PR #1417by BruhTheMomentum
1.3039
PR #1422by swapp1990
1.1172
PR #1431by Idan3011
1.1266
PR #1436by DevWizard-Vandan
1.5546
PR #1444by hypnoastic
1.3081
PR #1446by LauraGomezjurado
1.0960
PR #1453by iverbovoy
1.1324
PR #1463by tsubasagit
1.2774
PR #1473by AVINASH0052
1.1156
PR #1479by andrewbaggio1
1.1450
PR #1488by ndokutovich
0.8265
PR #1492by bigbag
1.0810
PR #1505by Rohan-Abhilash
1.1791
PR #1508by jpfeiffe
1.1135
PR #1527by alphastar1111
1.2026
PR #1536by dexhunter
1.0775
PR #1556by sidhanth97
1.4352
PR #1557by ndokutovich
1.0773
PR #1559by adityasasidhar
1.2498
PR #1561by EthanYangTW
1.0783
PR #1568by yuitokyouni
1.1639
PR #1577by redefine-qbit
1.4016
PR #1581by aiejvn
1.2321
PR #1617by adityasasidhar
1.2192
PR #1626by dexhunter
1.0719
PR #1630by KevinChunye
1.1412
PR #1647by powerpratik
1.0616
PR #1649by joyceyan
1.1271
PR #1666by mrbese
1.1531
PR #1715by G3sparky
1.0809
PR #1722by deborahnelson8788726
0.6580
PR #1751by Pravin-dev06
1.3565

Hyperparameters Across PRs

pr_number | parameters
63{"hidden_size":1344,"multiplier":2.625}
65{"multiplier":3,"hidden_dim":1536}
66{"hidden":1536,"multiplier":3}
69{"layers":9,"hidden_dim":1536,"vocab_size":1024,"dim":512,"gqa_heads":8,"kv_heads":4}
70{"mlp_mult":3,"hidden_size":1536}
76{"multiplier":3}
81{"mlp_mult":3}
86{"mlp_mult":3,"hidden":1536}
88{"MLP_HIDDEN":1536}
89{"hidden_dim":1536}
99{"mlp_mult":3,"num_layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4,"tie_embeddings":1}
102{"mlp_mult":3,"hidden_dim":1536}
107{"hidden_size":1488}
108{"layers":11,"hidden_dim":1536}
110{"layers":9,"ffn_schedule":[768,960,1152,1344,1536,1728,1920,2112,2304]}
114{"mlp_hidden":1536,"default_mlp_hidden":1024}
116{"hidden_size":1536,"mlp_mult":3}
117{"multiplier":3}
120{"hidden_dim":1536,"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4}
122{"hidden_dim":1536,"multiplier":3}
123{"multiplier":3,"hidden_size":1536}
128{"mlp_mult":3,"hidden":1536}
135{"hidden_dimension":1536}
137{"hidden":1536,"mlp_mult":3}
139{"mlp_multiplier":3,"hidden_dim":2304}
150{"hidden":1536}
156{"dimensions":1536}
160{"mlp_mult":3}
162{"hidden":1536}
164{"hidden_size":1536}
172{"mlp_mult":3,"model_dim":512,"layers":9,"heads":8,"kv_heads":4}
173{"hidden_size":1536,"base_hidden_size":1024}
175{"expansion":3,"hidden_dim":1536,"negative_slope":0.5}
178{"hidden":1536}
180{"hidden":1536}
181{"mlp_mult":3}
187{"multiplier":3}
190{"mlp_hidden":1344}
191{"mlp_mult":3}
192{"multiplier":3}
194{"multiplier":3}
196{"mlp_mult":3}
198{"hidden_size":1536}
201{"hidden":1536}
206{"hidden_size":1536}
207
208{"layers":11}
212{"hidden":1536}
215{"mlp_mult":3}
218{"layers":3}
219{"hidden":1536}
222{"mlp_mult":3,"hidden_size":1536,"num_layers":10,"model_dim":512,"num_heads":8,"num_kv_heads":4}
223{"hidden":1536}
225{"layers":11,"dimensions":512,"mlp_hidden":1024}
230{"hidden_dim":1536,"multiplier":3}
232{"layers":11,"hidden_dim":1536}
236{"hidden":1536}
238{"multiplier":3}
243{"hidden":1536}
246{"hidden":1536}
249{"hidden":1536}
251{"mlp_mult":3}
254{"layers":11,"hidden_dim":1536,"heads":8,"kv_heads":4}
264{"multiplier":3,"hidden_size":1536}
265{"hidden_dim":1536}
267{"hidden_size":1536}
273{"multiplier":3}
274{"hidden_size":1536}
278
281{"layers":11,"model_dim":512,"heads":8,"kv_heads":4,"mlp_hidden":1536}
287{"hidden_size":1536}
289{"hidden":1536,"multiplier":3}
294
295{"hidden_size":1536}
296{"hidden":1536}
297{"hidden":1536,"model_dim":512,"layers":9}
302{"layers":3}
303
304{"hidden_size":1536}
305{"hidden":1536}
306{"expansion":3,"hidden_dim":1536}
307{"multiplier":3}
312{"mlp_mult":3}
316
317
324{"layers":10,"hidden":1536,"mlp_expansion":3}
326{"layers":10,"hidden":1536}
327{"expansion":3}
330{"hidden_size":1536}
331{"layers":10,"hidden":1536}
333{"multiplier":2.75,"hidden_size":1408}
334
338{"expansion":3}
339
351{"multiplier":3}
352{"mlp_multiplier":3}
359{"layers":11,"width":512}
361{"expansion_ratio":3}
362{"mlp_multiplier":3,"hidden_dim":1536}
364{"layers":10}
366{"hidden_dim":1536}
368{"layers":10,"dim":512,"mlp_multiplier":3}
369{"expansion":3}
370{"hidden":1536}
374{"multiplier":3}
375{"multiplier":3}
376{"hidden":1536}
379{"expansion":3}
383{"expansion":3}
385{"mlp_mult":3,"hidden_size":1536}
388
389
390{"expansion":3}
398{"hidden":1536}
400{"multiplier":3}
401{"multiplier":3}
403
406
410{"layers":3}
414{"expansion":3}
415
418{"expansion":3}
422{"hidden":1536}
424
433{"expansion":3}
442{"hidden":1536}
443{"multiplier":3}
444{"mlp_mult":3}
445
446{"layers":7,"width":512,"hidden":1536,"attention_heads":8,"kv_heads":4}
447{"multiplier":3,"hidden_dim":1536}
448{"mlp_mult":3}
450{"hidden":1536}
453{"hidden_size":1536}
454{"hidden_dim":1536}
455{"expansion":3}
456{"expansion":3}
458{"layers":3}
461{"expansion":3}
465{"hidden":1536}
466{"multiplier":3}
467{"multiplier":3}
469{"hidden":1728}
473{"multiplier":3}
474
481{"multiplier":3}
483{"mlp_mult":3}
485{"hidden_size":1536}
486
487{"multiplier":3}
488{"layers":11,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_mult":3}
489
499{"mlp_multiplier":4,"hidden_size":2560}
508{"expansion":3}
510{"expansion_factor":3,"activation":"relu²"}
512{"hidden_size":1536}
515{"layers":10,"mlp_hidden":1536,"embedding_dim":512,"GQA_heads":"8/4"}
516{"multiplier":3}
518
525
528
529
533{"expansion":3}
546{"hidden":1536,"baseline_hidden":1024}
547{"mlp_hidden_units":1536,"expansion_factor":3}
548{"layers":10,"d_model":512,"heads":8,"kv_heads":4}
549{"layers":3}
550{"expansion_factor":3}
555
560{"layers":10}
563{"expansion_factor":3,"hidden_dim":1536,"activation":"ReLU²"}
564{"expansion_factor":3,"activation":"relu-squared"}
568{"hidden_size":1536}
573{"hidden_dim":1536}
580{"MLP_MULT":3}
583{"multiplier":3,"hidden_dim":1536}
585{"multiplier":3.5,"hidden_dim":1792}
586{"expansion":3}
589{"layers":3}
593{"layers":3}
595{"layers":10,"d_model":512,"heads":8,"kv_heads":4,"mlp_multiplier":3}
596{"expansion":3}
598{"hidden":1536,"activation":"ReLU²"}
605
612{"hidden_dim":1536}
614
618{"mlp_mult":3}
619{"MLP_MULT":3,"NUM_LAYERS":10}
622{"hidden":1536}
633{"hidden_dim":1536}
642
644{"expansion_factor":3,"hidden_dim":1536,"activation":"ReLU²"}
645
659
666{"hidden":2304}
671{"hidden_units":1536}
678{"mlp_mult":3,"hidden":1536}
681{"activation":"LeakyReLU(0.5)^2"}
682{"mlp_mult":3}
684
685
688{"activation":"LeakyReLU(0.5)^2"}
690{"layers":3}
692{"layers":3}
693{"layers":3,"activation":"LeakyReLU(0.5)^2"}
698{"multiplier":3}
700{"multiplier":3.5}
705{"hidden_multiplier":3,"hidden_dim":1536}
709{"hidden_size":1536}
710{"expansion":3}
713{"layers":10,"dim":512}
714
715{"multiplier":3}
728{"layers":11}
730{"mlp_mult":4}
734{"multiplier":3}
736{"layers":9,"mlp_multiplier":2}
738{"layers":11,"dimensions":512}
741
744
752{"activation":"LeakyReLU(0.5)^2"}
754{"hidden_dim":1536}
755{"mlp_mult":3,"hidden":1152}
757
760{"multiplier":3}
761{"multiplier":3}
764{"layers":11,"dimensions":512,"gqa":"8/4"}
768
770
771{"expansion":3}
774{"hidden_size_multiplier":3}
777{"activation":"LeakyReLU(0.5)^2"}
778{"multiplier":3}
779
786{"layers":3}
791
794{"layers":3}
795
796{"mlp_multiplier":3}
797
798{"mlp_blocks":3}
799{"mlp_mult":3}
805{"mlp_multiplier":3}
806
807
808
809{"multiplier":3}
810{"hidden_size":1536}
811
827{"multiplier":2}
828{"layers":10,"d_model":512}
836{"expansion":3}
838{"hidden_dim":1536}
840{"multiplier":3}
841{"mlp_layers":3}
843{"multiplier":3}
849{"hidden":1792}
850{"expansion":3,"hidden":1536}
851{"multiplier":3.5}
854{"layers":9,"model_dim":512,"heads":8,"kv_heads":4,"ffn_hidden_dim":1536}
874{"mlp_multiplier":3}
880{"multiplier":3.5}
891
892
896{"multiplier":3}
900
907{"mlp_dim":1536}
915{"layers":3}
924
925{"multiplier":3}
926{"expansion":3}
929{"layers":9,"hidden_size":1536,"mlp_mult":2}
933{"hidden_size":768}
940
941{"multiplier":3.5}
945{"multiplier":3.5}
947{"mlp_mult":3.2}
953{"expansion":3.5}
967{"expansion":3.5}
972
978
979{"expansion":3}
982{"hidden_multiplier":3}
988{"multiplier":2.5}
989{"multiplier":3,"hidden_dim":1536}
993{"multiplier":3,"activation":"ReLU²"}
995{"multiplier":3.5}
996
997{"hidden":1536}
999
1002
1005
1007{"expansion":3}
1009{"layers":3}
1025{"multiplier":3}
1026
1048{"hidden":1536}
1051{"negative_slope":0.5}
1059{"mlp_mult":3}
1062
1068{"mlp_mult":3}
1071{"hidden_dim":1536}
1080{"multiplier":3}
1085{"expansion_factor":3}
1086{"expansion":3}
1088{"layers":3}
1095{"multiplier":3}
1105{"multiplier":3}
1106
1108{"expansion":3}
1112{"expansion":3}
1125{"multiplier":3}
1127{"hidden_size":1536}
1130
1139{"hidden_dim":1920}
1141{"mlp_multiplier":3}
1142{"multiplier":3}
1144{"multiplier":3}
1148
1167{"mlp_mult":3}
1170{"multiplier":3}
1174{"num_layers":5,"model_dim":512,"mlp_mult":4,"num_heads":8,"num_kv_heads":4,"bigram_vocab_size":4096,"bigram_dim":1024}
1183
1185
1186{"multiplier":3.25}
1190{"layers":10,"mlp_expansion":3}
1202{"multiplier":3}
1209
1216{"multiplier":3,"activation":"LeakyReLU"}
1218{"mlp_mult":4}
1226{"hidden":1536}
1227{"multiplier":2}
1228
1240
1254{"expansion":3}
1256
1276{"activation":"LeakyReLU","activation_slope":0.5}
1280
1298{"activation":"LeakyReLU²"}
1312{"multiplier":3}
1313{"layers":3}
1321{"layers":3}
1337{"multiplier":3}
1359{"multiplier":3}
1361{"activation":"LeakyReLU²"}
1366
1368{"hidden":1536}
1378{"layers":11,"mlp_multiplier":3,"hidden_size":1536}
1379{"multiplier":3}
1411{"mlp_layers":3,"activation":"ReLU²"}
1417{"expansion":3}
1422{"multiplier":3}
1431{"multiplier":3.5,"hidden_dim":1792}
1436
1444
1446{"width":1536}
1453{"multiplier":3,"hidden_dim":2640}
1463{"hidden":1536}
1473{"width_multiplier":3}
1479{"activation":"LeakyReLU","multiplier":3}
1488
1492{"multiplier":4}
1505{"mlp_multiplier":3,"hidden_dim":1536}
1508{"mlp_multiplier":3,"activation":"LeakyReLU"}
1527{"multiplier":3}
1536{"expansion":4}
1556{"multiplier":3}
1557{"layers":11,"dimensions":512,"heads":8,"kv_heads":4,"mlp_multiplier":4}
1559{"multiplier":3}
1561{"hidden_dim":2048}
1568{"expansion":3}
1577{"mlp_mult":1.9}
1581{"mlp_mult":3}
1617{"multiplier":3}
1626{"hidden_multiplier":4}
1630{"multiplier":3}
1647{"multiplier":4}
1649
1666{"multiplier":3}
1715{"multiplier":4}
1722{"hidden_multiplier":3}
1751