← Back to Architecture
weight tying
Architecture
Used in
288 PRs
Best BPB
0.0180
Avg BPB
1.1341
Submissions
PR #5by albertorkive
1.2244PR #41by kiankyars
1.2296PR #50by mattqlf
1.1925PR #54by TheCause
1.5283PR #144by DJLougen
1.3517PR #197by machdragon
1.1893PR #212by mrdavtan
1.1329PR #236by saml212
1.1400PR #244by simon-marcus
1.2064PR #272by simon-marcus
1.2427PR #276by riatzukiza
1.6577PR #284by DanishjeetSingh
1.4106PR #294by sseanliu
1.1645PR #302by JackYoung27
1.1520PR #305by Naazimsnh02
1.1672PR #309by NewyorkDev
1.1914PR #310by vishesh9131
1.1787PR #319by Arth-Singh
1.2716PR #321by andreanjos
1.1864PR #331by Rhodrium
1.1487PR #355by josusanmartin
1.1929PR #357by adityagupta26
1.1928PR #366by shivnarainms22
1.1574PR #391by NishantDahal
1.2374PR #395by NishantDahal
1.2374PR #398by felipe-parodi
1.1213PR #399by abaybektursun
1.1247PR #422by albertorkive
1.1396PR #433by Robby955
1.3441PR #442by sjp611
1.1027PR #457by carlesonielfa
1.1839PR #462by JoeProAI
1.0672PR #476by aquemy
1.4574PR #483by tmustier
1.1346PR #510by SelfAnush
1.1989PR #532by NotADevIAmaMeatPopsicle
1.0487PR #536by jaksenc
1.5140PR #548by LoquiAuris
1.0865PR #557by hypery11
1.1160PR #564by sadeghja1070
1.1270PR #585by EthanYangTW
1.1179PR #618by 0xtigerclaw
1.4702PR #633by MatoTeziTanka
1.1526PR #648by maorinka
1.1428PR #668by Christopher-Lee-McClendon
1.0920PR #671by keshav55
1.1807PR #713by hypery11
1.1180PR #749by FyeJordy
1.3684PR #755by dcrow85
1.0321PR #767by RichiiiTV
0.9209PR #768by mradassaad
1.1201PR #773by siddhantparadox
1.1532PR #783by petergpt
1.1171PR #785by SirSaltySalmon
1.5364PR #786by shinegami-2002
0.8128PR #830by zlxi02
1.4096PR #832by jfprincz
1.1903PR #869by THUQiXuan
0.1290PR #871by greqone
0.8004PR #875by shalyhinpavel
1.0226PR #890by sofiabod
0.4405PR #892by robbiebusinessacc
1.1428PR #895by iverbovoy
1.0889PR #903by CiprianFlorin-Ifrim
1.2064PR #904by anthony-maio
1.2734PR #905by anthony-maio
1.8587PR #907by resouer
0.0960PR #908by albertorkive
1.1734PR #909by sunnypatneedi
0.8609PR #913by RoyiRa
0.0887PR #918by haikosys
0.1653PR #920by CiprianFlorin-Ifrim
1.1539PR #921by TimPietrusky
0.0939PR #923by CiprianFlorin-Ifrim
1.1090PR #929by andreanjos
1.1653PR #933by haikosys
0.0804PR #937by mihir-s-05
1.4457PR #961by callithyia
0.0881PR #962by AnirudhRahul
0.0214PR #963by sunnypatneedi
0.8609PR #965by Adam-Jacuch
1.1184PR #967by dexhunter
1.0450PR #969by dnldsz
1.2907PR #970by dnldsz
1.2907PR #972by Idan3011
0.3922PR #976by Vibes-me
1.2058PR #978by AnirudhRahul
1.5134PR #979by 0xadvait
1.1387PR #981by BurguerJohn
1.4893PR #986by sofiabod
0.0830PR #989by alexanderaperry-arch
1.1402PR #990by newjordan
0.7614PR #992by TimS-ml
1.4054PR #993by aerosta
0.9631PR #994by singhaikshitijjain
1.4315PR #996by Idan3011
1.1478PR #997by randy06122001-boop
1.4182PR #1001by ibarrajo
1.1188PR #1016by ADIITJ
1.1269PR #1022by aramdov
1.1646PR #1030by sofiabod
0.1130PR #1034by Jeneesh1014
1.7195PR #1036by ivanontech
1.1974PR #1038by Vibes-me
1.2058PR #1040by JoeProAI
1.1336PR #1041by JoeProAI
1.1356PR #1044by greqone
1.8989PR #1046by Jayteare
1.2174PR #1048by mrdavtan
1.1724PR #1050by Taleef7
1.1194PR #1055by sanyalsunny111
0.9693PR #1056by sofiabod
0.0180PR #1057by Programmerryoki
1.2201PR #1059by edidisheng
1.1996PR #1063by SHN2004
1.3321PR #1065by rithunkp
1.1536PR #1074by ldh-at
1.3288PR #1088by serdardoesml
1.2542PR #1095by vimeto
0.0905PR #1096by vimeto
1.3342PR #1097by danielxmed
1.3355PR #1106by agalimova
1.1465PR #1107by mradassaad
1.5633PR #1110by gowtham0992
1.2249PR #1123by sisegod
1.1986PR #1126by AnirudhRahul
1.1091PR #1142by ymrohit
1.1493PR #1152by ericdatum
1.7942PR #1159by JDAppleseed
0.3693PR #1165by brandonpf
1.2314PR #1166by Christopher-Lee-McClendon
1.1347PR #1170by Christopher-Lee-McClendon
1.1199PR #1180by estesryan
1.0577PR #1185by skoustav35
0.9641PR #1193by dentity007
1.4390PR #1198by ymrohit
1.5992PR #1212by Gusanidas
1.1108PR #1221by amabito
1.1915PR #1226by Wolfie8935
1.1428PR #1227by himanshudongre
1.4841PR #1232by Christopher-Lee-McClendon
1.0929PR #1235by maksblu
1.3527PR #1236by ibarrajo
1.1179PR #1241by aiejvn
0.9901PR #1242by Campbellb
1.0903PR #1245by mkenney2
1.1470PR #1254by Elarwei001
1.1070PR #1258by jorge-asenjo
1.3874PR #1261by Aniket-pd
1.3029PR #1263by xexyz
0.9354PR #1278by GitGeeks
1.1147PR #1282by newjordan
1.1035PR #1284by tyrel-beede
1.1207PR #1286by newjordan
1.0963PR #1287by dentity007
1.1048PR #1293by 5en5e1
1.2409PR #1299by Ribin545
1.8184PR #1300by Ribin545
1.8184PR #1307by amrayach
1.1101PR #1308by newjordan
1.1364PR #1311by htrung1105
1.1303PR #1312by adi-suresh01
1.3299PR #1315by andrewmouldon
1.2207PR #1322by newjordan
1.0854PR #1335by WeijieChen2017
1.1948PR #1337by sergimichi
1.2079PR #1337by sergimichi
1.2079PR #1339by bigbag
1.0955PR #1342by nicholasbailey87
1.4816PR #1347by shasank0001
1.3038PR #1355by mradassaad
1.1526PR #1361by jorge-asenjo
1.1220PR #1370by Christopher-Lee-McClendon
1.0030PR #1381by X-Abhishek-X
1.1604PR #1383by nirmathur
1.3151PR #1384by iverbovoy
1.1441PR #1387by Muhammad-Ahmed-Rayyan
1.2919PR #1388by CiprianFlorin-Ifrim
1.5390PR #1400by tmancino
1.1035PR #1403by Rhoahndur
1.3485PR #1410by izlley
1.1158PR #1411by Blakethefn
1.5568PR #1413by dexhunterRECORD
1.0828PR #1417by BruhTheMomentum
1.3039PR #1421by X-Abhishek-X
1.0925PR #1422by swapp1990
1.1172PR #1431by Idan3011
1.1266PR #1434by ranausmanai
1.5207PR #1435by AbhayAnandUCSD
1.0980PR #1436by DevWizard-Vandan
1.5546PR #1437by dexhunter
1.0780PR #1440by Mertyandimata
1.1026PR #1444by hypnoastic
1.3081PR #1445by X-Abhishek-X
1.0889PR #1450by andrewbaggio1
1.0848PR #1461by viasky657
0.4118PR #1465by sisegod
1.1381PR #1471by X-Abhishek-X
1.0866PR #1472by trhgbao
1.2066PR #1478by jxgod
1.1995PR #1479by andrewbaggio1
1.1450PR #1481by Cayton-Tech
1.3440PR #1486by AlirezaAlampour
1.6656PR #1489by joshkmartinez
1.0736PR #1492by bigbag
1.0810PR #1493by bigbagRECORD
1.0810PR #1501by SPThole
1.1159PR #1502by SPThole
1.1147PR #1505by Rohan-Abhilash
1.1791PR #1508by jpfeiffe
1.1135PR #1512by Itssshikhar
1.1117PR #1514by dexhunter
1.0798PR #1515by dexhunter
1.0872PR #1520by taka6745
1.0824PR #1527by alphastar1111
1.2026PR #1533by aryanbhosale
1.0790PR #1536by dexhunter
1.0775PR #1539by translatingthename
1.0587PR #1542by negrurv
1.5363PR #1543by PavelPaha
1.3286PR #1544by Abhishek8108
1.0283PR #1545by Abhishek8108
1.0283PR #1546by SPThole
1.0850PR #1547by adityasasidhar
1.1928PR #1549by dljr-github
1.3220PR #1550by translatingthename
1.0587PR #1555by andrewbaggio1
1.0764PR #1560by dexhunter
1.0741PR #1562by joshkmartinez
1.0205PR #1565by Idan3011
1.1036PR #1568by yuitokyouni
1.1639PR #1570by yufang67
1.0970PR #1574by KRGulaj
1.3587PR #1579by Tonyy1977
1.1372PR #1583by codemath3000
1.0801PR #1584by codemath3000
1.0752PR #1585by codemath3000
1.0639PR #1585by codemath3000
1.0639PR #1586by dexhunter
1.0749PR #1602by SPThole
1.0744PR #1606by AlirezaAlampour
1.3969PR #1608by User123331
1.3921PR #1612by seekerPrice
1.5096PR #1616by Vickyrrrrrr
1.4100PR #1617by adityasasidhar
1.2192PR #1619by AVINASH0052
1.1156PR #1623by divagr18
1.1942PR #1624by joshkmartinez
1.0585PR #1627by mike-ferguson
1.3246PR #1629by channyzf6
1.0829PR #1633by joshkmartinez
1.0585PR #1640by thestbobo
1.1412PR #1643by mradassaad
1.1473PR #1650by Jaredcastorena
1.4233PR #1658by AVINASH0052
1.0810PR #1661by anderamondarainh-stack
1.1444PR #1665by mrbese
1.3571PR #1670by dexhunter
1.0597PR #1672by andrewbaggio1
1.0119PR #1676by aazizyan
1.0788PR #1683by yunoshev
1.1280PR #1688by Buld1n
1.0809PR #1689by chris-colinsky
1.0822PR #1691by AVINASH0052
1.2244PR #1693by dexhunter
1.0573PR #1695by X-Abhishek-X
1.0759PR #1696by kings-crown
1.1224PR #1699by lsb
1.4831PR #1704by Buld1n
1.0976PR #1709by Bananakin1
1.1470PR #1714by Anakintano
1.0857PR #1715by G3sparky
1.0809PR #1720by kiyoaki
1.0818PR #1722by deborahnelson8788726
0.6580PR #1723by SlavH
0.5116PR #1724by Unwindology
1.1803PR #1725by teslaeco
1.0813PR #1728by mikeapedia
1.0771PR #1731by Victory963
1.0785PR #1735by AjAnubolu
1.0429PR #1737by sakthivarshans
1.0723PR #1738by alertcat
1.0354PR #1748by elad-simbalista
1.2098PR #1749by gracebml
1.0996PR #1753by Abhishek-Dalvi410
1.2917PR #1755by OE-GOD
1.0746PR #1756by romeerp
1.0651PR #1759by yijieyuan
1.0799

Hyperparameters Across PRs
| pr_number | parameters |
|---|---|
| 5 | {"physical_layers":null,"logical_layers":null} |
| 41 | — |
| 50 | — |
| 54 | {"recurrent_passes":3} |
| 144 | — |
| 197 | — |
| 212 | — |
| 236 | — |
| 244 | — |
| 272 | — |
| 276 | — |
| 284 | — |
| 294 | — |
| 302 | — |
| 305 | {"dim":512} |
| 309 | — |
| 310 | — |
| 319 | {"unique_layers":5,"loops":3} |
| 321 | — |
| 331 | — |
| 355 | — |
| 357 | — |
| 366 | — |
| 391 | — |
| 395 | — |
| 398 | — |
| 399 | — |
| 422 | — |
| 433 | {"shared_blocks":3,"virtual_layers":9,"lora_rank":8} |
| 442 | — |
| 457 | — |
| 462 | — |
| 476 | — |
| 483 | — |
| 510 | — |
| 532 | — |
| 536 | — |
| 548 | — |
| 557 | — |
| 564 | — |
| 585 | {"layers":[9,10]} |
| 618 | — |
| 633 | — |
| 648 | {"num_loops":2} |
| 668 | — |
| 671 | — |
| 713 | {"vocab_size":1024} |
| 749 | — |
| 755 | — |
| 767 | — |
| 768 | {"layers":[5,6,7,8,9,10]} |
| 773 | {"tie_embeddings":1} |
| 783 | {"layers":11,"bigram_vocab_size":1536,"cache_layer":7} |
| 785 | — |
| 786 | — |
| 830 | — |
| 832 | — |
| 869 | — |
| 871 | — |
| 875 | — |
| 890 | — |
| 892 | — |
| 895 | — |
| 903 | {"vocab_size":8192} |
| 904 | — |
| 905 | — |
| 907 | — |
| 908 | — |
| 909 | — |
| 913 | — |
| 918 | — |
| 920 | — |
| 921 | — |
| 923 | — |
| 929 | — |
| 933 | — |
| 937 | — |
| 961 | — |
| 962 | — |
| 963 | — |
| 965 | — |
| 967 | — |
| 969 | — |
| 970 | — |
| 972 | — |
| 976 | — |
| 978 | — |
| 979 | — |
| 981 | — |
| 986 | — |
| 989 | — |
| 990 | — |
| 992 | — |
| 993 | — |
| 994 | — |
| 996 | — |
| 997 | {"vocab_size":1024} |
| 1001 | — |
| 1016 | — |
| 1022 | — |
| 1030 | — |
| 1034 | — |
| 1036 | — |
| 1038 | — |
| 1040 | {"tied":false} |
| 1041 | {"tie_embeddings":false} |
| 1044 | — |
| 1046 | — |
| 1048 | — |
| 1050 | — |
| 1055 | — |
| 1056 | — |
| 1057 | — |
| 1059 | — |
| 1063 | — |
| 1065 | — |
| 1074 | — |
| 1088 | — |
| 1095 | — |
| 1096 | — |
| 1097 | — |
| 1106 | — |
| 1107 | — |
| 1110 | {"shared_blocks":3} |
| 1123 | — |
| 1126 | — |
| 1142 | — |
| 1152 | — |
| 1159 | — |
| 1165 | — |
| 1166 | — |
| 1170 | — |
| 1180 | — |
| 1185 | — |
| 1193 | — |
| 1198 | — |
| 1212 | — |
| 1221 | — |
| 1226 | — |
| 1227 | — |
| 1232 | — |
| 1235 | — |
| 1236 | — |
| 1241 | — |
| 1242 | — |
| 1245 | — |
| 1254 | — |
| 1258 | — |
| 1261 | — |
| 1263 | — |
| 1278 | — |
| 1282 | — |
| 1284 | — |
| 1286 | — |
| 1287 | — |
| 1293 | — |
| 1299 | — |
| 1300 | {"steps":1} |
| 1307 | — |
| 1308 | {"flat_layers":9,"crawler_layers":1,"crawler_loops":2} |
| 1311 | — |
| 1312 | — |
| 1315 | {"layers":9,"learned_basis_matrices":3,"fixed_random_projections":512} |
| 1322 | — |
| 1335 | {"vocab":1024} |
| 1337 | {"layers":24,"groups":2,"virtual_layers":24,"degree_attn":5,"degree_ffn":2} |
| 1337 | {"embedding_projection":[1024,128,512]} |
| 1339 | {"vocab_size":2048} |
| 1342 | — |
| 1347 | {"layers":15,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_mult":3} |
| 1355 | — |
| 1361 | — |
| 1370 | — |
| 1381 | — |
| 1383 | {"shared_pairs":["3-4","9-10"]} |
| 1384 | — |
| 1387 | {"shared_blocks":2} |
| 1388 | — |
| 1400 | — |
| 1403 | — |
| 1410 | — |
| 1411 | — |
| 1413 | — |
| 1417 | — |
| 1421 | — |
| 1422 | — |
| 1431 | — |
| 1434 | — |
| 1435 | — |
| 1436 | — |
| 1437 | — |
| 1440 | — |
| 1444 | — |
| 1445 | — |
| 1450 | — |
| 1461 | — |
| 1465 | — |
| 1471 | — |
| 1472 | — |
| 1478 | — |
| 1479 | — |
| 1481 | {"vocab_size":1024,"model_dim":512,"bottleneck_ranks":[64,128]} |
| 1486 | — |
| 1489 | — |
| 1492 | — |
| 1493 | — |
| 1501 | — |
| 1502 | — |
| 1505 | — |
| 1508 | {"vocab_size":4096} |
| 1512 | — |
| 1514 | — |
| 1515 | — |
| 1520 | — |
| 1527 | — |
| 1533 | — |
| 1536 | — |
| 1539 | — |
| 1542 | — |
| 1543 | — |
| 1544 | — |
| 1545 | — |
| 1546 | {"embed_dim":416,"model_dim":512} |
| 1547 | — |
| 1549 | — |
| 1550 | — |
| 1555 | — |
| 1560 | — |
| 1562 | — |
| 1565 | — |
| 1568 | {"unique_blocks":6,"passes":2,"effective_layers":12,"d_model":672} |
| 1570 | — |
| 1574 | — |
| 1579 | — |
| 1583 | — |
| 1584 | — |
| 1585 | — |
| 1585 | — |
| 1586 | — |
| 1602 | — |
| 1606 | — |
| 1608 | — |
| 1612 | — |
| 1616 | — |
| 1617 | — |
| 1619 | — |
| 1623 | — |
| 1624 | — |
| 1627 | — |
| 1629 | — |
| 1633 | — |
| 1640 | — |
| 1643 | — |
| 1650 | — |
| 1658 | — |
| 1661 | — |
| 1665 | {"ngroups":1} |
| 1670 | — |
| 1672 | — |
| 1676 | — |
| 1683 | — |
| 1688 | — |
| 1689 | — |
| 1691 | — |
| 1693 | — |
| 1695 | — |
| 1696 | — |
| 1699 | {"vocab_size_plus_mask":1025} |
| 1704 | — |
| 1709 | — |
| 1714 | — |
| 1715 | — |
| 1720 | — |
| 1722 | — |
| 1723 | — |
| 1724 | — |
| 1725 | — |
| 1728 | — |
| 1731 | — |
| 1735 | — |
| 1737 | — |
| 1738 | — |
| 1748 | — |
| 1749 | — |
| 1753 | {"tied_embeddings":1} |
| 1755 | — |
| 1756 | — |
| 1759 | — |