← Back to Architecture
depth recurrence
Architecture
Used in
357 PRs
Best BPB
0.1156
Avg BPB
1.1444
Submissions
PR #30by JackYoung27
1.2663PR #31by JackYoung27
1.2663PR #54by TheCause
1.5283PR #79by Marvbuster
1.8698PR #91by koushikkethamakka
1.5890PR #103by MatthewHRockwell
1.5000PR #104by gwelinder
1.3358PR #126by Athenox14
1.7510PR #144by DJLougen
1.3517PR #148by iverbovoy
1.2196PR #184by Idan3011
1.1855PR #187by Idan3011
1.1629PR #212by mrdavtan
1.1329PR #213by estesryan
1.6004PR #216by alons23
0.8100PR #288by trasnake87
1.2334PR #294by sseanliu
1.1645PR #298by MrINVISO
1.2271PR #319by Arth-Singh
1.2716PR #340by starfly-web
1.2182PR #341by tobiascanavesi
1.3323PR #345by anandks2006
1.8522PR #386by Sambhav242005
1.4061PR #432by jadechip
1.5295PR #456by Christopher-Lee-McClendon
1.1532PR #459by mer2234
1.1490PR #459by mer2234
1.1490PR #459by mer2234
1.1490PR #461by Christopher-Lee-McClendon
1.1446PR #495by SergiuDeveloper
1.2092PR #526by Christopher-Lee-McClendon
1.1425PR #530by j420
1.4963PR #537by Christopher-Lee-McClendon
1.1387PR #552by loveless2001
1.1634PR #559by Parswanadh
1.5348PR #596by AriaAnima
0.6430PR #648by maorinka
1.1428PR #686by msisovic
1.1182PR #697by Danishlynx
1.1194PR #733by stukenov
1.0278PR #739by Jonas-T5
1.5000PR #745by stukenov
1.0222PR #752by Naazimsnh02
1.1182PR #784by iverbovoy
1.2065PR #808by Naazimsnh02
0.6364PR #822by henrycashe26
1.2604PR #835by iverbovoy
1.1980PR #855by aazizyan
1.2659PR #856by iverbovoy
1.1454PR #857by aruniyer
1.1093PR #895by iverbovoy
1.0889PR #900by Robby955
0.1156PR #927by Tonyy1977
1.1696PR #990by newjordan
0.7614PR #1032by wfproc
1.3631PR #1033by Naazimsnh02
0.4311PR #1047by newjordan
0.8822PR #1083by newjordan
0.4961PR #1088by serdardoesml
1.2542PR #1096by vimeto
1.3342PR #1110by gowtham0992
1.2249PR #1186by andrewbaggio1
0.9850PR #1193by dentity007
1.4390PR #1203by raider99k
1.3557PR #1204by msisovicRECORD
1.1063PR #1230by nestamidavaine
1.1163PR #1231by nestamidavaine
1.1163PR #1239by tmancino
1.5918PR #1245by mkenney2
1.1470PR #1255by akaiHuang
1.5080PR #1260by dexhunter
1.0929PR #1274by MatoTeziTanka
1.0876PR #1278by GitGeeks
1.1147PR #1279by dexhunter
1.0924PR #1283by newjordan
1.1373PR #1285by dexhunterRECORD
1.0912PR #1289by MatoTeziTanka
1.0819PR #1290by aryanbhosale
1.1104PR #1293by 5en5e1
1.2409PR #1296by aryanbhosale
1.0926PR #1299by Ribin545
1.8184PR #1312by adi-suresh01
1.3299PR #1323by sohv
1.1247PR #1326by aryanbhosale
1.0896PR #1331by dexhunter
1.0900PR #1333by aryanbhosale
1.0766PR #1334by aryanbhosaleRECORD
1.0897PR #1338by bigbag
1.0955PR #1339by bigbag
1.0955PR #1344by Omrigotlieb
1.0923PR #1345by shasank0001
1.1763PR #1349by LocalX991
1.3693PR #1352by alientony
1.2450PR #1368by JKSNS
0.8503PR #1381by X-Abhishek-X
1.1604PR #1383by nirmathur
1.3151PR #1384by iverbovoy
1.1441PR #1385by korentomas
1.4465PR #1387by Muhammad-Ahmed-Rayyan
1.2919PR #1389by Rome-1
1.7270PR #1392by Its-Just-Crump
1.1020PR #1394by clarkkevRECORD
1.0856PR #1395by dttdrv
1.0924PR #1396by erichroepke
1.1067PR #1397by Mertyandimata
1.1047PR #1398by Mertyandimata
1.1047PR #1399by AnubhavBharadwaaj
1.0898PR #1400by tmancino
1.1035PR #1406by aamodbhatt
1.0887PR #1412by Robby955RECORD
1.0835PR #1413by dexhunterRECORD
1.0828PR #1415by bigbag
1.0913PR #1416by erichroepke
1.0795PR #1420by abaybektursun
1.0801PR #1421by X-Abhishek-X
1.0925PR #1422by swapp1990
1.1172PR #1423by aryanbhosale
1.0791PR #1435by AbhayAnandUCSD
1.0980PR #1437by dexhunter
1.0780PR #1440by Mertyandimata
1.1026PR #1445by X-Abhishek-X
1.0889PR #1449by codeprakhar25
1.3680PR #1450by andrewbaggio1
1.0848PR #1453by iverbovoy
1.1324PR #1460by resouer
1.0827PR #1465by sisegod
1.1381PR #1467by PhamPhuHoa-23
1.1056PR #1471by X-Abhishek-X
1.0866PR #1472by trhgbao
1.2066PR #1477by aryanbhosaleRECORD
1.0822PR #1482by aamodbhatt
1.0787PR #1485by ndokutovich
1.0679PR #1487by ndokutovich
1.0600PR #1489by joshkmartinez
1.0736PR #1492by bigbag
1.0810PR #1493by bigbagRECORD
1.0810PR #1499by dippatel1994
1.6323PR #1509by Lumi-node
1.1962PR #1514by dexhunterRECORD
1.0798PR #1517by RulinShao
1.0632PR #1518by abaybektursun
1.0788PR #1520by taka6745
1.0824PR #1521by aryanbhosale
1.0802PR #1523by EthanYangTW
1.0778PR #1532by nogakeren
1.0803PR #1533by aryanbhosale
1.0790PR #1534by someone114514
1.0846PR #1535by newjordan
1.0742PR #1536by dexhunter
1.0775PR #1537by pireylow
1.3971PR #1539by translatingthename
1.0587PR #1540by aryanbhosale
1.0777PR #1541by bigbag
1.0778PR #1542by negrurv
1.5363PR #1546by SPThole
1.0850PR #1548by dljr-github
1.3220PR #1549by dljr-github
1.3220PR #1550by translatingthename
1.0587PR #1555by andrewbaggio1
1.0764PR #1557by ndokutovich
1.0773PR #1561by EthanYangTW
1.0783PR #1565by Idan3011
1.1036PR #1569by abbudjoe
1.3576PR #1570by yufang67
1.0970PR #1571by skar07
1.5406PR #1572by anthony-maio
1.0797PR #1573by shivangbaveja
1.1464PR #1577by redefine-qbit
1.4016PR #1579by Tonyy1977
1.1372PR #1582by He-Wenhao
1.3428PR #1583by codemath3000
1.0801PR #1584by codemath3000
1.0752PR #1585by codemath3000
1.0639PR #1586by dexhunter
1.0749PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1600by sayujshah
1.2781PR #1602by SPThole
1.0744PR #1607by inin-zou
1.4765PR #1607by inin-zou
1.4765PR #1612by seekerPrice
1.5096PR #1614by seekerPrice
1.5096PR #1616by Vickyrrrrrr
1.4100PR #1620by shiawyonglim
1.6644PR #1621by mrbese
1.1531PR #1623by divagr18
1.1942PR #1626by dexhunterRECORD
1.0719PR #1627by mike-ferguson
1.3246PR #1628by yu314-coder
1.1921PR #1629by channyzf6
1.0829PR #1633by joshkmartinez
1.0585PR #1635by PapaFranku4647
1.1063PR #1639by kunwar-vikrant
1.0832PR #1640by thestbobo
1.1412PR #1646by sergeevii123
1.0909PR #1647by powerpratik
1.0616PR #1658by AVINASH0052
1.0810PR #1660by pablinga19
1.0858PR #1661by anderamondarainh-stack
1.1444PR #1662by pablinga19
1.0862PR #1663by pablinga19
1.0862PR #1665by mrbese
1.3571PR #1666by mrbese
1.1531PR #1667by MarioPaerleRECORD
1.0714PR #1670by dexhunter
1.0597PR #1672by andrewbaggio1
1.0119PR #1676by aazizyan
1.0788PR #1688by Buld1n
1.0809PR #1689by chris-colinsky
1.0822PR #1691by AVINASH0052
1.2244PR #1693by dexhunter
1.0573PR #1697by Buld1n
1.0812PR #1700by jorge-asenjo
1.0722PR #1701by Buld1n
1.1016PR #1702by Buld1n
1.1092PR #1703by Buld1n
1.0832PR #1707by nothingLiva
1.0740PR #1714by Anakintano
1.0857PR #1715by G3sparky
1.0809PR #1716by himanshudongre
1.0788PR #1720by kiyoaki
1.0818PR #1725by teslaeco
1.0813PR #1726by krishs0404
1.4689PR #1727by yahya010
1.0722PR #1728by mikeapedia
1.0771PR #1731by Victory963
1.0785PR #1732by Victory963
1.0785PR #1733by G3sparky
1.3262PR #1735by AjAnubolu
1.0429PR #1736by dexhunterRECORD
1.0655PR #1737by sakthivarshans
1.0723PR #1738by alertcat
1.0354PR #1739by DevelopedByAnurag
1.1497PR #1750by teslaeco
1.0809PR #1751by Pravin-dev06
1.3565PR #1754by upascal
1.0881PR #1755by OE-GOD
1.0746PR #1756by romeerp
1.0651PR #1759by yijieyuan
1.0799PR #1760by BrandtChristian
1.1863PR #1762by frido22
1.5200PR #1764by gmn0105
1.2921PR #1766by tashapais
1.0655PR #1769by dexhunterRECORD
1.0645PR #1770by liujshi
1.0796PR #1771by bigbag
1.0651PR #1773by Amanbig
1.1872PR #1776by anmarhindi
1.0808PR #1779by leon2k2k2k
1.0642PR #1780by wisebreadloaf
1.0806PR #1783by ismailntl
1.1716PR #1785by OE-GOD
1.0192PR #1786by sachinnchaudhary
1.0918PR #1794by Programmerryoki
1.0849PR #1795by OE-GOD
0.9516PR #1797by dexhunter
1.0616PR #1798by leon2k2k2k
1.0629PR #1800by leon2k2k2k
1.0629PR #1802by aamodbhatt
1.0771PR #1807by davie2009kh
1.0704PR #1809by PranavViswanath
1.0800PR #1811by peytontolbert
1.2350PR #1812by EthanNing
1.0729PR #1813by djeidy
0.9417PR #1814by suryavanshi
1.0742PR #1817by Tonyy1977
1.0903PR #1820by aiejvn
1.4011PR #1831by Christopher-Lee-McClendon
1.0815PR #1832by sricursion
1.0992PR #1833by pragnyanramtha
0.9069PR #1835by anmarhindi
1.0014PR #1850by someone114514
1.0050PR #1851by aquariouseworkmanRECORD
1.0613PR #1852by G3sparky
1.0282PR #1854by ndokutovich
0.9024PR #1857by dexhunter
1.0322PR #1858by G3sparky
0.9946PR #1864by hardik-bhalekar
1.0805PR #1876by Meirzhan05
1.0801PR #1880by Meirzhan05
1.0775PR #1889by thearmankarapetyan
1.0786PR #1891by peytontolbert
1.2205PR #1893by Hieuabssy
1.0901PR #1894by ChideraIbe123
1.0996PR #1895by VFYAS
1.0785PR #1896by G3sparky
1.1155PR #1901by Karen042009
0.8335PR #1903by GrishaKhumaryan
0.9418PR #1906by AayushBaniya2006
1.0614PR #1911by dttdrv
1.0354PR #1913by Jeffrey-Le
1.0847PR #1914by Fija
1.0612PR #1915by AidenGeunGeun
1.0650PR #1919by dev-pratap-singh
1.0587PR #1924by dexhunter
1.0608PR #1925by simon-marcus
1.0611PR #1929by davie2009kh
0.9457PR #1931by jaydenpiao
1.0759PR #1932by PrzemyslaV88
1.0796PR #1933by deborahnelson8788726
0.9915PR #1934by liujshi
1.0599PR #1936by hilbertmeng
1.0769PR #1938by lijuncheng16
1.0713PR #1943by LoBreeze
1.0818PR #1944by dmitriymyan1
1.2551PR #1947by phfarath
1.0890PR #1948by TimS-ml
1.0624PR #1949by ymrohit
1.1908PR #1950by Christopher-Lee-McClendon
1.0600PR #1952by jayyvk
1.1055PR #1959by remg1997
0.9962PR #1962by chris-colinsky
1.0631PR #1968by Gotnhub
1.0773PR #1969by bsisduck
1.0804PR #1971by BharathSShankar
1.0750PR #1972by BharathSShankar
1.0398PR #1973by harborglowvintage-oss
1.2193PR #1974by harborglowvintage-oss
1.2193PR #1977by sahiee-dev
1.0730PR #1978by EthanYangTW
1.0778PR #1979by Christopher-Lee-McClendon
1.0399PR #1985by yigengjiang
1.1093PR #1987by TimS-ml
1.0618PR #1991by joshuaswanson
0.9429PR #1992by jamesEmerson112
1.0511PR #1994by potatonyliu
1.3004PR #1995by User123331
1.0878PR #2004by corbensorenson
1.3569PR #2005by jamesEmerson112
1.0805PR #2008by Christopher-Lee-McClendon
1.0449PR #2009by SlavH
1.0500PR #2010by Abhishek8108
1.0817PR #2011by BharathSShankar
1.0750PR #2014by simonbissonnetteRECORD
1.0576PR #2022by BharathSShankar
1.0720PR #2026by RahimMirani
1.0611PR #2027by H1cSuNtDr4C0n3S
1.0806PR #2028by Arnie016
1.0898PR #2031by deborahnelson8788726
1.0599PR #2034by Maheshram1
1.0576PR #2037by organic-intelligence-1976
1.2670PR #2038by FF-GardenFn
1.2000PR #2042by FF-GardenFn
1.3641PR #2044by FF-GardenFn
1.3978PR #2046by nprime06
1.0634PR #2051by dexhunter
1.0605PR #2056by FF-GardenFn
1.2646PR #2062by BumaldaOverTheWater94
1.2195PR #2071by jamesEmerson112
1.0066PR #2077by tranphat180603
1.0877PR #2080by NewyorkDev
0.9727PR #2081by maxrubin629
1.1887PR #2082by PrzemyslaV88
1.0668PR #2083by NewyorkDev
0.9418PR #2085by umshahid
1.0857PR #2090by SPThole
1.2310

Hyperparameters Across PRs

| pr_number | parameters |
|---|---|
| 30 | {"layers":15,"unique_blocks":5,"loops":3,"dim":768} |
| 31 | {"shared_blocks":5,"loops":3,"effective_layers":15,"dimension":768} |
| 54 | {"unique_layers":7,"recurrent_passes":3,"effective_depth":10} |
| 79 | {"unique_blocks":3,"repeats":3,"effective_depth":9,"dim":1024} |
| 91 | {"unique_layers":3,"recurrence_count":3} |
| 103 | {"unique_layers":5,"virtual_depth":30} |
| 104 | {"num_unique_layers":4,"num_recurrence":3} |
| 126 | {"unique_layers":4,"recurrence_factor":3,"effective_layers":12} |
| 144 | {"layers":8,"width":384,"k_stride":2,"m_stride":4} |
| 148 | {"shared_blocks":3,"repeats":4,"effective_layers":12} |
| 184 | {"passes":2,"effective_layers":15,"physical_layers":10} |
| 187 | {"passes":2,"encoder_layers":5,"decoder_layers":5} |
| 212 | {"shared_blocks":3,"loops":3} |
| 213 | {"model_dim":512,"num_loop_iters":3,"min_loop_iters":1} |
| 216 | {"blocks":4,"recurrences":6,"effective_layers":24} |
| 288 | {"layers":8,"loops":2,"effective_depth":16} |
| 294 | {"unique_blocks":3,"effective_layers":12,"repetitions_per_block":4} |
| 298 | {"unique_layers":3,"passes":3,"effective_depth":9} |
| 319 | {"unique_layers":5,"loops":3,"effective_depth":15,"dim":640} |
| 340 | — |
| 341 | {"unique_entry_layers":1,"shared_blocks":4,"loops":5,"unique_exit_layers":1,"effective_layers":22} |
| 345 | {"loops":4} |
| 386 | {"passes":12,"shared_blocks":1} |
| 432 | — |
| 456 | {"unique_layers":10,"num_layers":10} |
| 459 | {"layers":3,"loops":4} |
| 459 | {"layers":4,"loops":3} |
| 459 | {"layers":2,"loops":6} |
| 461 | {"layers":11,"unique_layers":10} |
| 495 | {"ENCODER_LOOPS":2,"DECODER_LOOPS":2} |
| 526 | {"logical_layers":11,"unique_layers":10} |
| 530 | {"basis_blocks":5,"unrolls":3,"effective_layers":15,"dim":576} |
| 537 | {"layers":11,"unique_layers":10} |
| 552 | {"unique_blocks":8,"loops":2,"effective_layers":16} |
| 559 | {"unique_layers":7,"loops":2,"effective_depth":14} |
| 596 | {"layers":11} |
| 648 | {"unique_layers":6,"encoder_layers":3,"decoder_layers":3,"num_loops":2,"effective_layers":12} |
| 686 | {"recur_layers":[4,5],"physical_layers":11,"virtual_layers":13} |
| 697 | {"repeat_layers":[4,5],"physical_layers":11,"virtual_layers":13} |
| 733 | {"physical_layers":11,"virtual_layers":13,"repeated_layers":[4,5]} |
| 739 | {"unique_blocks":8,"phases":4,"repetitions":5,"effective_depth":40,"width":512} |
| 745 | {"physical_layers":11,"virtual_layers":13,"repeated_layers":[4,5]} |
| 752 | {"layers":[4,5],"physical_layers":11,"virtual_layers":13} |
| 784 | {"blocks":3,"repeats":4,"effective_layers":12,"dim":832} |
| 808 | {"layers":[4,5],"virtual_layers":13,"physical_layers":11} |
| 822 | {"layers":4,"loops":3} |
| 835 | {"blocks":3,"repeats":4,"effective_layers":12} |
| 855 | {"prelude":1,"shared_blocks":4,"loops":3,"coda":1} |
| 856 | {"blocks":3,"repeats":4,"effective_layers":12} |
| 857 | {"layers":5,"effective_layers":15,"unique_blocks":11} |
| 895 | {"shared_blocks":3,"repeats":[2,3,4,5],"effective_layers":15} |
| 900 | {"shared_blocks":3,"loops":3,"total_layers":11} |
| 927 | {"blocks":4,"loops":7,"dim":1024} |
| 990 | {"loops":4,"flat_layers":4} |
| 1032 | {"layers":2,"steps":2} |
| 1033 | {"layers":[4,5]} |
| 1047 | {"layers":4,"crawler_layers":1,"loops":4} |
| 1083 | {"flat_layers":4,"crawler_layers":1,"loops":4} |
| 1088 | {"layers":null} |
| 1096 | {"effective_layers":14,"unique_blocks":6,"model_dim":640,"heads":10,"kv_heads":5,"head_dim":64,"mlp_multiplier":3} |
| 1110 | {"unique_blocks":3,"iterations":4,"effective_layers":12} |
| 1186 | — |
| 1193 | {"layers":12} |
| 1203 | {"schedule":"0,0,1,1","groups":2,"model_dim":768,"num_heads":12,"num_kv_heads":6,"shared_cores":2} |
| 1204 | {"layers":[4,5],"num_layers":11,"start_step":3000,"untie_mlp":true} |
| 1230 | {"stem_layers":4,"core_layers":3,"tail_layers":4,"passes":3} |
| 1231 | {"layers":11,"effective_layers_eval":17,"passes":[1,2,3],"core_layers":[4,5,6]} |
| 1239 | {"blocks":3,"orbits":8} |
| 1245 | {"state_dim":4} |
| 1255 | — |
| 1260 | {"layers":[4,5],"repeat_count":1,"shared_mlp":true} |
| 1274 | {"layers":[4,5]} |
| 1278 | {"layers":4,"iterations":5} |
| 1279 | {"layers":[4,5],"repeat_count":1} |
| 1283 | {"layers":9,"crawler_layers":1,"loops":2} |
| 1285 | {"layers":[4,5]} |
| 1289 | {"layers":[4,5],"start_step":3000} |
| 1290 | {"layers":[4,5],"virtual_layers":13,"physical_layers":11,"start_step":3000} |
| 1293 | {"layers":9,"passes":2} |
| 1296 | {"layers":[4,5],"start_step":3000} |
| 1299 | {"steps":1} |
| 1312 | {"horizons":3} |
| 1323 | {"iterations":22} |
| 1326 | {"layers":[4,5]} |
| 1331 | {"layers":[3,4,5]} |
| 1333 | {"layers":[4,5]} |
| 1334 | {"layers":[4,5],"physical_layers":11,"virtual_layers":13} |
| 1338 | {"layers":[3,4,5],"start_step":3000} |
| 1339 | {"layers":[3,4,5],"start_step":3000} |
| 1344 | {"layers":[3,4,5],"virtual_layers":14,"physical_layers":11} |
| 1345 | {"layers":10,"model_dim":576,"num_heads":8,"num_kv_heads":4,"recurrent_mode":"fixed","recurrent_core_start":3,"recurrent_core_len":2,"recurrent_steps":2} |
| 1349 | {"num_passes":8} |
| 1352 | {"recurrence":1} |
| 1368 | {"layers":[4,5],"virtual_layers":13,"physical_layers":11} |
| 1381 | {"physical_layers":11,"virtual_layers":13,"recurrence_layers":[4,5],"start_step":3000} |
| 1383 | {"physical_blocks":9,"virtual_layers":11,"shared_blocks":["3-4","9-10"]} |
| 1384 | {"layers":3,"repeats":4,"effective_layers":12} |
| 1385 | {"physical_layers":4,"loops":3,"effective_layers":12} |
| 1387 | {"num_recurrences":9} |
| 1389 | {"layers":[4,5]} |
| 1392 | {"layers":[4,5],"start_step":3000} |
| 1394 | {"layers":[4,5],"loops":2} |
| 1395 | {"layers":[4,5],"start_step":3000} |
| 1396 | {"layers":[3,4,5]} |
| 1397 | — |
| 1398 | — |
| 1399 | {"layers":4} |
| 1400 | {"layers":2} |
| 1406 | {"layers":11,"recurrent_layers":[4,5],"effective_passes":13} |
| 1412 | {"phase1_at":0.5,"phase2_at":0.65} |
| 1413 | {"layers":[4,5],"loops":2} |
| 1415 | {"layers":[3,4,5]} |
| 1416 | — |
| 1420 | {"num_loops":3,"virtual_layers":17,"loop_layers":[4,5]} |
| 1421 | {"layers":[4,5],"virtual_layers":13,"activated_step":3000} |
| 1422 | {"layers":13,"unique_blocks":7} |
| 1423 | {"loop":[4,5]} |
| 1435 | {"physical_layers":11,"virtual_layers":13,"repeat_layers":[4,5],"activate_step":3000} |
| 1437 | {"loop_start":3,"loop_end":5} |
| 1440 | {"layers":[4,5],"effective_layers":13} |
| 1445 | {"layers":[3,4,5],"virtual_layers":14,"physical_layers":11,"start_step":2000} |
| 1449 | {"unique_blocks":7,"repeats":2} |
| 1450 | {"layers":[4,5],"loops":3,"virtual_layers":17} |
| 1453 | {"layers":3,"repeats":4,"effective_layers":12} |
| 1460 | {"layers":[4,5],"repeats":2} |
| 1465 | {"unique_layers":9,"recur":2,"effective_layers":18} |
| 1467 | {"layers":[4,5],"start_step":3000} |
| 1471 | {"layers":[3,4,5],"virtual_layers":14} |
| 1472 | {"physical_layers":8,"loops":2,"logical_layers":16} |
| 1477 | {"loop_start":4,"loop_end":5} |
| 1482 | — |
| 1485 | {"layers":3,"physical_layers":11,"virtual_layers":13,"repeat_layers":[3,4,5]} |
| 1487 | {"layers":3,"virtual_layers":13} |
| 1489 | {"loops":2,"start_layer":4,"end_layer":5} |
| 1492 | {"layers":[3,4,5],"virtual_layers":17,"physical_layers":11} |
| 1493 | {"layers":[3,4,5],"virtual_layers":17,"physical_layers":11} |
| 1499 | {"layers":[3,4,5],"repeats":2} |
| 1509 | {"layers":5,"iterations":2,"effective_depth":10} |
| 1514 | {"layers":[3,5],"repeats":2} |
| 1517 | {"layers":3,"start_step":2000,"physical_layers":11,"virtual_layers":14} |
| 1518 | {"LOOP_START":3,"LOOP_END":5,"NUM_LOOPS":2,"passes":3,"loop_blocks":3} |
| 1520 | {"virtual_layers":17,"physical_layers":11} |
| 1521 | {"layers":3} |
| 1523 | {"physical_layers":11,"virtual_layers":17,"loop_layers":[3,4,5],"activation_start":35} |
| 1532 | {"layers":3,"activate_at_frac":0.35,"virtual_layers":17,"physical_layers":11} |
| 1533 | {"layers":17} |
| 1534 | — |
| 1535 | {"flat_layers":7,"crawler_layers":3,"loops":3} |
| 1536 | {"physical_layers":11,"virtual_layers":17,"loops":3} |
| 1537 | {"loop_start":3,"loop_end":5} |
| 1539 | {"layers":3,"virtual_layers":14,"physical_layers":11} |
| 1540 | {"layers":[3,4,5]} |
| 1541 | {"physical_layers":11,"virtual_layers":17} |
| 1542 | {"layers":1,"repeats":9} |
| 1546 | {"layers":3,"num_loops":2} |
| 1548 | {"layers":[3,4,5],"loops":2} |
| 1549 | {"layers":[3,4,5],"passes":3} |
| 1550 | {"physical_layers":11,"virtual_layers":14,"repeat_layers":[3,4,5]} |
| 1555 | {"layers":"3-5","repeats":2,"virtual_layers":17,"physical_layers":11} |
| 1557 | {"layers":[3,4,5]} |
| 1561 | {"physical_layers":11,"virtual_layers":17} |
| 1565 | {"physical_layers":12,"effective_layers":16,"recurrent_layers":2,"repeats":3} |
| 1569 | {"layers":11,"dimension":512,"schedule":"AAAAAPAAAAA"} |
| 1570 | {"layers":[3,4,5],"loops":2,"activated_at_frac":0.35} |
| 1571 | — |
| 1572 | {"layers":[3,4,5],"num_loops":2} |
| 1573 | {"layers":12,"virtual_layers":14,"replayed_layers":[3,4]} |
| 1577 | {"recurrent_loops":1} |
| 1579 | {"blocks":4,"loops":7,"dimension":736} |
| 1582 | {"physical_layers":9,"effective_layers":12,"recurrent_layers":3,"extra_loops":1} |
| 1583 | {"layers":3,"virtual_layers":17,"physical_layers":11} |
| 1584 | {"layers":[3,5],"activated_at_frac":0.35} |
| 1585 | {"layers":[3,5],"activated_at_frac":0.35} |
| 1586 | {"layers":"3-5","loops":2} |
| 1589 | {"layers":[3,4],"repeats":2} |
| 1589 | {"layers":[3,4,5],"repeats":2} |
| 1589 | {"layers":[3,4],"repeats":3} |
| 1600 | {"unique_blocks":5,"iterations":2} |
| 1602 | {"layers":3,"extra_passes":2} |
| 1607 | {"layers":[3,4],"repeats":2,"virtual_layers":12,"physical_layers":8} |
| 1607 | {"start_frac":0.35,"start_step":350} |
| 1612 | {"layers":[3,4,5]} |
| 1614 | {"layers":[3,4,5],"start_step":1500} |
| 1616 | {"layers":4,"recurrent_layers":[2,3,4,5],"virtual_depth":18} |
| 1620 | {"unique_blocks":6,"logical_layers":12} |
| 1621 | {"layers":[3,4,5],"loops":3,"activation_frac":0.35} |
| 1623 | {"layers":[3,4],"repeats":2,"physical_layers":9,"effective_layers":11} |
| 1626 | {"layers":3} |
| 1627 | — |
| 1628 | {"physical_layers":11,"virtual_layers":17,"shared_layers":[3,4,5],"loops":3} |
| 1629 | {"layers":[3,4,5],"repeats":3,"activate_at_frac":0.35} |
| 1633 | {"layers":[4,5],"loops":2} |
| 1635 | {"layers":[4,5],"num_layers":11,"start_step":3000,"untie_mlp":true} |
| 1639 | {"loops":"3-5","activated_at":"35% training"} |
| 1640 | {"unique_blocks":6,"recurrence_steps":4,"effective_layers":24} |
| 1646 | {"layers":11,"loops":2,"virtual_layers":17} |
| 1647 | {"layers":3} |
| 1658 | {"layers":[3,4,5],"passes":2,"enabled_frac":0.35} |
| 1660 | {"layers":[3,4,5],"onset_step":3000} |
| 1661 | {"reused_blocks":[4,5],"source_block":3} |
| 1662 | {"layers":[3,4,5],"onset_step":3000} |
| 1663 | {"layers":3} |
| 1665 | — |
| 1666 | {"layers":[3,5],"loops":3} |
| 1667 | {"layers":[3,4,5],"activated_frac":0.35} |
| 1670 | {"encoder":[0,1,2,3,4,5,3,4],"decoder":[5,3,4,5,6,7,8,9,10]} |
| 1672 | {"physical_layers":11,"virtual_layers":17} |
| 1676 | {"loops":3,"layers":[3,4,5]} |
| 1688 | {"layers":[3,5]} |
| 1689 | {"layers":[3,4,5]} |
| 1691 | {"reps":2} |
| 1693 | {"layers":11} |
| 1697 | {"enable_looping_at_step":2600} |
| 1700 | {"layers":[3,4,5]} |
| 1701 | {"num_loops":2,"loop_start":3,"loop_end":5} |
| 1702 | {"loop_onset_step":2600,"loop_layers":[3,5],"gate":true} |
| 1703 | {"num_loops":2,"loop_start":3,"loop_end":5,"enable_looping_at_step":2600} |
| 1707 | {"layers":10} |
| 1714 | {"layers":[3,4,5],"loops":2} |
| 1715 | {"layers":[3,4,5],"num_loops":2,"activate_frac":0.35} |
| 1716 | {"encoder":[0,1,2,3,4,5,3,4],"decoder":[5,3,4,5,6,7,8,9,10]} |
| 1720 | {"layers":3,"virtual_layers":17,"physical_layers":11} |
| 1725 | {"layers":3} |
| 1726 | {"loop_start":3,"loop_end":5,"enable_looping_at":0.35,"num_loops":2} |
| 1727 | {"phases":4} |
| 1728 | {"num_loops":2} |
| 1731 | {"layers":3,"virtual_layers":17,"physical_layers":11} |
| 1732 | {"physical_layers":11,"virtual_layers":17,"recurrence_layers":3} |
| 1733 | — |
| 1735 | {"layers":3,"virtual_layers":17} |
| 1736 | {"layers":[4,5],"repeats":2} |
| 1737 | {"layers":[3,4,5],"repetitions":3} |
| 1738 | {"layers":3} |
| 1739 | {"layers":[4,5],"virtual_layers":11,"physical_layers":9} |
| 1750 | {"layers":3} |
| 1751 | {"layers":11} |
| 1754 | {"loop_start":3,"loop_end":5,"num_loops":2,"enabled_at":"50% training"} |
| 1755 | {"layers":3} |
| 1756 | {"train_depths":[1,3,4],"eval_depth":4} |
| 1759 | {"layers":3,"activate_at_frac":0.35} |
| 1760 | {"layers":[3,4,5],"loops":2} |
| 1762 | {"blocks":2} |
| 1764 | {"num_loops":0} |
| 1766 | {"layers":3,"loops":2,"virtual_layers":17,"activate_at":35} |
| 1769 | {"loop_start":3,"loop_end":5,"num_loops":2} |
| 1770 | {"layers":3} |
| 1771 | {"layers":11,"depths":[1,3,4]} |
| 1773 | {"layers":[3,4,5],"activation":"35%"} |
| 1776 | {"layers":[3,4,5],"loops":2,"activate_frac":0.35} |
| 1779 | {"layers":[3,4,5],"loops":2} |
| 1780 | {"layers":3,"phase1_frac":0.35,"phase2_frac":0.55} |
| 1783 | {"layers":3,"passes":4} |
| 1785 | — |
| 1786 | — |
| 1794 | {"layers":"3-5"} |
| 1795 | {"layers":11} |
| 1797 | {"loop_start":3,"loop_end":5,"parallel_start_layer":8} |
| 1798 | {"layers":[4,5]} |
| 1800 | {"alpha":[[0.23,0.04,0.03],[0.13,-0.34,0.01],[0.06,0.19,-0.02]],"beta":[1.56,1.85,2.13]} |
| 1802 | {"encoder":[0,1,2,3,4,5,3,4],"decoder":[5,3,4,5,6,7,8,9,10]} |
| 1807 | {"layers":3} |
| 1809 | {"layers":[3,4,5],"loops":2,"virtual_layers":17,"physical_layers":11} |
| 1811 | {"training_depth_recurrence":1,"evaluation_depth_recurrence":1} |
| 1812 | {"layers":[3,5],"num_loops":2} |
| 1813 | {"layers":[3,4,5],"enable_after_training_frac":0.35} |
| 1814 | — |
| 1817 | {"effective_depth":7,"loops":2} |
| 1820 | {"layers":11,"num_iters":22} |
| 1831 | {"layers":3} |
| 1832 | {"layers":3} |
| 1833 | {"layers":[1,3,4]} |
| 1835 | {"layers":3} |
| 1850 | {"layers":[3,5]} |
| 1851 | {"layers":[3,4,5],"loops":2,"activated_at_frac":0.35} |
| 1852 | {"layers":[3,5],"num_loops":2,"activated_at_frac":0.35} |
| 1854 | {"layers":11,"parallel_residual_start":8} |
| 1857 | — |
| 1858 | {"layers":[3,4,5],"num_loops":2,"activated_at_frac":0.35} |
| 1864 | {"layers":[3,4,5],"repeats":2} |
| 1876 | {"layers":[3,5],"loops":3} |
| 1880 | {"layers":[3,4,5],"loops":3,"virtual_layers":17} |
| 1889 | {"layers":3} |
| 1891 | {"training":1,"evaluation":1} |
| 1893 | {"layers":[3,4,5],"start_step":3000} |
| 1894 | {"enable_looping_at":0.42} |
| 1895 | {"activated_at_wallclock_fraction":0.4} |
| 1896 | {"cells":6,"layers":12,"virtual_layers":17,"dimensions":512} |
| 1901 | {"pattern":[0,1,2,3,4,5,3,4,5]} |
| 1903 | {"layers":6,"pattern":[0,1,2,3,4,5,3,4,5]} |
| 1906 | {"layers":3} |
| 1911 | {"layers":[3,4,5]} |
| 1913 | {"layers":11,"repeated_layers":"3-5","repeat_count":2} |
| 1914 | {"loop_start":3,"loop_end":5,"parallel_start_layer":8} |
| 1915 | — |
| 1919 | {"layers":[3,7],"repetitions":3} |
| 1924 | {"num_loops":2} |
| 1925 | — |
| 1929 | {"layers":3} |
| 1931 | {"layers":3} |
| 1932 | {"layers":3,"start_layer":3,"end_layer":5} |
| 1933 | {"layers":3} |
| 1934 | {"layers":[3,4,5],"num_loops":2} |
| 1936 | {"layers":[3,4,5]} |
| 1938 | {"layers":[3,4,5],"loops":2} |
| 1943 | {"layers":3,"start_layer":3,"end_layer":5} |
| 1944 | {"layers":[4,5]} |
| 1947 | {"layers":3} |
| 1948 | {"layers":[3,4,5],"repeat":2,"activation_frac":0.35} |
| 1949 | {"folds":5,"shared_blocks":2,"fold_state_dim":496,"visible_dim":576,"exit_blocks":4,"stem_blocks":1} |
| 1950 | {"loop_layers":[3,5],"num_loops":2} |
| 1952 | — |
| 1959 | {"layers":3} |
| 1962 | {"layers":[3,4,5],"repeats":3} |
| 1968 | {"layers":3} |
| 1969 | {"loop_layers":[3,5],"num_loops":2,"virtual_layers":17} |
| 1971 | {"layers":3} |
| 1972 | {"layers":[3,5]} |
| 1973 | {"layers":[3,4,5],"num_loops":2} |
| 1974 | {"layers":[3,4,5],"num_loops":2} |
| 1977 | {"layers":3} |
| 1978 | {"layers":null,"cycles":"KS_UT_DEPTH"} |
| 1979 | {"layers":[3,5],"num_loops":2} |
| 1985 | {"layers":[4,5]} |
| 1987 | {"layers":"L3-5","repeats":2} |
| 1991 | {"encoder":[0,1,2,3,4,5,3,4],"decoder":[5,3,4,5,6,7,8,9,10]} |
| 1992 | {"loops":2} |
| 1994 | {"layers":7,"loops":3,"effective_depth":21} |
| 1995 | — |
| 2004 | {"unique_blocks":8,"effective_depth":16,"route_repeats":2} |
| 2005 | {"layers":[3,4,5]} |
| 2008 | {"loop_layers":[3,4,5],"num_loops":2} |
| 2009 | {"layers":8,"recurrent_passes":3,"effective_depth":24} |
| 2010 | — |
| 2011 | {"layers":3} |
| 2014 | {"layers":[3,4,5],"frac":0.35} |
| 2022 | {"layers":11,"recurrence_loops":3,"recurrence_range":"3-5"} |
| 2026 | {"layers":[3,5],"repeats":3,"threshold_frac":0.35} |
| 2027 | {"layers":[3,5]} |
| 2028 | {"layers":[3,4,5]} |
| 2031 | — |
| 2034 | {"layers":[3,4,5],"frac":0.35} |
| 2037 | {"enabled":false} |
| 2038 | — |
| 2042 | — |
| 2044 | — |
| 2046 | {"schedule":"1->2->3"} |
| 2051 | {"layers":[3,5],"loops":2} |
| 2056 | {"chunk":4,"dimensions":384} |
| 2062 | {"loops":2,"loop_start":4,"loop_end":5} |
| 2071 | {"layers":[3,4,5]} |
| 2077 | {"layers":3} |
| 2080 | — |
| 2081 | {"prelude_blocks":2,"core_blocks":2,"coda_blocks":2,"loop_passes":3} |
| 2082 | — |
| 2083 | {"sequence_length":8192} |
| 2085 | {"encoder":[0,1,2,3,4,5,3,4],"decoder":[5,3,4,5,6,7,8,9,10],"activated_frac":0.35} |
| 2090 | {"virtual_layers":11,"physical_blocks":2} |