← Back to Other0
unknown
Other
Used in
910 PRs
Avg BPB
1.0625
Submissions
PR #31by JackYoung27
1.2663PR #31by JackYoung27
1.2663PR #34by ChenLiu-1996
1.2244PR #37by khasinski
1.2012PR #41by kiankyars
1.2296PR #42by chonchiog
1.2197PR #44by daniellawson9999
1.1111PR #45by kiankyars
1.2240PR #46by vavo
1.2697PR #53by kshitizz36
1.1888PR #53by kshitizz36
1.1888PR #56by cschubiner
1.8440PR #61by saml212
1.2154PR #64by yesbhautik
1.1250PR #66by arjun-krishna1
1.1632PR #69by TevBenji
1.1708PR #71by AntDX316
1.3509PR #73by NishantDahal
1.3281PR #73by NishantDahal
1.3281PR #77by samacquaRECORD
1.1950PR #79by Marvbuster
1.8698PR #85by hydeh3r3
1.2156PR #85by hydeh3r3
1.2156PR #88by seanward
1.1605PR #91by koushikkethamakka
1.5890PR #92by saikrishnarallabandi
1.1938PR #95by MatoTeziTanka
1.1836PR #99by takhir-iota
1.1605PR #104by gwelinder
1.3358PR #107by m0at
1.1648PR #108by kellyvv
1.4370PR #110by mr-ashish-panday
1.2244PR #113by JoeProAI
1.1870PR #114by saml212
1.1574PR #116by abhishekgahlot2
1.1666PR #117by trovatochris
1.1702PR #120by andrewgcodes
0.9588PR #122by mtybadger
1.1603PR #122by mtybadger
1.1603PR #123by saikrishnarallabandi
1.1642PR #126by Athenox14
1.7510PR #128by rsavitt
1.1594PR #130by mohosy
1.6372PR #130by mohosy
1.6372PR #131by Billy1900
1.2701PR #137by abhishekgahlot2
1.1666PR #139by ksang123
1.2029PR #139by ksang123
1.2029PR #141by nglain
1.2075PR #141by nglain
1.2075PR #141by nglain
1.2075PR #142by ankitmaloo
1.1925PR #142by ankitmaloo
1.1925PR #143by Julz19
1.1779PR #144by DJLougen
1.3517PR #145by mrdavtan
1.2052PR #146by swapp1990
1.2987PR #147by ankitmaloo
1.1631PR #147by ankitmaloo
1.1631PR #151by mrdavtan
1.2045PR #152by timowhite88
1.1744PR #156by dexhunter
1.1602PR #156by dexhunter
1.1602PR #160by ChaseWNorton
1.1623PR #161by santosh5541
1.1957PR #163by Focus2321
1.2091PR #166by chinesepowered
1.1550PR #168by spokane-way
1.0217PR #169by beee003
1.1973PR #172by GMaN1911
1.1812PR #172by GMaN1911
1.1812PR #173by tamoghnokandar
1.1532PR #174by Julz19
1.1537PR #175by anthony-maio
1.1229PR #179by devin-cog
1.1472PR #181by manfromnowhere143
1.2194PR #182by mihir-s-05
1.1844PR #183by anantdgoel
1.2529PR #186by mahsumaktas
1.1565PR #187by Idan3011
1.1629PR #191by chris-buckley
1.1598PR #192by baudrillardsgh0st
1.1502PR #192by baudrillardsgh0st
1.1502PR #193by KHUCHAN
1.2917PR #193by KHUCHAN
1.2917PR #194by baudrillardsgh0st
1.1480PR #196by sicauzxl
1.3825PR #196by sicauzxl
1.3825PR #197by machdragon
1.1893PR #200by khasinski
1.2012PR #200by khasinski
1.2012PR #201by machdragon
1.1551PR #204by Akasxh
1.2320PR #205by xinpw8
1.1792PR #206by dexhunter
1.1507PR #209by JWLBOYCE
1.1624PR #211by dubthecat
1.1719PR #211by dubthecat
1.1719PR #213by estesryan
1.6004PR #215by JayCheng113
1.1548PR #216by alons23
0.8100PR #217by kshitizz36
1.1753PR #218by bopmite
1.1248PR #219by alertcat
1.1541PR #220by timothywangdev
1.8480PR #223by 0xjaishy
1.1326PR #225by dibdabo
1.2089PR #232by kellyvv
1.4370PR #232by kellyvv
1.4370PR #236by saml212
1.1400PR #237by takoyakisoft
1.8389PR #238by kellyvv
1.5164PR #240by riatzukiza
1.6660PR #242by jamesrziggy
1.2988PR #244by simon-marcus
1.2064PR #246by kvmukilan
1.1704PR #246by kvmukilan
1.1704PR #247by riatzukiza
1.6114PR #248by riatzukiza
1.6231PR #254by timowhite88
1.1303PR #256by IvGolovach
1.1779PR #260by Kevxn97
1.3276PR #262by ibarrajo
1.0539PR #263by Dannybc123
1.5382PR #273by dentity007
1.1575PR #275by ibarrajo
1.0539PR #276by riatzukiza
1.6577PR #278by nicolasdickenmann
1.0365PR #281by charmquark1984
1.1381PR #283by Cwarren15-A
1.2244PR #283by Cwarren15-A
1.2244PR #283by Cwarren15-A
1.2244PR #285by DanishjeetSingh
1.3510PR #286by chris-buckley
1.1628PR #292by xuafeng
1.3274PR #293by Nishu2000-hub
1.2827PR #293by Nishu2000-hub
1.2827PR #293by Nishu2000-hub
1.2827PR #294by sseanliu
1.1645PR #294by sseanliu
1.1645PR #294by sseanliu
1.1645PR #296by sseanliu
1.1645PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #298by MrINVISO
1.2271PR #299by Mistobaan
1.1697PR #302by JackYoung27
1.1520PR #302by JackYoung27
1.1520PR #305by Naazimsnh02
1.1672PR #307by dennisimoo
1.1357PR #307by dennisimoo
1.1357PR #307by dennisimoo
1.1357PR #316by SkywardSyntax
1.2035PR #316by SkywardSyntax
1.2035PR #318by sseanliu
1.1284PR #325by Aum08Desai
1.1462PR #332by saml212
1.1320PR #341by tobiascanavesi
1.3323PR #343by joeynyc
1.2459PR #345by anandks2006
1.8522PR #346by bjbjbjbjbjbj
1.3529PR #351by sp00mm
1.1659PR #352by sp00mm
1.1659PR #352by sp00mm
1.1659PR #357by adityagupta26
1.1928PR #358by adityagupta26
1.1400PR #359by tmustier
1.1345PR #360by MultiFe22
1.1426PR #361by adityagupta26
1.1400PR #366by shivnarainms22
1.1574PR #367by ksang123
1.1770PR #367by ksang123
1.1770PR #373by JoeProAI
1.1634PR #374by unnirRECORD
1.1246PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #375by charmquark1984
1.1257PR #379by dannywillowliu-uchi
1.1257PR #383by joelnishanth
1.1320PR #383by joelnishanth
1.1320PR #384by anantdgoel
1.2882PR #384by anantdgoel
1.2882PR #384by anantdgoel
1.2882PR #388by ElliotSlusky
1.1231PR #389by trasnake87
1.1466PR #393by CrimsonSithria
1.2417PR #393by CrimsonSithria
1.2417PR #394by greqone
1.1247PR #394by greqone
1.1247PR #395by NishantDahal
1.2374PR #399by abaybektursun
1.1247PR #405by meett07
1.5516PR #406by dentity007
1.1287PR #407by itu-itis24-buyukhelvacigilm24
1.3208PR #408by markste-in
1.4784PR #408by markste-in
1.4784PR #413by anantdgoel
1.4525PR #415by EthanYangTW
1.1216PR #417by EthanYangTW
1.1227PR #417by EthanYangTW
1.1227PR #417by EthanYangTW
1.1227PR #417by EthanYangTW
1.1227PR #418by yashverms
1.1715PR #420by leofeasby
1.1454PR #422by albertorkive
1.1396PR #424by someone114514
1.1725PR #432by jadechip
1.5295PR #432by jadechip
1.5295PR #432by jadechip
1.5295PR #433by Robby955
1.3441PR #435by rthgit
1.6130PR #435by rthgit
1.6130PR #436by CrimsonSithria
1.2392PR #437by jupram
1.2257PR #438by stevenshinechen
1.3458PR #445by newjordan
1.1236PR #446by sofiabod
1.1933PR #446by sofiabod
1.1933PR #446by sofiabod
1.1933PR #446by sofiabod
1.1933PR #448by handemanai
1.2006PR #450by zachgoldfine44
1.1466PR #451by harborglowvintage-oss
1.1464PR #453by Divyesh-Thirukonda
1.1248PR #454by nalediym
1.2055PR #455by kasimte
1.1299PR #457by carlesonielfa
1.1839PR #459by mer2234
1.1490PR #465by LoquiAuris
1.1508PR #466by simonbissonnette
1.1354PR #469by cmcdnd
1.1418PR #470by leofeasby
1.1454PR #473by abaybektursun
1.1214PR #474by joshuaswarren
1.1690PR #475by abrahaw123-cell
0.5000PR #476by aquemy
1.4574PR #477by harsha-gouru
1.1522PR #477by harsha-gouru
1.1522PR #478by gowtham0992
1.1268PR #480by imyesung
1.1456PR #482by harsha-gouru
1.1522PR #483by tmustier
1.1346PR #483by tmustier
1.1346PR #488by pkim02
1.3267PR #492by Divyesh-Thirukonda
1.1591PR #493by parinzee
1.1309PR #493by parinzee
1.1309PR #493by parinzee
1.1309PR #498by newjordan
1.1478PR #498by newjordan
1.1478PR #499by newjordan
1.1478PR #499by newjordan
1.1478PR #503by EthanYangTW
1.1195PR #503by EthanYangTW
1.1195PR #506by eshansinghal14
1.2294PR #507by skarakulak
1.1558PR #512by MatoTeziTanka
0.9512PR #512by MatoTeziTanka
0.9512PR #517by lukacf
0.9789PR #526by Christopher-Lee-McClendon
1.1425PR #527by Shuvam-Banerji-Seal
1.4078PR #528by EthanYangTW
1.1195PR #528by EthanYangTW
1.1195PR #529by EthanYangTW
1.1195PR #529by EthanYangTW
1.1195PR #531by pragnyanramtha
1.1324PR #532by NotADevIAmaMeatPopsicle
1.0487PR #538by cruz-andr
1.1511PR #542by ddavidgao
1.1898PR #549by abaybektursunRECORD
1.1194PR #554by chrisnkuno
1.4612PR #555by ymrohit
1.0916PR #559by Parswanadh
1.5348PR #560by Rohan5commit
1.1935PR #560by Rohan5commit
1.1935PR #560by Rohan5commit
1.1935PR #560by Rohan5commit
1.1935PR #564by sadeghja1070
1.1270PR #568by MatoTeziTanka
0.7853PR #570by armmer016
1.3434PR #573by Sarimsaljook
1.0523PR #576by cmcdnd
1.1164PR #579by newjordan
1.1355PR #579by newjordan
1.1355PR #579by newjordan
1.1355PR #585by EthanYangTW
1.1179PR #586by EaCognitive
1.1365PR #589by RoyiRa
1.1178PR #593by abaybektursun
1.1163PR #596by AriaAnima
0.6430PR #596by AriaAnima
0.6430PR #598by Christopher-Lee-McClendon
1.1334PR #605by bigbag
0.7227PR #605by bigbag
0.7227PR #605by bigbag
0.7227PR #606by EthanYangTW
1.1162PR #606by EthanYangTW
1.1162PR #606by EthanYangTW
1.1162PR #622by Upsalla
1.0941PR #633by MatoTeziTanka
1.1526PR #634by raahilshah
1.1171PR #636by NewyorkDev
1.1234PR #636by NewyorkDev
1.1234PR #640by CiprianFlorin-IfrimRECORD
1.1570PR #641by CiprianFlorin-Ifrim
1.1239PR #648by maorinka
1.1428PR #648by maorinka
1.1428PR #648by maorinka
1.1428PR #648by maorinka
1.1428PR #648by maorinka
1.1428PR #650by abaybektursun
1.1187PR #651by phulin
1.2093PR #653by demirelo
1.1552PR #653by demirelo
1.1552PR #657by anthony-maio
1.1234PR #657by anthony-maio
1.1234PR #660by HugoOchoaLP
1.1826PR #663by raahilg
0.4380PR #663by raahilg
0.4380PR #663by raahilg
0.4380PR #664by tsbiosky
1.2982PR #665by harborglowvintage-oss
1.1464PR #666by chrislovescoding
1.1932PR #666by chrislovescoding
1.1932PR #667by suchitj2702
1.1352PR #669by amabito
1.4942PR #669by amabito
1.4942PR #669by amabito
1.4942PR #670by abaybektursun
1.1171PR #674by newjordan
1.0461PR #678by SPThole
1.3525PR #678by SPThole
1.3525PR #679by andrewmouldon
1.2164PR #680by bro4all
1.1483PR #681by Alfaxad
1.4775PR #682by gthgomez
1.1233PR #684by DeepReinforce
1.0574PR #686by msisovic
1.1182PR #687by RoyiRa
1.0745PR #687by RoyiRa
1.0745PR #688by RoyiRa
1.0745PR #692by EthanYangTW
1.1186PR #693by EthanYangTW
1.1186PR #694by Bortlesboat
1.1507PR #696by gravelBridge
1.2622PR #697by Danishlynx
1.1194PR #700by RoyiRa
1.0541PR #700by RoyiRa
1.0541PR #702by lukacf
1.0244PR #702by lukacf
1.0244PR #703by Gusanidas
1.1176PR #705by seanward
1.2151PR #706by newjordan
1.0461PR #707by Shuvam-Banerji-Seal
1.4078PR #710by Dhruba531
1.1240PR #712by Shuvam-Banerji-Seal
1.4078PR #712by Shuvam-Banerji-Seal
1.4078PR #713by hypery11
1.1180PR #714by Upsalla
1.1187PR #715by Asukabot0
1.0337PR #716by SHN2004
1.4239PR #717by Jaksenc
1.3515PR #717by Jaksenc
1.3515PR #719by Shuvam-Banerji-Seal
1.5252PR #719by Shuvam-Banerji-Seal
1.5252PR #721by hypery11
0PR #725by Shuvam-Banerji-Seal
1.5252PR #726by DeepReinforce
1.1147PR #727by Asukabot0
0.9674PR #728by abaybektursun
1.1142PR #728by abaybektursun
1.1142PR #728by abaybektursun
1.1142PR #730by janwww
1.1570PR #731by pentxayc
1.0400PR #733by stukenov
1.0278PR #734by Robby955
1.1198PR #737by SPThole
1.3557PR #738by gowtham0992
1.0970PR #738by gowtham0992
1.0970PR #745by stukenov
1.0222PR #746by C0neF
1.3556PR #751by Shuvam-Banerji-Seal
1.5252PR #751by Shuvam-Banerji-Seal
1.5252PR #752by Naazimsnh02
1.1182PR #755by dcrow85
1.0321PR #755by dcrow85
1.0321PR #756by abaybektursun
1.1142PR #756by abaybektursun
1.1142PR #757by fielding
1.1124PR #759by markste-in
1.3092PR #760by erikqu
1.2185PR #761by Asukabot0
0.9581PR #762by robinojw
0.7139PR #764by ndokutovich
0.9633PR #764by ndokutovich
0.9633PR #767by RichiiiTV
0.9209PR #769by MatoTeziTanka
0.8508PR #769by MatoTeziTanka
0.8508PR #770by minh-stakc
0.6672PR #770by minh-stakc
0.6672PR #772by abaybektursun
1.3055PR #772by abaybektursun
1.3055PR #772by abaybektursun
1.3055PR #773by siddhantparadox
1.1532PR #774by travispchen
0.9370PR #774by travispchen
0.9370PR #776by agalimova
0.9258PR #777by Robby955
0.9623PR #778by raahilshah
0.9605PR #779by deanbrr
0.6683PR #782by newjordan
0.9362PR #782by newjordan
0.9362PR #783by petergpt
1.1171PR #785by SirSaltySalmon
1.5364PR #786by shinegami-2002
0.8128PR #786by shinegami-2002
0.8128PR #788by hypery11
0.9059PR #790by danialht
1.1172PR #792by xexyz
1.0340PR #793by pall23-mech
1.2500PR #794by jeremyschied
1.3346PR #796by Robby955
0.6567PR #797by armantsaturian
0.8960PR #797by armantsaturian
0.8960PR #798by travispchen
0.5466PR #798by travispchen
0.5466PR #798by travispchen
0.5466PR #799by yuvraajbains
1.2005PR #800by newjordan
0.5644PR #800by newjordan
0.5644PR #802by Bortlesboat
0.9123PR #803by pentxayc
0.4416PR #803by pentxayc
0.4416PR #803by pentxayc
0.4416PR #805by zeytx
1.1807PR #806by ibarrajo
0.6678PR #806by ibarrajo
0.6678PR #807by connectwithprakash
1.0116PR #809by AayushBaniya2006
0.2952PR #810by Idan3011
0.9393PR #810by Idan3011
0.9393PR #811by quietsmile
0.4377PR #811by quietsmile
0.4377PR #814by newjordan
0.4820PR #814by newjordan
0.4820PR #814by newjordan
0.4820PR #818by lucamignatti
0.5527PR #820by mtybadger
1.6252PR #822by henrycashe26
1.2604PR #826by himanshudongre
0.2951PR #826by himanshudongre
0.2951PR #827by Programmerryoki
1.3999PR #827by Programmerryoki
1.3999PR #828by bigbag
0.9076PR #831by sseanliu
1.1284PR #832by jfprincz
1.1903PR #834by AnirudhRahul
0.1663PR #834by AnirudhRahul
0.1663PR #835by iverbovoy
1.1980PR #835by iverbovoy
1.1980PR #840by quietsmile
0.2873PR #840by quietsmile
0.2873PR #840by quietsmile
0.2873PR #843by quietsmile
0.2834PR #846by himanshudongre
0.1434PR #849by dttdrv
1.1105PR #850by callithyia
0.3212PR #850by callithyia
0.3212PR #851by RoyiRa
0.2071PR #851by RoyiRa
0.2071PR #851by RoyiRa
0.2071PR #851by RoyiRa
0.2071PR #851by RoyiRa
0.2071PR #852by Prush69
1.1189PR #852by Prush69
1.1189PR #855by aazizyan
1.2659PR #856by iverbovoy
1.1454PR #859by bigbag
0.1582PR #861by JoeProAI
1.1326PR #864by aryanbhosale
0.2841PR #865by aryanbhosale
0.2841PR #868by aamodbhatt
0.1181PR #869by THUQiXuan
0.1290PR #869by THUQiXuan
0.1290PR #870by simon-marcus
0.0935PR #872by gowtham0992
1.0467PR #873by gowtham0992
1.0467PR #875by shalyhinpavel
1.0226PR #875by shalyhinpavel
1.0226PR #875by shalyhinpavel
1.0226PR #876by Bortlesboat
0.5863PR #876by Bortlesboat
0.5863PR #880by RoyiRa
0.1003PR #880by RoyiRa
0.1003PR #880by RoyiRa
0.1003PR #881by simon-marcus
0.0990PR #881by simon-marcus
0.0990PR #881by simon-marcus
0.0990PR #881by simon-marcus
0.0990PR #884by BhatiaUday
1.1448PR #888by aamodbhatt
0.0942PR #888by aamodbhatt
0.0942PR #889by anthony-maio
0.9642PR #890by sofiabod
0.4405PR #900by Robby955
0.1156PR #900by Robby955
0.1156PR #900by Robby955
0.1156PR #902by Muhtasham
1.8111PR #904by anthony-maio
1.2734PR #905by anthony-maio
1.8587PR #905by anthony-maio
1.8587PR #905by anthony-maio
1.8587PR #907by resouer
0.0960PR #907by resouer
0.0960PR #907by resouer
0.0960PR #907by resouer
0.0960PR #909by sunnypatneedi
0.8609PR #909by sunnypatneedi
0.8609PR #912by Bortlesboat
0.3461PR #913by RoyiRa
0.0887PR #913by RoyiRa
0.0887PR #915by anthony-maio
0.9642PR #915by anthony-maio
0.9642PR #916by Bortlesboat
0.3461PR #916by Bortlesboat
0.3461PR #921by TimPietrusky
0.0939PR #931by AnirudhRahul
0.0498PR #933by haikosys
0.0804PR #933by haikosys
0.0804PR #933by haikosys
0.0804PR #937by mihir-s-05
1.4457PR #940by antaloaalonso
0.9581PR #943by aamodbhatt
0.0165PR #944by aamodbhatt
0.0165PR #944by aamodbhatt
0.0165PR #945by TimPietrusky
0.0274PR #945by TimPietrusky
0.0274PR #948by dentity007
0.1156PR #948by dentity007
0.1156PR #948by dentity007
0.1156PR #948by dentity007
0.1156PR #948by dentity007
0.1156PR #949by jzgdev
1.3178PR #950by jzgdev
1.3178PR #958by shouryamaanjain
1.1382PR #959by himanalot
0.0000PR #959by himanalot
0.0000PR #962by AnirudhRahul
0.0214PR #962by AnirudhRahul
0.0214PR #963by sunnypatneedi
0.8609PR #963by sunnypatneedi
0.8609PR #967by dexhunter
1.0450PR #967by dexhunter
1.0450PR #968by dentity007
0.1154PR #968by dentity007
0.1154PR #974by anthony-maio
1.6542PR #974by anthony-maio
1.6542PR #976by Vibes-me
1.2058PR #982by haikosys
0.0638PR #984by jzgdev
1.3178PR #984by jzgdev
1.3178PR #985by danielweidinger2299-debug
1.3540PR #986by sofiabod
0.0830PR #986by sofiabod
0.0830PR #986by sofiabod
0.0830PR #990by newjordan
0.7614PR #993by aerosta
0.9631PR #994by singhaikshitijjain
1.4315PR #994by singhaikshitijjain
1.4315PR #994by singhaikshitijjain
1.4315PR #999by aamodbhatt
1.1179PR #1014by haimianbaobao007
1.6200PR #1015by shram86
1.2115PR #1024by immartian
0.0830PR #1026by danielxmed
1.0945PR #1028by newjordan
0.8104PR #1029by fielding
1.1520PR #1030by sofiabod
0.1130PR #1033by Naazimsnh02
0.4311PR #1033by Naazimsnh02
0.4311PR #1033by Naazimsnh02
0.4311PR #1034by Jeneesh1014
1.7195PR #1036by ivanontech
1.1974PR #1038by Vibes-me
1.2058PR #1043by okezue
1.1261PR #1043by okezue
1.1261PR #1044by greqone
1.8989PR #1044by greqone
1.8989PR #1044by greqone
1.8989PR #1046by Jayteare
1.2174PR #1047by newjordan
0.8822PR #1053by ikermoel
1.3600PR #1055by sanyalsunny111
0.9693PR #1056by sofiabod
0.0180PR #1056by sofiabod
0.0180PR #1060by dexhunter
1.1123PR #1066by adityakm24
1.1259PR #1076by sofiabod
0.0109PR #1076by sofiabod
0.0109PR #1076by sofiabod
0.0109PR #1081by michaelwinczuk
1.1220PR #1081by michaelwinczuk
1.1220PR #1083by newjordan
0.4961PR #1084by AnubhavBharadwaaj
1.1185PR #1084by AnubhavBharadwaaj
1.1185PR #1088by serdardoesml
1.2542PR #1088by serdardoesml
1.2542PR #1095by vimeto
0.0905PR #1096by vimeto
1.3342PR #1096by vimeto
1.3342PR #1097by danielxmed
1.3355PR #1099by Bortlesboat
1.1133PR #1100by agalimova
1.1465PR #1100by agalimova
1.1465PR #1105by abaybektursun
1.2208PR #1105by abaybektursun
1.2208PR #1106by agalimova
1.1465PR #1106by agalimova
1.1465PR #1106by agalimova
1.1465PR #1108by DbBested
1.1502PR #1108by DbBested
1.1502PR #1108by DbBested
1.1502PR #1111by MichaelMcCulloch
0.2532PR #1114by minh-stakc
0.0235PR #1114by minh-stakc
0.0235PR #1114by minh-stakc
0.0235PR #1116by gowtham0992
1.4447PR #1119by gowtham0992
1.4584PR #1119by gowtham0992
1.4584PR #1119by gowtham0992
1.4584PR #1119by gowtham0992
1.4584PR #1120by newjordan
1.1099PR #1122by icryo
1.1146PR #1122by icryo
1.1146PR #1124by NewyorkDev
1.1194PR #1128by AnubhavBharadwaaj
1.1154PR #1130by Gusanidas
1.1140PR #1130by Gusanidas
1.1140PR #1130by Gusanidas
1.1140PR #1139by ivanontech
1.1801PR #1143by simon-marcus
1.0806PR #1143by simon-marcus
1.0806PR #1143by simon-marcus
1.0806PR #1145by AnirudhRahul
1.1109PR #1149by LucasErcolano
1.6507PR #1150by sahiee-dev
1.1151PR #1152by ericdatum
1.7942PR #1153by LucasErcolano
1.6507PR #1153by LucasErcolano
1.6507PR #1153by LucasErcolano
1.6507PR #1154by LucasErcolano
1.7757PR #1154by LucasErcolano
1.7757PR #1159by JDAppleseed
0.3693PR #1164by papalino456
1.1917PR #1172by dexhunter
1.1015PR #1176by bigbag
1.0962PR #1183by akaiHuang
1.5080PR #1186by andrewbaggio1
0.9850PR #1192by dentity007
1.3560PR #1193by dentity007
1.4390PR #1198by ymrohit
1.5992PR #1214by gersh
1.1688PR #1214by gersh
1.1688PR #1215by turbo-indubitable
1.1601PR #1216by SoHarshh
1.1574PR #1217by bigbag
1.1027PR #1217by bigbag
1.1027PR #1222by abaybektursun
1.4707PR #1224by vermissa0ss
1.1129PR #1224by vermissa0ss
1.1129PR #1227by himanshudongre
1.4841PR #1227by himanshudongre
1.4841PR #1236by ibarrajo
1.1179PR #1241by aiejvn
0.9901PR #1241by aiejvn
0.9901PR #1241by aiejvn
0.9901PR #1243by simon-marcus
1.1230PR #1245by mkenney2
1.1470PR #1248by ibarrajo
1.1264PR #1251by ibarrajo
1.1349PR #1255by akaiHuang
1.5080PR #1255by akaiHuang
1.5080PR #1257by BoxiYu
1.0855PR #1257by BoxiYu
1.0855PR #1259by himanshudongre
1.1533PR #1268by samquiring
1.1875PR #1272by andrewbaggio1
1.1100PR #1272by andrewbaggio1
1.1100PR #1272by andrewbaggio1
1.1100PR #1272by andrewbaggio1
1.1100PR #1276by BiggerDABOSS
1.1100PR #1282by newjordan
1.1035PR #1282by newjordan
1.1035PR #1286by newjordan
1.0963PR #1291by dentity007
1.0925PR #1296by aryanbhosale
1.0926PR #1299by Ribin545
1.8184PR #1299by Ribin545
1.8184PR #1299by Ribin545
1.8184PR #1305by DariusFeher
1.2070PR #1306by resouer
1.0846PR #1309by cadenmcmann
1.1143PR #1318by renqianluo
1.0095PR #1320by jpfeiffe
1.1196PR #1325by monisha-max
1.3868PR #1325by monisha-max
1.3868PR #1325by monisha-max
1.3868PR #1328by renqianluo
0.6361PR #1329by renqianluo
0.6361PR #1342by nicholasbailey87
1.4816PR #1342by nicholasbailey87
1.4816PR #1344by Omrigotlieb
1.0923PR #1344by Omrigotlieb
1.0923PR #1347by shasank0001
1.3038PR #1357by mollahasani
1.2200PR #1357by mollahasani
1.2200PR #1357by mollahasani
1.2200PR #1359by LucasErcolano
0.4188PR #1359by LucasErcolano
0.4188PR #1359by LucasErcolano
0.4188PR #1360by JulianTang2027
1.1585PR #1368by JKSNS
0.8503PR #1369by xiayicheng3-code
1.1196PR #1369by xiayicheng3-code
1.1196PR #1369by xiayicheng3-code
1.1196PR #1372by Bortlesboat
1.0050PR #1378by Rajat123456789
1.1711PR #1378by Rajat123456789
1.1711PR #1379by LucasErcolano
0.4162PR #1379by LucasErcolano
0.4162PR #1379by LucasErcolano
0.4162PR #1380by ranausmanai
1.1567PR #1381by X-Abhishek-X
1.1604PR #1381by X-Abhishek-X
1.1604PR #1384by iverbovoy
1.1441PR #1394by clarkkevRECORD
1.0856PR #1394by clarkkevRECORD
1.0856PR #1394by clarkkevRECORD
1.0856PR #1394by clarkkevRECORD
1.0856PR #1396by erichroepke
1.1067PR #1397by Mertyandimata
1.1047PR #1397by Mertyandimata
1.1047PR #1398by Mertyandimata
1.1047PR #1399by AnubhavBharadwaaj
1.0898PR #1403by Rhoahndur
1.3485PR #1405by anthony-maio
1.0856PR #1410by izlley
1.1158PR #1413by dexhunterRECORD
1.0828PR #1414by Abhishek8108
0.7093PR #1415by bigbag
1.0913PR #1416by erichroepke
1.0795PR #1416by erichroepke
1.0795PR #1420by abaybektursun
1.0801PR #1420by abaybektursun
1.0801PR #1430by renqianluo
0.3964PR #1430by renqianluo
0.3964PR #1431by Idan3011
1.1266PR #1433by mtybadger
1.2067PR #1433by mtybadger
1.2067PR #1433by mtybadger
1.2067PR #1436by DevWizard-Vandan
1.5546PR #1437by dexhunter
1.0780PR #1438by sabdulmajid
1.2029PR #1439by reyhandl
1.2639PR #1439by reyhandl
1.2639PR #1439by reyhandl
1.2639PR #1439by reyhandl
1.2639PR #1439by reyhandl
1.2639PR #1439by reyhandl
1.2639PR #1443by hardik-bhadani-git
1.3496PR #1443by hardik-bhadani-git
1.3496PR #1452by bsisduck
0.3509PR #1461by viasky657
0.4118PR #1473by AVINASH0052
1.1156PR #1474by shram86
1.1434PR #1476by aryan-cs
1.0842PR #1478by jxgod
1.1995PR #1479by andrewbaggio1
1.1450PR #1487by ndokutovich
1.0600PR #1488by ndokutovich
0.8265PR #1489by joshkmartinez
1.0736PR #1494by G3sparky
1.1220PR #1501by SPThole
1.1159PR #1502by SPThole
1.1147PR #1502by SPThole
1.1147PR #1502by SPThole
1.1147PR #1504by Stuckertks09
1.2206PR #1504by Stuckertks09
1.2206PR #1507by ChideraIbe123
0.2282PR #1514by dexhunter
1.0798PR #1521by aryanbhosale
1.0802PR #1521by aryanbhosale
1.0802PR #1524by Jash-Vora
1.2552PR #1525by Jash-Vora
1.2552PR #1528by xiehuanyi
1.1104PR #1528by xiehuanyi
1.1104PR #1532by nogakeren
1.0803PR #1534by someone114514
1.0846PR #1537by pireylow
1.3971PR #1537by pireylow
1.3971PR #1537by pireylow
1.3971PR #1540by aryanbhosale
1.0777PR #1544by Abhishek8108
1.0283PR #1548by dljr-github
1.3220PR #1551by andrewmouldon
1.2199PR #1551by andrewmouldon
1.2199PR #1555by andrewbaggio1
1.0764PR #1555by andrewbaggio1
1.0764PR #1556by sidhanth97
1.4352PR #1557by ndokutovich
1.0773PR #1558by Subramanyam6
1.4500PR #1558by Subramanyam6
1.4500PR #1558by Subramanyam6
1.4500PR #1562by joshkmartinez
1.0205PR #1564by joshkmartinez
1.0171PR #1569by abbudjoe
1.3576PR #1571by skar07
1.5406PR #1574by KRGulaj
1.3587PR #1574by KRGulaj
1.3587PR #1575by joshkmartinez
1.0167PR #1576by joshkmartinez
1.0167PR #1578by mikeapedia
1.0668PR #1578by mikeapedia
1.0668PR #1580by liveyourday
1.2286PR #1581by aiejvn
1.2321PR #1581by aiejvn
1.2321PR #1582by He-Wenhao
1.3428PR #1584by codemath3000
1.0752PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1589by nnm2602
1.3223PR #1598by amrayach
1.0813PR #1607by inin-zou
1.4765PR #1608by User123331
1.3921PR #1623by divagr18
1.1942PR #1623by divagr18
1.1942PR #1629by channyzf6
1.0829PR #1634by arsenis-cmd
1.1335PR #1634by arsenis-cmd
1.1335PR #1634by arsenis-cmd
1.1335PR #1638by kunwar-vikrant
1.0832PR #1644by mradassaad
1.1473PR #1662by pablinga19
1.0862PR #1662by pablinga19
1.0862PR #1663by pablinga19
1.0862PR #1663by pablinga19
1.0862PR #1664by zoharb157
1.0980PR #1664by zoharb157
1.0980PR #1664by zoharb157
1.0980PR #1665by mrbese
1.3571PR #1665by mrbese
1.3571PR #1666by mrbese
1.1531PR #1682by PapaFranku4647
1.2834PR #1694by Rtx09x
1.1136PR #1695by X-Abhishek-X
1.0759PR #1697by Buld1n
1.0812PR #1697by Buld1n
1.0812PR #1697by Buld1n
1.0812PR #1699by lsb
1.4831PR #1700by jorge-asenjo
1.0722PR #1700by jorge-asenjo
1.0722PR #1704by Buld1n
1.0976PR #1705by genji0306
1.0339PR #1707by nothingLiva
1.0740PR #1712by aamodbhatt
1.0190PR #1713by AbhiShet108
1.3479PR #1717by samchill666
1.1249PR #1717by samchill666
1.1249PR #1718by himanshudongre
1.0788PR #1722by deborahnelson8788726
0.6580PR #1723by SlavH
0.5116PR #1727by yahya010
1.0722PR #1729by romeerp
1.0678PR #1730by N10ELabs
1.0845PR #1732by Victory963
1.0785PR #1732by Victory963
1.0785PR #1734by yahya010
1.0108PR #1736by dexhunter
1.0655PR #1736by dexhunter
1.0655PR #1736by dexhunter
1.0655PR #1738by alertcat
1.0354PR #1740by amrayach
1.0722PR #1741by amrayach
1.0722PR #1747by swapp1990
1.0820PR #1753by Abhishek-Dalvi410
1.2917PR #1753by Abhishek-Dalvi410
1.2917PR #1754by upascal
1.0881PR #1755by OE-GOD
1.0746PR #1756by romeerp
1.0651PR #1759by yijieyuan
1.0799PR #1759by yijieyuan
1.0799

Hyperparameters Across PRs

| pr_number | parameters |
|---|---|
| 31 | {"steps":2651,"wallclock":"10min","hardware":"4xH100 SXM"} |
| 31 | {"tokenizer_optimization":"sp4096","width_depth_sweep":true,"test_time_training":true,"qat":true} |
| 34 | {"max_wallclock_seconds":600,"num_gpus":8,"val_every_steps":200} |
| 37 | {"vocab_size":4096,"tokens_per_byte":0.306} |
| 41 | {"hardware":"8xH100","wallclock_seconds":600,"nproc_per_node":8} |
| 42 | {"matrix_lr":0.06} |
| 44 | {"dataset_alias":"fineweb10B_sp1024_valonly"} |
| 45 | {"mlp_hidden":960} |
| 46 | {"train_batch_tokens":786432,"max_wallclock_seconds":600,"experiments":8} |
| 53 | {"vocab_size":4096} |
| 53 | {"val_loss_every":0} |
| 56 | {"val_batch_size":8388608} |
| 61 | {"mlp_hidden":992} |
| 64 | {"layers":11,"model_dim":512,"heads":8,"kv_heads":4,"mlp_hidden":1536} |
| 66 | — |
| 69 | {"quant_range":[-31,31]} |
| 71 | {"max_wallclock_seconds":600,"num_gpus":8} |
| 73 | {"train_batch_tokens":131072} |
| 73 | {"grad_accum_steps":2} |
| 77 | {"batch_size":64,"targets":["lm_head","c_q","c_v"]} |
| 79 | — |
| 85 | {"experiments":92} |
| 85 | {"train_on_val":1} |
| 88 | {"matrix_lr":0.02,"muon_momentum":0.99,"warmdown_iters":3000} |
| 91 | {"width_range":[512,1152],"layers_range":[2,6],"recurrence_range":[2,6],"head_range":[4,16]} |
| 92 | {"vocab_size":8192} |
| 95 | {"matrix_lr":0.06,"scalar_lr":0.06,"tied_embed_lr":0.04} |
| 99 | {"group_size":64} |
| 104 | — |
| 107 | — |
| 108 | {"entries":907927,"artifact_mb":2.87} |
| 110 | {"dry_run":true,"steps":10} |
| 113 | {"matrix_lr":0.06} |
| 114 | {"fp16_tied_embedding":true,"fp16_late_k_passthrough_layers":2} |
| 116 | {"logit_softcap":15} |
| 117 | {"qat_start_frac":0.7} |
| 120 | — |
| 122 | — |
| 122 | {"vocab_size":2048} |
| 123 | {"vocab_size":4096} |
| 126 | {"start_length":128,"end_length":1024,"warmup_steps":2000} |
| 128 | {"train_batch_tokens":393216,"max_wallclock_seconds":600} |
| 130 | {"modes":["STE","Gaussian noise"],"qat_start_frac":0.75,"lr_reduction_on_qat_start":0.5} |
| 130 | {"matrix_lr":0.06,"scalar_lr":0.06,"tied_embed_lr":0.08} |
| 131 | {"train_seq_len_1_gpu":1024,"train_seq_len_8_gpu":2048,"grad_accum_steps_1_gpu":2,"grad_accum_steps_8_gpu":1} |
| 137 | {"enable_qat":1,"ema_decay":0.998} |
| 139 | — |
| 139 | {"trits_per_byte":5} |
| 141 | {"seed":1337,"experiments":33,"gpu_tiers":["A40","1xH100","8xH100"]} |
| 141 | {"framework":"PyTorch 2.4"} |
| 141 | {"training_time_seconds":600,"gpus":8} |
| 142 | {"int8_clip_percentile":99.99995} |
| 142 | {"int8_per_row_scale_dtype":"float32"} |
| 143 | {"fp16_embed_passthrough":1,"fp16_late_k_layers":0} |
| 144 | {"train_shards":80,"wallclock_seconds":600} |
| 145 | {"qat_start_step":6000,"qat_fraction":0.3} |
| 146 | {"variants_tested":3} |
| 147 | {"int8_clip_percentile":99.99995} |
| 147 | {"int8_per_row_scale_dtype":"float32"} |
| 151 | {"train_batch_tokens":524288,"eval_batch_seqs":32} |
| 152 | — |
| 156 | {"range":[-31,31]} |
| 156 | — |
| 160 | — |
| 161 | {"max_wallclock_seconds":600,"hardware":"8x H100 SXM"} |
| 163 | {"train_batch_tokens":262144,"matrix_lr":0.03,"scalar_lr":0.03,"tied_embed_lr":0.04} |
| 166 | {"train_batch_tokens":393000} |
| 168 | {"prefix_size_bytes":8750000,"covered_validation_tokens":12900000,"coverage_fraction":0.208} |
| 169 | — |
| 172 | {"strength":0.15} |
| 172 | {"strength":0.3} |
| 173 | — |
| 174 | — |
| 175 | {"threshold":0.15} |
| 179 | {"train_files":"fineweb_val_*.bin"} |
| 181 | {"train_on_val":1} |
| 182 | — |
| 183 | {"lambda":0.02,"decay":0.98} |
| 186 | {"sparsity":0.02} |
| 187 | {"bottleneck":"512->768->512"} |
| 191 | {"train_batch_tokens":786432,"matrix_lr":0.02,"scalar_lr":0.02,"tied_embed_lr":0.03} |
| 192 | {"container":"int8","value_range":[-32,31]} |
| 192 | — |
| 193 | {"CTM_NOVELTY_GAIN":1,"CTM_SALIENCE_GAIN":0.5} |
| 193 | {"SKIP_GATE_MODE":"error"} |
| 194 | {"rho":0.05,"frac":0.03} |
| 196 | {"train_on_val":1} |
| 196 | {"qat_enable":1,"qat_start_frac":0.1} |
| 197 | {"staging_profile":1,"lawa_enabled":1} |
| 200 | {"vocab_size":4096,"compression_improvement":"26%"} |
| 200 | {"range":"[-31,31]"} |
| 201 | — |
| 204 | {"warmdown_iters":20000} |
| 205 | {"pruning_rate":0.02} |
| 206 | — |
| 209 | {"wallclock_cap_seconds":600,"batch_tokens":786432,"keep_float_tensors":["tok_emb.weight","blocks.9.attn.c_k.weight","blocks.10.attn.c_k.weight"],"context_features_enabled":{"bigram":0,"smeargate":0,"swa":0}} |
| 211 | — |
| 211 | — |
| 213 | {"iterations":6000,"hardware":"8x H100","runtime_seconds":224} |
| 215 | — |
| 216 | {"precision":"bfloat16"} |
| 217 | {"vocab_size":4096} |
| 218 | {"olb_lr":0.1,"olb_momentum":0.9} |
| 219 | {"layers":12,"dim":512,"heads":8,"kv_heads":4,"parameters_m":29.2} |
| 220 | — |
| 223 | {"phase1_fraction":0.6} |
| 225 | {"format":"int6_mixed_per_row_v2"} |
| 232 | {"entries":907927,"table_size_bytes":2867053} |
| 232 | {"avg_bytes_per_entry":3.16} |
| 236 | {"from_tokens":786000,"to_tokens":524288} |
| 237 | — |
| 238 | {"steps":10670,"hardware":"1xH100"} |
| 240 | {"artifact_cap_bytes":16000000,"iterations":500} |
| 242 | {"oversample":4,"warmup_frac":0.7} |
| 244 | — |
| 246 | {"rope_base":50000} |
| 246 | — |
| 247 | {"serialized_model_bytes":9988629,"total_submission_bytes":10036271} |
| 248 | {"hardware":"1x RTX 4070 Laptop GPU","iterations":500} |
| 254 | {"hardware":"Hopper"} |
| 256 | {"candidates":[1,0.95,0.9,0.85]} |
| 260 | {"hardware":"1xH100","branch":"codex/sliding-window-eval-v1"} |
| 262 | {"prefix_tokens":6200000,"coverage":0.1} |
| 263 | {"experiments":15} |
| 273 | {"layers":10,"step_time_ms":65.49,"steps":9156} |
| 275 | {"coverage":0.1,"prefix_size_mb":4.24} |
| 276 | {"max_wallclock_seconds":900,"stopped_step":471,"total_steps":500} |
| 278 | {"prefix_type":"sparse_blocks_v1","block_size":256,"selected_blocks":20681,"covered_tokens":5294336,"covered_fraction":0.0854,"prefix_bytes":4240256} |
| 281 | {"version":"2.8.3"} |
| 283 | {"order":2,"zero_learned_parameters":true,"zero_artifact_size_cost":true} |
| 283 | {"alpha":0.95,"mode":"per-doc"} |
| 283 | — |
| 285 | {"max_wallclock_seconds":600,"hardware":"1x NVIDIA A100-SXM4-40GB","train_shards":80} |
| 286 | {"start_frac":0.85} |
| 292 | {"formula":"w + (w_quantized - w).detach()"} |
| 293 | {"training_docs":2000000} |
| 293 | {"num_shards":10} |
| 293 | {"layers":8} |
| 294 | {"meta_steps":1576,"scope":"last 3 blocks' MLPs","training_fraction":0.2} |
| 294 | {"top_fraction":0.02} |
| 294 | — |
| 296 | {"outer_step_scale":0.01,"inner_steps":3,"inner_lr":0.1,"meta_steps":1576} |
| 298 | — |
| 298 | — |
| 298 | — |
| 298 | — |
| 298 | — |
| 298 | — |
| 298 | {"rank":16} |
| 298 | — |
| 298 | — |
| 298 | {"rank":16} |
| 299 | {"num_layers":8,"model_dim":768,"train_batch_tokens":262144,"logit_softcap":10,"tied_embed_lr":0.03,"matrix_lr":0.02,"scalar_lr":0.02,"beta1":0.7} |
| 302 | — |
| 302 | {"k":1,"train_fraction":0.1} |
| 305 | {"pruning_percentile":10} |
| 307 | {"train_batch_tokens":524288} |
| 307 | — |
| 307 | — |
| 316 | — |
| 316 | — |
| 318 | {"cache_tokens":8192,"stride":64} |
| 325 | {"late_qat":1,"qat_threshold":0.1} |
| 332 | {"top_10_percent":"int7","middle_70_percent":"int6","bottom_20_percent":"int5"} |
| 341 | — |
| 343 | {"experiments":97,"dev_hardware":"RTX 4080","submission_hardware":"8x H100 SXM"} |
| 345 | — |
| 346 | {"train_shards":1,"grad_accum_steps":8} |
| 351 | {"lr_scale_threshold":0.1,"quant_bits":6} |
| 352 | {"k":2,"alpha":0.2} |
| 352 | {"quantization":"int6","method":"STE QAT"} |
| 357 | — |
| 358 | {"prune_fraction":0.03} |
| 359 | {"required":true} |
| 360 | {"warmup_steps":500} |
| 361 | {"pruned_fraction":0.03} |
| 366 | {"layer":5,"lambda_init":0.2,"extra_parameters":1} |
| 367 | {"bits_per_param":1.6} |
| 367 | {"roundtrip_gap":0.0016} |
| 373 | — |
| 374 | {"threshold":0.1} |
| 375 | {"num_heads":2,"loss_weight":0.3} |
| 375 | {"num_tokens":64} |
| 375 | {"top_percent_int7":10,"middle_percent_int6":70,"bottom_percent_int5":20} |
| 375 | — |
| 375 | {"train_batch_tokens":1048576} |
| 375 | {"train_batch_tokens":786432} |
| 375 | {"train_batch_tokens":524288} |
| 375 | — |
| 375 | {"K":3} |
| 375 | — |
| 375 | — |
| 379 | {"lr_scale_threshold":0.1} |
| 383 | {"threshold_lr_scale":0.1} |
| 383 | — |
| 384 | {"cache_lambda":0.02,"cache_decay":0.995,"ogd_lr":0.1} |
| 384 | {"meta_loss_weight":0.5,"inner_lr":0.03,"start_frac":0.5,"every":4} |
| 384 | {"split_digits":false,"split_by_unicode_script":false,"split_by_number":false,"max_sentencepiece_length":64,"vocab_size":8192} |
| 388 | {"speedup_vs_fa2":1.18} |
| 389 | {"pruning_fraction":0.1} |
| 393 | {"experiments":111} |
| 393 | {"train_batch_tokens":131072} |
| 394 | {"flash_attn_backend":"native","torch_compile":true} |
| 394 | {"backout_enabled":true,"backout_lambda_init":0.2,"backout_layer":-1} |
| 395 | {"fp32_control_tensors":true} |
| 399 | {"optimizer_time_reduction_ms":{"before":19.7,"after":1.3}} |
| 405 | {"hardware":"1x RTX 3090 on RunPod","dataset":"fineweb10B_sp1024","tokenizer":"fineweb_1024_bpe.model","train_shards":1} |
| 406 | — |
| 407 | {"hardware":"1xH200","wallclock_seconds":600,"artifact_cap_bytes":16000000} |
| 408 | {"train_batch_tokens":98304} |
| 408 | {"matrix_lr":0.035,"scalar_lr":0.035} |
| 413 | {"alpha":0.95,"order":2} |
| 415 | {"step_time_ms":84.65,"steps":6939} |
| 417 | {"step_time_ms":84.65} |
| 417 | {"warmdown_fraction":0.04} |
| 417 | {"preserved_blocks":8} |
| 417 | {"pruning_rate":0.02} |
| 418 | {"threshold":0.1} |
| 420 | {"iterations":50000,"max_wallclock_seconds":86400} |
| 422 | {"top_45_percent":"int7","middle_40_percent":"int6","bottom_15_percent":"int5"} |
| 424 | {"prune_candidates":[0,0.01,0.02,0.03,0.04,0.05],"target_artifact_bytes":15950000} |
| 432 | {"from":"4 x 30720","to":"2 x 30720"} |
| 432 | — |
| 432 | — |
| 433 | {"shrinkage_gated_lora_rank":8} |
| 435 | — |
| 435 | {"threshold":0.0025} |
| 436 | {"experiments":129,"total_compute_usd":19.47} |
| 437 | {"aux_loss_weight":0.1} |
| 438 | {"beta2":0.95} |
| 445 | {"epochs":2,"batches":100,"lr_fraction":0.1} |
| 446 | {"tied_embed_lr":0.01} |
| 446 | {"matrix_lr":0.03} |
| 446 | {"logit_softcap":15} |
| 446 | {"qk_gain_init":1} |
| 448 | — |
| 450 | {"threshold":0.25} |
| 451 | {"batch_size_tokens":622592} |
| 453 | {"enabled":true,"final_training_fraction":0.04} |
| 454 | {"cmudict_exceptions":4795,"word_coverage":0.846,"tokenizer_vocab_size":1024} |
| 455 | {"trigger":"lr_scale<0.1"} |
| 457 | {"reset_between_documents":true} |
| 459 | — |
| 465 | {"layers":10,"d_model":512,"vocab_size":1024} |
| 466 | — |
| 469 | {"threshold":0.5,"adaptation_steps":1700} |
| 470 | {"iterations":50000,"max_wallclock_seconds":86400} |
| 473 | {"banks":4,"replaced_linear_layers":66} |
| 474 | {"threshold":0.25} |
| 475 | — |
| 476 | — |
| 477 | {"smoothing_alpha":0.25,"clip_range":"[-4, 4]","tokens_used":16000000} |
| 477 | {"values_per_byte":2} |
| 478 | {"lr_scale_threshold":0.15} |
| 480 | {"configurations":["attn6_mlp6","attn6_mlp5","attn6_mlp4","attn5_mlp5","attn5_mlp4"]} |
| 482 | {"packing":"two values per byte"} |
| 483 | — |
| 483 | — |
| 488 | {"last_fraction":0.15} |
| 492 | — |
| 493 | — |
| 493 | {"batch_size_tokens":524288,"training_steps":8200,"training_time_seconds":600} |
| 493 | {"matrix_lr":0.025,"scalar_lr":0.025} |
| 498 | {"batches":100,"epochs":2,"lr_fraction":0.1} |
| 498 | {"teacher":"EMA","steps":50,"temperature":2,"alpha":0.7} |
| 499 | {"epochs":2,"batches":100} |
| 499 | {"teacher":"EMA","steps":50,"temperature":2,"alpha":0.7} |
| 503 | {"threshold":0.5,"percentile_clipping":0.9995} |
| 503 | {"sparsity":0.02} |
| 507 | {"multiplier":2} |
| 512 | {"layers":11} |
| 512 | — |
| 517 | {"experiments":7,"wall_clock_hours":2} |
| 526 | — |
| 527 | — |
| 528 | {"threshold":0.5,"clipping_percentile":0.9995} |
| 528 | {"sparsity":0.02} |
| 529 | {"threshold":0.5,"clip_percentile":0.9995} |
| 529 | — |
| 531 | — |
| 532 | {"codebook_sizes":{"mlp":48,"qkv":80,"proj":64}} |
| 538 | — |
| 542 | — |
| 549 | {"step_time_ms":83.4} |
| 554 | {"checkpoint_interval_steps":25} |
| 555 | {"sparse_hidden_dim":{"from":64,"to":48},"bigram_dim":{"from":128,"to":96},"max_wallclock_seconds":{"from":600,"to":596}} |
| 559 | — |
| 560 | — |
| 560 | — |
| 560 | — |
| 560 | {"train_batch_tokens":131072,"max_wallclock_seconds":2700,"eval_stride":64,"eval_batch_seqs":64} |
| 564 | {"encoder_layers":5,"decoder_layers":6} |
| 568 | {"prune_percent":[3,5]} |
| 570 | — |
| 573 | — |
| 576 | {"temperature":0.98} |
| 579 | — |
| 579 | — |
| 579 | — |
| 585 | {"qat_threshold":0.5,"calibration_samples":256,"prune_pct":0.02} |
| 586 | — |
| 589 | {"tau":0.1,"warmdown_scale_threshold":0.02} |
| 593 | {"calibration_batches":256} |
| 596 | {"gpus":8} |
| 596 | {"max_doc_length":50000} |
| 598 | {"frozen_blocks":2} |
| 605 | — |
| 605 | — |
| 605 | — |
| 606 | — |
| 606 | — |
| 606 | {"threshold":0.5,"QAT_steps":1750} |
| 622 | {"document_level":true,"sequential_processing":true} |
| 633 | — |
| 634 | {"target_size_MB":15.9} |
| 636 | {"late_qat_threshold":0.15} |
| 636 | — |
| 640 | — |
| 641 | — |
| 648 | — |
| 648 | — |
| 648 | {"NCCL_NVLS_ENABLE":1,"NCCL_NET_GDR_LEVEL":5} |
| 648 | — |
| 648 | — |
| 650 | {"shard_order_env_var":"SHARD_ORDER","ranking_model":"6-layer, 512d model trained 500 steps on shard 0","ranking_metric":"cross-entropy loss","ordering":"descending loss (hardest first)"} |
| 651 | — |
| 653 | — |
| 653 | — |
| 657 | — |
| 657 | — |
| 660 | {"moe_start_layer":8} |
| 663 | {"physics_steps":5000,"spectral_modes":256,"svd_coords":64} |
| 663 | {"max_autotune":false} |
| 663 | — |
| 664 | {"grad_scale":4} |
| 665 | {"batch_size_tokens":622592,"wallclock_seconds":600} |
| 666 | {"ternary_start_frac":0.3} |
| 666 | {"threshold_multiplier":0.7} |
| 667 | {"recompile_cost_seconds":50,"overhead_ms_per_step":5} |
| 669 | {"scan_type":"Kogge-Stone","implementation":"pure PyTorch"} |
| 669 | {"enabled":true} |
| 669 | {"activation":"LeakyReLU(0.5)^2","pcg_lambda":0.5} |
| 670 | — |
| 674 | {"activation":"leaky_relu_sq","slope":0.5} |
| 678 | {"calibration_batches":8,"alpha":0.5} |
| 678 | {"min":0.85,"max":0.95,"period":50} |
| 679 | {"early_layers":1.4,"middle_layers":1.8,"late_layers":2.2} |
| 680 | {"gated_attention":true,"value_residual":true,"late_qat_threshold":0.15,"max_wallclock_seconds":600} |
| 681 | {"threshold":0.1} |
| 682 | {"no_compile":true,"max_val_seqs":256} |
| 684 | {"mix_width_start":8,"mix_width_end":32} |
| 686 | {"added_params":"~2K"} |
| 687 | {"experts":["neural","unigram","bigram","trigram","entropy"],"online_update":"log_w -= eta * loss"} |
| 687 | {"ngram_order":[1,2,3],"trigram_buckets":65536} |
| 688 | {"eta":0.1} |
| 692 | {"block_size":128,"calibration_samples":256} |
| 693 | {"block_size":128,"calibration_samples":256,"act_order":true} |
| 694 | — |
| 696 | {"vocab_size":260} |
| 697 | {"ttt_recovery_epochs":20,"ttt_recovery_lr":0.001} |
| 700 | {"lambda":0.01} |
| 700 | {"experts":5} |
| 702 | {"alpha_formula":"0.05 + 0.35 * sigmoid(2 * (H - 4.0))"} |
| 702 | {"orders":[2,3,4,5]} |
| 703 | {"gamma":1.1} |
| 705 | {"vocab_size":256} |
| 706 | {"slope":0.5} |
| 707 | — |
| 710 | {"threshold":0.15} |
| 712 | — |
| 712 | — |
| 713 | {"short_doc_threshold":512} |
| 714 | {"threshold":0.57} |
| 715 | {"alpha":0.4,"order":7,"eval_time_only":true} |
| 716 | {"hardware":"1x RTX 4090","wallclock_seconds":300} |
| 717 | {"module":"CastedLinear","window":"late"} |
| 717 | {"fullgraph":false} |
| 719 | — |
| 719 | — |
| 721 | — |
| 725 | — |
| 726 | {"memmap":true,"multi_shard":true,"gpu_prefetch":true} |
| 727 | {"alpha_formula":"0.05 + 0.55 * sigmoid(2 * (H - 4.0))"} |
| 728 | {"calib_batches":64} |
| 728 | — |
| 728 | {"parameter_banks":4} |
| 730 | {"packing":"base-3 + LZMA"} |
| 731 | {"experts":5,"eta":0.1,"deferred_updates":true} |
| 733 | — |
| 734 | {"calibration_batches":256,"block_size":128} |
| 737 | {"difficulty_metric":"unigram entropy","selection":"load 2x sequences per batch and select the half centered around target difficulty percentile","schedule_shape":"V-shaped","aligned_with":["LR warmdown","SWA phases"]} |
| 738 | {"n":5,"threshold":0.7,"min_observations":3} |
| 738 | {"hidden_dim":512,"k":32,"buffer_size":30000,"temperature":50} |
| 745 | {"experts":5} |
| 746 | {"enabled":true} |
| 751 | — |
| 751 | {"gradient_accum_tokens":131000} |
| 752 | {"chunks":1893} |
| 755 | {"beta":1,"replaced_merge_tokens":659,"total_merge_tokens":765,"vocab_size":1024} |
| 755 | — |
| 756 | {"iterations":3} |
| 756 | {"passes":3} |
| 757 | {"enabled":true,"threshold":0.1} |
| 759 | {"tokens_per_batch":{"before":196000,"after":98000}} |
| 760 | {"values_per_byte":5} |
| 761 | {"orders":[2,3,4,5,6,7]} |
| 762 | {"legal_scoring":true,"multi_epoch_caveat":true} |
| 764 | {"shard_ordering":"hardest_first"} |
| 764 | {"slope":0.9} |
| 767 | {"flash_attn":true,"zstandard_missing":true} |
| 769 | {"ngram":5,"buckets":4194304,"alpha_model":0.8,"alpha_cache":0.2} |
| 769 | {"stride":2048,"overlap":0} |
| 770 | {"min_order":2,"max_order":7} |
| 770 | {"formula":"alpha = 0.05 + 0.55 * sigmoid(2 * (H - 4.0))"} |
| 772 | {"stage":"shard-level selection","num_shards":80,"methods_tested":8} |
| 772 | {"chunk_size_tokens":32768,"total_chunks":244080,"selection_fraction":0.12} |
| 772 | {"seeds":[1337,42,2025],"hardware":"8xH100"} |
| 773 | {"late_qat":true,"ttt_burst_enabled":0,"distill_enabled":0} |
| 774 | {"entropy_center":3,"slope":0.25,"min_order":2} |
| 774 | — |
| 776 | {"modes":["EXPLORE","EXPLOIT","COMBINE","NARROW"]} |
| 777 | {"orders":"2-7","buckets_per_table":4194304,"min_count":2,"alpha_base":0.05,"alpha_range":0.55,"alpha_scale":2,"alpha_threshold":4} |
| 778 | {"orders":"2-7","min_count":2,"buckets_per_order":4194304} |
| 779 | {"alpha_formula":"0.05 + 0.55 * sigmoid(2 * (H - 4.0))"} |
| 782 | — |
| 782 | {"update_interval_batches":32,"converged_multipliers":{"o2":0.3,"o3":0.3,"o4":0.97,"o5":2,"o6":2,"o7":2}} |
| 783 | — |
| 785 | {"prefetch":1,"prefetch_queue":2,"copy_stream":1} |
| 786 | {"orders":[2,3,4,5,6,7],"alpha_formula":"0.05 + 0.55 * sigmoid(2 * (H - 4.0))"} |
| 786 | {"hash_tables_per_order":2,"buckets_per_order":4000000} |
| 788 | — |
| 790 | {"time_limit_seconds":600} |
| 792 | {"alpha_low":0.05,"alpha_high":0.4,"entropy_threshold":4} |
| 793 | {"cap_bytes":16000000} |
| 794 | {"activation":"LeakyReLU(0.5)^2"} |
| 796 | {"distributed_ranks":8} |
| 797 | — |
| 797 | {"cache_neural_mix":"80/20","near_zero_backoff_beta":0.000001} |
| 798 | {"orders":[2,3,4,5,6,7]} |
| 798 | {"entropy_centers":{"2":4.5,"3":4.2,"4":3.8,"5":3.5,"6":3.2,"7":3}} |
| 798 | {"eta":0.1} |
| 799 | {"train_batch_tokens":524288,"context_length":2048,"hardware":"8x H100 SXM"} |
| 800 | {"ranks":8,"token_history_scale":"full 62M-token picture"} |
| 800 | {"suppressed_orders":[2,3],"suppression_scale":[0.3,0.45],"boosted_orders":[5,6,7],"boost_scale":[1.88,2]} |
| 802 | {"cache_orders":[2,3,4,5,6,7]} |
| 803 | {"complement_alpha":0.5} |
| 803 | {"ngram_order_min":2,"ngram_order_max":10,"buckets":4194304} |
| 803 | {"alpha_base":0.2,"alpha_range":0.55,"alpha_center":3} |
| 805 | {"negative_slope":0.5} |
| 806 | {"slope":0.9} |
| 806 | {"alpha_formula":"0.05 + 0.55 * sigmoid(2.0 * (H - 4.0))"} |
| 807 | — |
| 809 | {"order":9,"alpha_min":0.05,"alpha_max":0.6,"min_count":2,"num_buckets":4194304,"chunk_tokens":1000000} |
| 810 | {"reported_speedup":"37%"} |
| 810 | — |
| 811 | {"complement_alpha":0.5} |
| 811 | {"ngram_order":10,"alpha_base":0.2,"alpha_range":0.55,"alpha_center":3} |
| 814 | {"multipliers":54} |
| 814 | {"complement_alpha":0.5} |
| 814 | {"ranks":8} |
| 818 | — |
| 820 | {"sampler":"ddpm_cache","sampling_schedule":"linear","sampling_steps":256} |
| 822 | {"gpu_count":1,"duration_minutes":20} |
| 826 | {"orders":[2,9],"buckets_per_order":4194304,"alpha_range":[0.05,0.6],"entropy_center":3,"chunk_size":1000000} |
| 826 | — |
| 827 | — |
| 827 | {"clip_percentiles":[0.9999,0.99995,0.99999,0.999995,1]} |
| 828 | {"experiments":74,"screening_steps":"10-12"} |
| 831 | {"throughput_tax_bpb_per_ms":0.007} |
| 832 | {"projections":256,"knots":17,"weight":0.02} |
| 834 | {"prefill_counted_in_wallclock":true} |
| 834 | {"experts":7,"mixer_loss_weight":0.1,"neural_floor":0.05} |
| 835 | {"phases":[{"repeats":2,"eff_depth":6},{"repeats":3,"eff_depth":9},{"repeats":4,"eff_depth":12}]} |
| 835 | — |
| 840 | — |
| 840 | {"low_orders_multiplier":0.3,"high_orders_multiplier":2} |
| 840 | — |
| 843 | {"score_first":true,"cache_update_timing":"after scoring each chunk","low_order_multiplier":0.3,"high_order_multiplier":2} |
| 846 | {"order_range":"2-9","hash_buckets":4000000} |
| 849 | {"experts":6} |
| 850 | {"alpha":0.5} |
| 850 | {"trigger_fraction":0.85} |
| 851 | {"high_order":9,"low_order":2} |
| 851 | {"ranks":8} |
| 851 | {"temperature":0.85} |
| 851 | — |
| 851 | {"eta":0.1} |
| 852 | — |
| 852 | {"activation":"LeakyReLU(0.5)^2"} |
| 855 | {"negative_slope":0.5} |
| 856 | {"experts":5} |
| 859 | {"experiments":79} |
| 861 | {"encoder_layers":[0,1,2,3,4,5],"decoder_layers":[6,7,8,9,10],"dim":512,"heads":8,"mlp_hidden":1536} |
| 864 | {"orders":"2-9","chunk_size":65000,"hash_buckets":4000000} |
| 865 | {"order_range":"2-9","chunk_size_tokens":65000,"hash_buckets":4000000,"backward_looking":true,"score_first":true} |
| 868 | {"target_seconds":580,"safety_seconds":8} |
| 869 | {"chunks":63,"max_order":9,"buckets":4194304} |
| 869 | {"alpha_max":0.7,"order_mults":[0.3,0.3,0.97,2,2,2,2,2]} |
| 870 | {"ngram_orders":"2-12","cache_build":"np.bincount","rescore_scope":"all_tokens"} |
| 872 | {"final_training_fraction":0.2} |
| 873 | {"phased_training":true,"meta_learning_final_fraction":0.2} |
| 875 | {"global_batch_schedule":[64,128,192]} |
| 875 | — |
| 875 | — |
| 876 | {"orders":[2,11]} |
| 876 | — |
| 880 | {"probes":[48,36,28,20,16]} |
| 880 | {"orders":[2,9]} |
| 880 | {"window":4096,"alpha_multiplier_range":[0.7,1.5]} |
| 881 | {"ngram_orders":"2-12","full_stream_rescore":true} |
| 881 | — |
| 881 | — |
| 881 | — |
| 884 | {"grad_accum_formula":"8 // world_size"} |
| 888 | {"pass1_records_token_stats":true,"pass2_no_second_forward_pass":true} |
| 888 | {"NGRAM_SELF_EXCLUDE":0,"NGRAM_COUNT_CONF_GAIN":0} |
| 889 | {"orders":"2-7gram","alpha_formula":"0.05 + 0.55 * sigmoid(2*(H-4))","min_count":2,"hash_buckets_per_order":4000000} |
| 890 | {"orders":"2-9","hash_buckets_per_order":4000000,"min_count":2,"alpha_range":[0.05,0.6]} |
| 900 | {"phrase_probes":[20,16],"ngram_orders":[2,15]} |
| 900 | {"orders":[2,15]} |
| 900 | {"alpha":0.5,"orders":[2,5],"warmup_steps":200} |
| 902 | {"bits":10} |
| 904 | {"clean_noisy_loss_mix":0.35,"noise_ratio_start":0.05,"noise_ratio_end":0.35,"random_replace_prob":0.15,"mask_token_id":2} |
| 905 | {"diffusion_steps":8,"min_prefix":16} |
| 905 | — |
| 905 | {"metric":"diffusion_pll_bpb"} |
| 907 | {"ranks":8,"shared_tables":true} |
| 907 | {"passes":2,"tokens":62000000} |
| 907 | {"order_min":2,"order_max":12} |
| 907 | {"speedup_claimed":"10-50x"} |
| 909 | {"orders":"2-11","buckets_per_order":4194304} |
| 909 | {"beta":2} |
| 912 | {"orders":[2,12]} |
| 913 | {"ngram_order":"2-12"} |
| 913 | {"phrase_lengths":[64,56,48,36,28,20,16]} |
| 915 | {"orders":"2-7","alpha_formula":"0.05 + 0.55 * sigmoid(2.0 * (H - 4.0))"} |
| 915 | {"speedup_vs_torch_compile":1.94} |
| 916 | {"orders":"2-12"} |
| 916 | — |
| 921 | {"order":13,"passes":2,"entropy_center":3,"entropy_scale":2} |
| 931 | {"buckets":32768,"orders":"2..9","count_table_bits":32} |
| 933 | {"alpha_high":0.99,"entropy_thresh":3} |
| 933 | — |
| 933 | — |
| 937 | {"threshold":0.05} |
| 940 | {"orders":"2-7"} |
| 943 | — |
| 944 | — |
| 944 | — |
| 945 | {"orders":[2,16],"buckets":4000000,"experts":17,"mixer_loss_weight":0.15,"neural_floor":0.05} |
| 945 | {"complement_alpha":0.5,"complement_threshold":0.3} |
| 948 | — |
| 948 | {"concentrations":[50,50,6.95,2.98,2.05,2.05,2.05,1.86,1.86,1.86,1.86,1.86,1.86,1.86]} |
| 948 | {"probe_lengths":[20,16]} |
| 948 | {"order_min":2,"order_max":15,"buckets":4194304} |
| 948 | {"alpha":0.5,"orders":[2,3,4,5]} |
| 949 | {"beta":2,"min_gate":0.05,"eps":0.000001,"token_stride":32} |
| 950 | {"enabled":true,"beta":2,"min_gate":0.05,"eps":0.000001,"token_stride":32} |
| 958 | {"alpha":0.2,"tau":8,"entropy_power":1} |
| 959 | {"ONLINE_CAL":3,"BIAS_LR":0.05,"BIAS_DECAY":1} |
| 959 | {"BLEND_MODE":"logodds"} |
| 962 | — |
| 962 | — |
| 963 | — |
| 963 | {"beta":2} |
| 967 | {"output_projections_lr_multiplier":3,"input_projections_lr_multiplier":0.5} |
| 967 | {"experts":["Neural","Unigram","Bigram","Trigram","Entropy"]} |
| 968 | {"ngram_order":20,"phrase_probe_lengths":[20,16]} |
| 968 | {"alpha":0.5,"orders":[2,5]} |
| 974 | {"replaced_components":["blocks.*.attn.proj","blocks.*.mlp.proj"],"adapter_rank":32} |
| 974 | {"seed_bytes":4} |
| 976 | {"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_mult":2} |
| 982 | {"buckets":32000,"order_min":2,"order_max":9} |
| 984 | {"beta":2,"min_gate":0.05,"eps":0.000001,"token_stride":32,"enabled":true} |
| 984 | — |
| 985 | {"max_wallclock_seconds":600} |
| 986 | {"orders":"2-13","buckets":128000} |
| 986 | {"concentration":5} |
| 986 | {"probe_lengths":[48,36,28,20,16]} |
| 990 | {"env_var":"CRAWLER_QUANT_INT8=1"} |
| 993 | {"orders":"2->7","adaptive_mode":"sigmoid_raw_entropy","alpha_range":[0.05,0.6],"hash_buckets":4194304,"min_count":2} |
| 994 | — |
| 994 | — |
| 994 | — |
| 999 | {"high_threshold":2.1,"low_threshold":1.75} |
| 1014 | — |
| 1015 | {"backend":"flash_attn_3"} |
| 1024 | {"base_concentration":5,"formula":"c_eff = c_base / (1 + beta * np.log1p(ctx_count) * specificity_boost)"} |
| 1026 | {"max_order":7,"alpha":0.5,"nll_threshold":2.5,"adaptive_range":[0.1,2],"backoff":"strict"} |
| 1028 | {"enabled":true} |
| 1029 | {"teacher_params":105500000,"top_k_logits":32,"temperature":2,"alpha_values":[0.1,0.3,0.5]} |
| 1030 | {"orders":"2-13","buckets_per_order":131072,"concentrations":[50,50,20,10,6,4,3,2.5,2,1.8,1.6,1.4]} |
| 1033 | {"complement_alpha":0.5} |
| 1033 | {"formula":"0.20 + 0.55 * sigmoid(2 * (H - 3.0))"} |
| 1033 | {"orders":"2-10","buckets":4000000} |
| 1034 | {"alpha":0.5,"temperature":4} |
| 1036 | {"total_batch_size":65536,"baseline_batch_size":524288,"steps":404} |
| 1038 | {"layers":9,"model_dim":512,"num_heads":8,"num_kv_heads":4,"mlp_mult":2} |
| 1043 | {"packet_store":true,"online_update":true} |
| 1043 | {"confidence_margin":0.05,"has_data_threshold":20} |
| 1044 | {"target_ratio":0.25} |
| 1044 | {"weight":1} |
| 1044 | — |
| 1046 | {"table_size":"1024x1024"} |
| 1047 | {"gptq_reserve_ms":30000} |
| 1053 | {"mask_rate_range":[0.15,0.85],"eval_mask_rate":0.5,"eval_passes":8} |
| 1055 | {"alpha":0.1,"beta":0.2,"candidate_layers":[0,1,2,3]} |
| 1056 | {"orders":"2-12","buckets_per_order":32768} |
| 1056 | {"concentrations":[50,50,20,10,6,4,3,2.5,2,1.8,1.6]} |
| 1060 | {"shards":"multi-shard","stride_scheme":"coprime"} |
| 1066 | {"gpus":8,"wallclock_seconds":600} |
| 1076 | {"orders":"2-12","buckets_per_order":32768} |
| 1076 | {"concentrations":[50,50,20,10,6,4,3,2.5,2,1.8,1.6],"confidence_scale":12} |
| 1076 | — |
| 1081 | {"agents":4,"decision_interval_steps":800} |
| 1081 | {"nodes":500292,"edges":121084,"token_importance_scores":358} |
| 1083 | {"shared_tables":true,"cubric_warm_start_cells":54,"alpha_range":[0.2,0.75],"complement_alpha":0.5,"ngram_eval_order":9} |
| 1084 | {"enabled":true,"learning_rate":0.001,"steps":3} |
| 1084 | {"weight":0.1,"depth":4} |
| 1088 | — |
| 1088 | — |
| 1095 | {"ngram_orders":"2-7","cache_type":"INT16","multi_gpu_sync":"all_reduce"} |
| 1096 | {"rank":1,"targets":["Q","V","MLP-up","MLP-down"]} |
| 1096 | — |
| 1097 | {"jepa_alpha":0.1} |
| 1099 | — |
| 1100 | — |
| 1100 | — |
| 1105 | {"shards":"multi-shard","prefetch":true} |
| 1105 | {"kernel":"CUTLASS EVT","schedule":"WarpSpecializedPingpong"} |
| 1106 | — |
| 1106 | — |
| 1106 | — |
| 1108 | — |
| 1108 | — |
| 1108 | {"method":"SRYS"} |
| 1111 | — |
| 1114 | {"orders":"2-13","buckets":32000} |
| 1114 | {"concentrations":[50,50,20,10,6,4,3,2.5]} |
| 1114 | {"orders":"2-9","buckets":4000000} |
| 1116 | {"lambda":1,"mask_rate":0.15,"views":2,"forward_passes_per_step":3} |
| 1119 | {"diffusion_enabled":true,"diffusion_mix":0.5} |
| 1119 | — |
| 1119 | — |
| 1119 | — |
| 1120 | {"coprime_loader":true,"gpu_prefill":true} |
| 1122 | {"shards":80} |
| 1122 | — |
| 1124 | — |
| 1128 | {"delta_shape":[1,1,512],"steps":5,"learning_rate":0.003} |
| 1130 | — |
| 1130 | {"calibration_time_seconds":14} |
| 1130 | {"gamma":0.75} |
| 1139 | {"configs_tested":50,"sweep_rounds":5} |
| 1143 | {"parent_tokenizer":"english-1024-clean-v1","tokenizer_name":"Scylla"} |
| 1143 | — |
| 1143 | {"train_shards":79,"val_shards":1} |
| 1145 | {"single_pass":true,"prefix_only":true} |
| 1149 | {"backends":["none","qjl","polar","turbo"]} |
| 1150 | {"lr":0.003,"steps":5} |
| 1152 | {"jepa_weight":0.5,"sigreg_weight":0.0001} |
| 1153 | {"backends":["int8_triton","qjl_triton"]} |
| 1153 | — |
| 1153 | — |
| 1154 | — |
| 1154 | — |
| 1159 | {"cache_mode":"ppm","max_order":7,"mixing":"dirichlet","alpha":0.3,"count_smoothing":4} |
| 1164 | {"adapter_kind":"random_diag","sites":["q","v","lm_head"],"rank":8} |
| 1172 | — |
| 1176 | {"delta_dim":512,"steps":5,"learning_rate":0.003} |
| 1183 | {"alpha":0.3,"applied_every_steps":4} |
| 1186 | {"orders":[2,3,4,5]} |
| 1192 | — |
| 1193 | {"sparse_ratio":0.5} |
| 1198 | {"gpus":8,"gpu_type":"H100","iterations":100000,"max_wallclock_seconds":7195} |
| 1214 | {"num_parameters":22} |
| 1214 | {"layers":[6,7,8]} |
| 1215 | {"orders":"3-7","hashes":4,"counters":"64M"} |
| 1216 | {"fraction":0.65} |
| 1217 | {"qk_gain_init":5} |
| 1217 | {"slot_steps":8,"slot_lr":0.005} |
| 1222 | {"phase1_steps":7200,"phase2_steps":1500,"inner_steps":1} |
| 1224 | {"mode":"hadamard","targets":["mlp_up","mlp_down"],"block_sizes":[128,256,512]} |
| 1224 | {"targets":["mlp_up","mlp_down"]} |
| 1227 | {"order":6} |
| 1227 | {"temperature":2,"alpha":0.5} |
| 1236 | {"steps":8,"delta_shape":"[1, 1, 512]"} |
| 1241 | {"eos_id":1,"pad_id":1025,"mask_id":1024} |
| 1241 | {"shards_in_memory":4,"rotate_shards":true,"max_train_shards":0} |
| 1241 | {"head_counts":[2,4,8,16,32],"model_dim":512} |
| 1243 | {"jepa_loss_weight":0.1} |
| 1245 | {"window":1024} |
| 1248 | — |
| 1251 | — |
| 1255 | — |
| 1255 | {"alpha":0.3,"interval":4} |
| 1257 | {"alpha":0.5} |
| 1257 | — |
| 1259 | {"k":8,"lambda":0.12,"subsample":4} |
| 1268 | {"block_size":128,"calibration_tokens":131072} |
| 1272 | — |
| 1272 | — |
| 1272 | — |
| 1272 | — |
| 1276 | {"delta_shape":[1,1,512],"optimizer":"AdamW","learning_rate":0.005,"steps":8} |
| 1282 | {"stride":2} |
| 1282 | {"steps":8} |
| 1286 | — |
| 1291 | {"steps":8,"learning_rate":0.005,"delta_shape":[1,1,512]} |
| 1296 | — |
| 1299 | — |
| 1299 | {"gradient_averaging":0.08333333333333333,"maturity_ramp_steps":20} |
| 1299 | — |
| 1305 | {"chunk_divisor":4} |
| 1306 | {"steps":8,"learning_rate":0.005} |
| 1309 | {"min_order":8,"max_order":12,"lambda":0.15,"match_rate":0.0503,"match_accuracy":0.6557} |
| 1318 | {"method":"L-BFGS25","history":20,"warm_start":true,"delta_clip":5,"logit_space":true,"focal_tokens":128} |
| 1320 | {"variants_tested":7} |
| 1325 | — |
| 1325 | — |
| 1325 | {"stages":3} |
| 1328 | {"hidden_state_delta_shape":"[bsz,1,512]","logit_bias_shape":"[bsz,1,1024]","params_per_sequence":1536,"steps":24} |
| 1329 | {"hidden_state_delta_shape":"[bsz,1,512]","logit_bias_shape":"[bsz,1,1024]","params_per_sequence":1536,"steps":24} |
| 1342 | {"vocab_size":260} |
| 1342 | — |
| 1344 | {"steps":4} |
| 1344 | — |
| 1347 | — |
| 1357 | {"phases":5} |
| 1357 | {"steps":200} |
| 1357 | — |
| 1359 | {"loss_weighting":"1 - alpha * p_bigram(token)"} |
| 1359 | — |
| 1359 | {"ddp_safe":true,"score_first":true} |
| 1360 | {"sigma":2,"normalized_weights":true} |
| 1368 | {"alpha":0.9,"steps":32} |
| 1369 | — |
| 1369 | — |
| 1369 | — |
| 1372 | {"opt_mask_range":"[focal_start, s)"} |
| 1378 | — |
| 1378 | {"recurrence":2} |
| 1379 | {"loss_reweighting":"1 - alpha * p_bigram(token)"} |
| 1379 | — |
| 1379 | — |
| 1380 | {"gamma":8} |
| 1381 | — |
| 1381 | {"qk_gain":5} |
| 1384 | {"experts":5,"eta":0.1,"initial_log_weight_neural":2} |
| 1394 | {"vocab_size":8192} |
| 1394 | {"clip_scale_k":12.85,"embedding_clip_scale_k":20} |
| 1394 | — |
| 1394 | — |
| 1396 | {"steps":4} |
| 1397 | {"max_gradient_scale":1.15} |
| 1397 | — |
| 1398 | {"bigram_prior":true,"loss_scaling_max":1.15} |
| 1399 | {"learning_rate":0.05,"steps":5,"clip":3,"vocab_size":4096} |
| 1403 | {"mask_token_id":1024,"eps":0.1} |
| 1405 | {"self_gen_seqs":64} |
| 1410 | {"score_first":true} |
| 1413 | {"qk_gain_init":5} |
| 1414 | {"steps":24} |
| 1415 | {"method":"ETLB","steps":5,"learning_rate":0.05,"clip":3,"warm_start":true} |
| 1416 | — |
| 1416 | {"vocab_size":8192} |
| 1420 | {"forward":"Triton TMA","backward":"CUTLASS EVT"} |
| 1420 | — |
| 1430 | {"hidden_delta_shape":"[bsz,1,512]","logit_bias_shape":"[bsz,1,1024]","params":1536} |
| 1430 | {"heads":2,"loss_weight":0.1} |
| 1431 | — |
| 1433 | — |
| 1433 | — |
| 1433 | {"interval_steps":16} |
| 1436 | — |
| 1437 | {"enabled":true} |
| 1438 | {"steps":16000,"training_time_seconds":57979.039} |
| 1439 | — |
| 1439 | {"rank_qk_early":48,"rank_qk_late":64,"rank_vo_early":64,"rank_vo_late":96,"rank_mlp_early":96,"rank_mlp_late":128} |
| 1439 | — |
| 1439 | {"init_rank":128,"target_avg_rank":80} |
| 1439 | {"rand_basis_rank":16,"num_bases_qo":32,"num_bases_kv":16,"num_bases_mlp":32,"late_layer_mult":2} |
| 1439 | {"layers":"4-6","targets":["attn","mlp"]} |
| 1443 | — |
| 1443 | {"stages":3,"fractions":[0.1,0.7,0.2]} |
| 1452 | {"order":9,"min_count":1} |
| 1461 | {"nmf_max_iter":1} |
| 1473 | {"calibration_seqs":64,"calibration_tokens":2048} |
| 1474 | — |
| 1476 | {"QK_GAIN_INIT":5} |
| 1478 | {"exact_causal_3gram":true,"bounded_exact_local_repeat":true,"repeat_match_length":"4-8","top_k":3,"min_support":2,"alpha":0.3} |
| 1479 | — |
| 1487 | {"qk_gain_init":5.25} |
| 1488 | {"steps":24} |
| 1489 | — |
| 1494 | — |
| 1501 | {"every":4,"enabled_in_this_pr":false} |
| 1502 | {"every_n_steps":4} |
| 1502 | {"loss_weight":0.5,"delta_weight":0.3} |
| 1502 | {"total_parameters":66} |
| 1504 | {"vocab_size":1024} |
| 1504 | {"runs":47} |
| 1507 | {"order":12,"hash_buckets":4000000} |
| 1514 | {"base_beta":2,"agree_bonus":0.1,"within_beta":0,"word_beta":0} |
| 1521 | — |
| 1521 | — |
| 1524 | — |
| 1525 | — |
| 1528 | {"start_fraction":0.2} |
| 1528 | — |
| 1532 | — |
| 1534 | — |
| 1537 | {"cat_every":50,"cat_weight":0.001} |
| 1537 | {"experts":4,"top_k":2,"alpha":0.01} |
| 1537 | {"grid_size":5,"order":3} |
| 1540 | — |
| 1544 | {"seeds":3} |
| 1548 | {"seed":42,"lora_rank":304} |
| 1551 | {"stages":["0-5% full expansion","5-25% stochastic gating","25-100% pruned target-width training"]} |
| 1551 | {"gating":"Bernoulli","estimator":"straight-through"} |
| 1555 | {"min_match":1,"top_k":1000,"cross_window_weight":0.06} |
| 1555 | {"size":"16384x512"} |
| 1556 | {"alpha":0.1,"layers":"2-5"} |
| 1557 | {"beta":2,"agree":0.1} |
| 1558 | {"hessian_diagonal_boost":0.1} |
| 1558 | — |
| 1558 | {"step_count_current":700,"step_count_sota":6922} |
| 1562 | {"vocab_size":1024} |
| 1564 | {"vocab_size":1024} |
| 1569 | {"backend":"triton","state_blocks":"auto"} |
| 1571 | — |
| 1574 | — |
| 1574 | {"thresholds":[4,2.5,2,1.8]} |
| 1575 | — |
| 1576 | — |
| 1578 | {"vocab_size":8192,"normalized_text":"NFKC + lowercased"} |
| 1578 | {"dataset":"FineWeb 10B"} |
| 1580 | {"saliency_token_prior":true,"saliency_dynamic_correction":true,"saliency_phrase_term":true,"saliency_attention_bias":true,"saliency_bigram":false} |
| 1581 | {"num_spans":4,"span_len_mean":16,"span_len_min":4,"mask_ratio":0.06} |
| 1581 | {"var_weight":0.15,"cov_weight":0.02,"gamma":1} |
| 1582 | — |
| 1584 | {"fused_muon_kernel":true,"batched_ema":true,"loader_prealloc":true} |
| 1589 | {"rank":32} |
| 1589 | {"embedding_dim":128} |
| 1589 | {"message_dim":64} |
| 1589 | {"steps":50} |
| 1589 | {"hidden_dim":256} |
| 1589 | {"dimension":2048} |
| 1589 | {"bond_dim":64} |
| 1598 | — |
| 1607 | {"mamba_layers":7,"attention_layers":1} |
| 1608 | {"wallclock_seconds":600,"hardware":"NVIDIA H100 SXM5 80GB"} |
| 1623 | {"init":5} |
| 1623 | — |
| 1629 | {"layers":[6,7,9],"alpha":0.01,"warmup_steps":200,"decay_start_frac":0.7,"decay_end_frac":0.85} |
| 1634 | {"weights":{"predictable":0.1,"frontier":1,"noise":0.7}} |
| 1634 | {"loss_multiplier_range":[0.85,1.15]} |
| 1634 | {"alpha_high_quality":0.15,"alpha_low_quality":0.3} |
| 1638 | {"hyperparams":["TTT_ADAPTIVE","TTT_MAX_EPOCHS","TTT_MIN_EPOCHS","TTT_ADAPT_EMA"]} |
| 1644 | {"vocab_size":8192} |
| 1662 | {"sp":8192} |
| 1662 | {"val_loss_every":99999} |
| 1663 | {"recur_start_step":3000,"recur_homotopy":0} |
| 1663 | {"val_loss_every":99999} |
| 1664 | {"enabled":true} |
| 1664 | {"group_size":128} |
| 1664 | — |
| 1665 | {"ngram":"trigram"} |
| 1665 | {"vocab_size":288} |
| 1666 | {"vocab_size":288} |
| 1682 | {"best_p":0.9,"tested_range":[0.85,1.2]} |
| 1694 | {"calibration_sequences":64,"calibration_tokens":2048,"temperature":0.8} |
| 1695 | {"enabled":true} |
| 1697 | {"ttt_easy_chunk_ratio":0.998,"ttt_easy_chunk_epochs":1,"ttt_outlier_drop_fraction":0.03,"ttt_score_weight_power":0.5} |
| 1697 | {"swept_values":[1600,2000,2400,2600,2800,3000],"best_value":2600} |
| 1697 | {"tensors":["attn_scale","mlp_scale","resid_mix","recur_attn_delta","q_gain","skip_weights","skip_gates"]} |
| 1699 | {"t_range":[0.001,1]} |
| 1700 | {"vocab_size":8192} |
| 1700 | {"num_phases":3} |
| 1704 | {"loop_inject_enabled":1,"use_pass_readout":1,"readout_groups":16,"readout_scale":0.35} |
| 1705 | — |
| 1707 | {"tokenizer":"SP10240","casefold":true} |
| 1712 | — |
| 1713 | {"MATRIX_LR":0.08} |
| 1717 | {"grad_accum_steps":"96 // world_size"} |
| 1717 | {"max_wallclock_seconds":600} |
| 1718 | — |
| 1722 | {"steps":24,"learning_rate":0.024} |
| 1723 | {"model_vocab":1024,"tokenizer_vocab":8192} |
| 1727 | {"qk_gain_init":5.25} |
| 1729 | {"transform":"lossless_caps_caseops_v1","controls":["TITLE","ALLCAPS","CAPNEXT","ESC"]} |
| 1730 | {"qk_gain_init":4,"matrix_clip_sigmas":12.86} |
| 1732 | {"matrix_size":"512x512"} |
| 1732 | {"calibration_batches":50} |
| 1734 | {"ttt_macro_phases":0} |
| 1736 | {"name":"lossless_caps_caseops_v1"} |
| 1736 | — |
| 1736 | — |
| 1738 | — |
| 1740 | {"alpha":[0.1,0.3],"orders":[[8],[5,8,12]]} |
| 1741 | {"alpha":[0.1,0.3],"orders":[5,8,12]} |
| 1747 | {"gain":5.25} |
| 1753 | {"mask_temperature_start":1,"mask_temperature_end":0.5,"mask_lr":0.1} |
| 1753 | {"scale_params":["attn_scale","mlp_scale","head_scale","pre_logit_scale"]} |
| 1754 | {"int6_k":12.85,"int8_embedding_k":20} |
| 1755 | — |
| 1756 | — |
| 1759 | {"rank":1} |
| 1759 | {"thresh":0.55,"h_cutoff":0.5} |