← Back to Regularization
logit softcap
Regularization
Used in
225 PRs
Best BPB
0.0180
Avg BPB
1.0965
Submissions
PR #487by anantdgoel
1.1720PR #871by greqone
0.8004PR #890by sofiabod
0.4405PR #896by MVPandey
1.1896PR #903by CiprianFlorin-Ifrim
1.2064PR #913by RoyiRa
0.0887PR #913by RoyiRa
0.0887PR #915by anthony-maio
0.9642PR #918by haikosys
0.1653PR #920by CiprianFlorin-Ifrim
1.1539PR #921by TimPietrusky
0.0939PR #922by greqone
0.0972PR #923by CiprianFlorin-Ifrim
1.1090PR #960by ADIITJ
1.1882PR #961by callithyia
0.0881PR #969by dnldsz
1.2907PR #979by 0xadvait
1.1387PR #1007by dillon-blake
1.2252PR #1027by Syed-M-Zeeshan
1.3036PR #1030by sofiabod
0.1130PR #1056by sofiabod
0.0180PR #1065by rithunkp
1.1536PR #1068by LappyG
1.1510PR #1072by vimeto
1.1170PR #1096by vimeto
1.3342PR #1097by danielxmed
1.3355PR #1108by DbBested
1.1502PR #1112by dillon-blake
1.2252PR #1123by sisegod
1.1986PR #1125by jainpranjal97
1.1946PR #1126by AnirudhRahul
1.1091PR #1152by ericdatum
1.7942PR #1170by Christopher-Lee-McClendon
1.1199PR #1180by estesryan
1.0577PR #1209by andrewbaggio1
1.1064PR #1227by himanshudongre
1.4841PR #1229by resouer
0.9300PR #1232by Christopher-Lee-McClendon
1.0929PR #1237by ibarrajo
1.1198PR #1240by andrewbaggio1
1.1064PR #1246by deborahnelson8788726
0.9650PR #1247by fahmitech
1.2208PR #1252by ahmetdenizyilmaz
1.0713PR #1263by xexyz
0.9354PR #1268by samquiring
1.1875PR #1291by dentity007
1.0925PR #1293by 5en5e1
1.2409PR #1299by Ribin545
1.8184PR #1300by Ribin545
1.8184PR #1311by htrung1105
1.1303PR #1318by renqianluo
1.0095PR #1319by canivel
0.6951PR #1322by newjordan
1.0854PR #1325by monisha-max
1.3868PR #1325by monisha-max
1.3868PR #1330by luciobaiocchi
1.4617PR #1335by WeijieChen2017
1.1948PR #1337by sergimichi
1.2079PR #1342by nicholasbailey87
1.4816PR #1361by jorge-asenjo
1.1220PR #1384by iverbovoy
1.1441PR #1388by CiprianFlorin-Ifrim
1.5390PR #1410by izlley
1.1158PR #1418by Park-Tae-Hwan
1.4192PR #1420by abaybektursun
1.0801PR #1421by X-Abhishek-X
1.0925PR #1425by dentity007
1.4479PR #1431by Idan3011
1.1266PR #1434by ranausmanai
1.5207PR #1435by AbhayAnandUCSD
1.0980PR #1440by Mertyandimata
1.1026PR #1442by akaiHuang
1.1854PR #1445by X-Abhishek-X
1.0889PR #1453by iverbovoy
1.1324PR #1456by sisegod
1.1465PR #1471by X-Abhishek-X
1.0866PR #1472by trhgbao
1.2066PR #1484by AlirezaAlampour
1.6656PR #1486by AlirezaAlampour
1.6656PR #1492by bigbag
1.0810PR #1493by bigbagRECORD
1.0810PR #1512by Itssshikhar
1.1117PR #1515by dexhunter
1.0872PR #1520by taka6745
1.0824PR #1523by EthanYangTW
1.0778PR #1527by alphastar1111
1.2026PR #1536by dexhunter
1.0775PR #1539by translatingthename
1.0587PR #1541by bigbag
1.0778PR #1544by Abhishek8108
1.0283PR #1545by Abhishek8108
1.0283PR #1546by SPThole
1.0850PR #1550by translatingthename
1.0587PR #1553by Abhishek8108
1.2097PR #1555by andrewbaggio1
1.0764PR #1560by dexhunter
1.0741PR #1561by EthanYangTW
1.0783PR #1565by Idan3011
1.1036PR #1570by yufang67
1.0970PR #1572by anthony-maio
1.0797PR #1573by shivangbaveja
1.1464PR #1581by aiejvn
1.2321PR #1583by codemath3000
1.0801PR #1584by codemath3000
1.0752PR #1585by codemath3000
1.0639PR #1586by dexhunter
1.0749PR #1589by nnm2602
1.3223PR #1606by AlirezaAlampour
1.3969PR #1616by Vickyrrrrrr
1.4100PR #1619by AVINASH0052
1.1156PR #1632by Hkoyuer
1.0274PR #1643by mradassaad
1.1473PR #1658by AVINASH0052
1.0810PR #1660by pablinga19
1.0858PR #1662by pablinga19
1.0862PR #1667by MarioPaerleRECORD
1.0714PR #1670by dexhunter
1.0597PR #1672by andrewbaggio1
1.0119PR #1676by aazizyan
1.0788PR #1688by Buld1n
1.0809PR #1689by chris-colinsky
1.0822PR #1691by AVINASH0052
1.2244PR #1693by dexhunter
1.0573PR #1695by X-Abhishek-X
1.0759PR #1696by kings-crown
1.1224PR #1699by lsb
1.4831PR #1714by Anakintano
1.0857PR #1715by G3sparky
1.0809PR #1716by himanshudongre
1.0788PR #1720by kiyoaki
1.0818PR #1722by deborahnelson8788726
0.6580PR #1724by Unwindology
1.1803PR #1731by Victory963
1.0785PR #1737by sakthivarshans
1.0723PR #1749by gracebml
1.0996PR #1755by OE-GOD
1.0746PR #1759by yijieyuan
1.0799PR #1769by dexhunterRECORD
1.0645PR #1770by liujshi
1.0796PR #1771by bigbag
1.0651PR #1776by anmarhindi
1.0808PR #1780by wisebreadloaf
1.0806PR #1783by ismailntl
1.1716PR #1787by nprime06RECORD
1.0638PR #1791by genji0306
1.0339PR #1794by Programmerryoki
1.0849PR #1797by dexhunter
1.0616PR #1798by leon2k2k2k
1.0629PR #1799by jamesEmerson112
1.2073PR #1800by leon2k2k2k
1.0629PR #1801by leon2k2k2k
1.0629PR #1802by aamodbhatt
1.0771PR #1809by PranavViswanath
1.0800PR #1811by peytontolbert
1.2350PR #1812by EthanNing
1.0729PR #1816by JiaJunDeng5930
1.3915PR #1817by Tonyy1977
1.0903PR #1820by aiejvn
1.4011PR #1822by Unwindology
1.1785PR #1825by EthanYangTW
1.0770PR #1826by EthanYangTW
1.0770PR #1828by 5en5e1
1.1169PR #1845by AlirezaAlampour
1.3095PR #1850by someone114514
1.0050PR #1851by aquariouseworkmanRECORD
1.0613PR #1852by G3sparky
1.0282PR #1854by ndokutovich
0.9024PR #1855by codemath3000RECORD
1.0611PR #1857by dexhunter
1.0322PR #1858by G3sparky
0.9946PR #1859by suchihype
1.1454PR #1876by Meirzhan05
1.0801PR #1880by Meirzhan05
1.0775PR #1881by ndokutovich
0.9019PR #1886by renqianluo
1.0696PR #1891by peytontolbert
1.2205PR #1895by VFYAS
1.0785PR #1897by Blitzo125
1.1636PR #1906by AayushBaniya2006
1.0614PR #1914by Fija
1.0612PR #1917by Blitzo125
1.1636PR #1919by dev-pratap-singh
1.0587PR #1923by jorge-asenjo
1.0658PR #1924by dexhunter
1.0608PR #1926by bigbag
1.0684PR #1934by liujshi
1.0599PR #1936by hilbertmeng
1.0769PR #1938by lijuncheng16
1.0713PR #1945by alertcatRECORD
1.0593PR #1948by TimS-ml
1.0624PR #1953by andrewbaggio1RECORD
1.0586PR #1956by AayushBaniya2006
1.0604PR #1957by mhlov000111
1.2313PR #1958by okezue
1.0135PR #1962by chris-colinsky
1.0631PR #1965by himanshudongre
1.0587PR #1966by renqianluo
1.0690PR #1967by ndokutovich
1.0585PR #1969by bsisduck
1.0804PR #1974by harborglowvintage-oss
1.2193PR #1975by RishabhPrakash5
1.2111PR #1978by EthanYangTW
1.0778PR #1980by Kbediako
1.0738PR #1992by jamesEmerson112
1.0511PR #2005by jamesEmerson112
1.0805PR #2006by Elubrazione
1.0590PR #2007by Elubrazione
1.0590PR #2013by Wilbatronic
1.0543PR #2018by simon-marcus
1.0462PR #2019by aquariouseworkman
1.0585PR #2026by RahimMirani
1.0611PR #2027by H1cSuNtDr4C0n3S
1.0806PR #2028by Arnie016
1.0898PR #2031by deborahnelson8788726
1.0599PR #2037by organic-intelligence-1976
1.2670PR #2048by kineticforge
1.3551PR #2051by dexhunter
1.0605PR #2060by S0urC10ud
1.0579PR #2062by BumaldaOverTheWater94
1.2195PR #2068by jayaram1125
1.0617PR #2071by jamesEmerson112
1.0066PR #2085by umshahid
1.0857PR #2086by deniskurlov
1.1384PR #2088by MaxIv25
1.0744PR #2089by AlirezaAlampour
1.2093

Hyperparameters Across PRs

| pr_number | parameters |
|---|---|
| 487 | {"value":30} |
| 871 | {"value":30} |
| 890 | {"value":30} |
| 896 | {"cap":30} |
| 903 | {"cap":15} |
| 913 | {"value":30} |
| 913 | {"value":30} |
| 915 | {"scale":30} |
| 918 | {"value":30} |
| 920 | {"type":"poly","value":10} |
| 921 | {"value":20} |
| 922 | {"value":30} |
| 923 | {"value":10} |
| 960 | {"value":30} |
| 961 | {"value":30} |
| 969 | {"degree":5,"cap":30} |
| 979 | {"value":30} |
| 1007 | {"cap":30} |
| 1027 | {"softcap":30} |
| 1030 | {"value":30} |
| 1056 | {"value":30} |
| 1065 | {"value":30} |
| 1068 | {"value":30} |
| 1072 | {"value":30} |
| 1096 | {"description":"capped timestep scaling / clamped scale vectors"} |
| 1097 | — |
| 1108 | {"value":30} |
| 1112 | {"threshold":30} |
| 1123 | {"value":15} |
| 1125 | {"value":20} |
| 1126 | {"softcap":30} |
| 1152 | {"cap":30} |
| 1170 | {"value":30} |
| 1180 | {"value":15} |
| 1209 | — |
| 1227 | — |
| 1229 | — |
| 1232 | {"value":30} |
| 1237 | {"value":30} |
| 1240 | — |
| 1246 | {"z_loss":0.0001} |
| 1247 | {"value":30} |
| 1252 | — |
| 1263 | {"softcap":30} |
| 1268 | — |
| 1291 | — |
| 1293 | — |
| 1299 | — |
| 1300 | — |
| 1311 | {"value":30} |
| 1318 | {"delta_clip":5} |
| 1319 | {"value":30} |
| 1322 | {"value":30} |
| 1325 | {"type":"poly5"} |
| 1325 | {"type":"z-loss","weight":0.0001} |
| 1330 | {"value":30} |
| 1335 | {"value":8} |
| 1337 | {"cap":30} |
| 1342 | {"value":30} |
| 1361 | {"value":30} |
| 1384 | {"value":30} |
| 1388 | {"value":10} |
| 1410 | {"value":30} |
| 1418 | {"softcap":15} |
| 1420 | {"value":30} |
| 1421 | {"value":30} |
| 1425 | — |
| 1431 | {"cap":30} |
| 1434 | {"value":15} |
| 1435 | {"value":30} |
| 1440 | {"value":30} |
| 1442 | — |
| 1445 | {"value":30} |
| 1453 | {"value":30} |
| 1456 | {"value":15} |
| 1471 | {"value":30} |
| 1472 | {"value":15} |
| 1484 | {"cap":30,"activation":"tanh"} |
| 1486 | {"cap":30,"activation":"tanh"} |
| 1492 | {"value":30} |
| 1493 | {"value":30} |
| 1512 | {"value":30} |
| 1515 | {"value":30} |
| 1520 | {"value":30} |
| 1523 | {"value":30} |
| 1527 | {"value":12} |
| 1536 | {"value":30} |
| 1539 | {"value":30} |
| 1541 | {"value":30} |
| 1544 | {"value":30} |
| 1545 | {"value":30} |
| 1546 | {"value":30} |
| 1550 | {"value":30} |
| 1553 | {"value":30} |
| 1555 | {"value":30} |
| 1560 | {"value":30} |
| 1561 | {"value":30} |
| 1565 | {"value":30} |
| 1570 | {"value":20} |
| 1572 | {"qk_gain":5.25} |
| 1573 | {"value":30} |
| 1581 | {"softcap":30} |
| 1583 | {"value":30} |
| 1584 | {"value":30} |
| 1585 | {"value":30} |
| 1586 | {"value":30} |
| 1589 | {"value":30} |
| 1606 | {"cap":30} |
| 1616 | {"value":30} |
| 1619 | {"value":30} |
| 1632 | {"value":30} |
| 1643 | — |
| 1658 | {"value":30} |
| 1660 | {"value":30} |
| 1662 | {"sdclip":12.85} |
| 1667 | {"value":30} |
| 1670 | {"value":30} |
| 1672 | {"value":30} |
| 1676 | {"value":30} |
| 1688 | {"value":30} |
| 1689 | {"value":30} |
| 1691 | — |
| 1693 | {"value":30} |
| 1695 | {"parallel_lambda_asym":0} |
| 1696 | {"value":30} |
| 1699 | {"value":30} |
| 1714 | {"value":30} |
| 1715 | {"value":30} |
| 1716 | {"value":30} |
| 1720 | {"value":30} |
| 1722 | {"value":30} |
| 1724 | {"value":30} |
| 1731 | {"value":30} |
| 1737 | {"value":30} |
| 1749 | {"value":30} |
| 1755 | {"cap":30} |
| 1759 | {"value":30} |
| 1769 | {"value":30} |
| 1770 | {"value":30} |
| 1771 | {"value":30} |
| 1776 | {"value":30} |
| 1780 | {"value":30} |
| 1783 | {"value":30} |
| 1787 | {"training_only":true} |
| 1791 | {"value":30} |
| 1794 | {"value":30} |
| 1797 | {"value":30} |
| 1798 | {"fused_softcapped_ce":true} |
| 1799 | {"value":30} |
| 1800 | — |
| 1801 | — |
| 1802 | {"value":30} |
| 1809 | {"value":30} |
| 1811 | {"value":30} |
| 1812 | {"value":30} |
| 1816 | {"sigma":15} |
| 1817 | {"value":30} |
| 1820 | {"value":30} |
| 1822 | {"value":30} |
| 1825 | {"value":30} |
| 1826 | {"value":30} |
| 1828 | {"value":30} |
| 1845 | {"softcap":30} |
| 1850 | {"value":30} |
| 1851 | {"value":30} |
| 1852 | {"value":30} |
| 1854 | {"value":30} |
| 1855 | {"value":30} |
| 1857 | {"value":30} |
| 1858 | {"value":30} |
| 1859 | {"value":30} |
| 1876 | {"value":30} |
| 1880 | {"cap":15} |
| 1881 | {"value":30} |
| 1886 | — |
| 1891 | {"value":30} |
| 1895 | {"value":20} |
| 1897 | {"value":30} |
| 1906 | — |
| 1914 | {"value":30} |
| 1917 | {"value":30} |
| 1919 | {"value":15} |
| 1923 | {"asymmetric":true,"softcap_pos_init":30,"softcap_neg_init":30} |
| 1924 | {"logit_softcap":30} |
| 1926 | — |
| 1934 | {"value":30} |
| 1936 | {"value":30} |
| 1938 | {"value":30} |
| 1945 | {"asymmetric":true,"pos":"softcap_pos","neg":"softcap_neg"} |
| 1948 | {"value":30} |
| 1953 | {"asymmetric":true} |
| 1956 | — |
| 1957 | {"activation":"tanh"} |
| 1958 | {"value":15} |
| 1962 | {"value":30} |
| 1965 | — |
| 1966 | — |
| 1967 | {"asymmetric_logit_rescale":true} |
| 1969 | {"value":30} |
| 1974 | {"value":20} |
| 1975 | {"value":30} |
| 1978 | {"value":30} |
| 1980 | {"value":15} |
| 1992 | {"value":30} |
| 2005 | {"value":30} |
| 2006 | {"asymmetric_logit_rescale":true} |
| 2007 | {"asymmetric_logit_rescale":true} |
| 2013 | {"temperature":1.02} |
| 2018 | {"asym_logit_rescale":true} |
| 2019 | {"asymmetric":true} |
| 2026 | {"value":30} |
| 2027 | {"value":30} |
| 2028 | — |
| 2031 | — |
| 2037 | {"value":30} |
| 2048 | {"softcap":30,"mode":"tanh"} |
| 2051 | {"asymmetric":true} |
| 2060 | {"asym_logit_rescale":true} |
| 2062 | {"matrix_clip_sigmas":12.85,"embed_clip_sigmas":20} |
| 2068 | {"value":30} |
| 2071 | {"value":30} |
| 2085 | {"value":30} |
| 2086 | {"type":"poly5","cap":10} |
| 2088 | {"value":30} |
| 2089 | {"softcap":30} |