← Back to LR Schedule
cosine decay
LR Schedule
Used in
116 PRs
Best BPB
0.1003
Avg BPB
1.0511
Submissions
PR #435 by rthgit
1.6130
PR #473 by abaybektursun
1.1214
PR #481 by mrdavtan
1.0970
PR #508 by newjordan
1.1215
PR #509 by andrewbaggio1
1.1175
PR #517 by lukacf
0.9789
PR #518 by sofiabod
1.0622
PR #528 by EthanYangTW
1.1195
PR #532 by NotADevIAmaMeatPopsicle
1.0487
PR #537 by Christopher-Lee-McClendon
1.1387
PR #545 by EthanYangTW
1.1179
PR #549 by abaybektursun RECORD
1.1194
PR #562 by bigbag
1.1354
PR #568 by MatoTeziTanka
0.7853
PR #573 by Sarimsaljook
1.0523
PR #578 by newjordan
1.1215
PR #581 by teddyoweh
1.0698
PR #585 by EthanYangTW
1.1179
PR #589 by RoyiRa
1.1178
PR #596 by AriaAnima
0.6430
PR #606 by EthanYangTW
1.1162
PR #607 by Neopolita
1.4750
PR #611 by teddyoweh
0.5601
PR #614 by bigbag
0.6864
PR #642 by minh-stakc
0.8173
PR #653 by demirelo
1.1552
PR #661 by andrewbaggio1
1.1175
PR #672 by andrewbaggio1
1.0781
PR #685 by andrewbaggio1
1.0366
PR #688 by RoyiRa
1.0745
PR #691 by xexyz
1.0988
PR #702 by lukacf
1.0244
PR #714 by Upsalla
1.1187
PR #720 by agalimova
1.1078
PR #726 by DeepReinforce
1.1147
PR #741 by andrewbaggio1
0.9850
PR #752 by Naazimsnh02
1.1182
PR #771 by sunnypatneedi
1.0705
PR #809 by AayushBaniya2006
0.2952
PR #826 by himanshudongre
0.2951
PR #834 by AnirudhRahul
0.1663
PR #838 by aryanbhosale
1.1215
PR #857 by aruniyer
1.1093
PR #880 by RoyiRa
0.1003
PR #885 by lolrazh
0.9958
PR #914 by mkenney2
1.1873
PR #953 by dexhunter
1.0722
PR #960 by ADIITJ
1.1882
PR #967 by dexhunter
1.0450
PR #991 by ibarrajo
1.1145
PR #994 by singhaikshitijjain
1.4315
PR #995 by dexhunter
1.0362
PR #999 by aamodbhatt
1.1179
PR #1039 by yufengli-oai
1.1184
PR #1092 by teddyoweh
1.1219
PR #1127 by dentity007
1.1311
PR #1148 by aamodbhatt
1.1179
PR #1186 by andrewbaggio1
0.9850
PR #1202 by VirajDeshwal
1.1412
PR #1218 by clarkkev RECORD
1.0978
PR #1227 by himanshudongre
1.4841
PR #1229 by resouer
0.9300
PR #1230 by nestamidavaine
1.1163
PR #1231 by nestamidavaine
1.1163
PR #1235 by maksblu
1.3527
PR #1239 by tmancino
1.5918
PR #1257 by BoxiYu
1.0855
PR #1269 by Jtss-ux
1.1194
PR #1280 by aamodbhatt
1.1156
PR #1306 by resouer
1.0846
PR #1313 by anthony-maio
0.8637
PR #1320 by jpfeiffe
1.1196
PR #1321 by anthony-maio
0.7406
PR #1324 by yahya010
0.8275
PR #1328 by renqianluo
0.6361
PR #1341 by himanshudongre
1.1000
PR #1370 by Christopher-Lee-McClendon
1.0030
PR #1380 by ranausmanai
1.1567
PR #1388 by CiprianFlorin-Ifrim
1.5390
PR #1400 by tmancino
1.1035
PR #1406 by aamodbhatt
1.0887
PR #1408 by aamodbhatt
1.0800
PR #1423 by aryanbhosale
1.0791
PR #1430 by renqianluo
0.3964
PR #1431 by Idan3011
1.1266
PR #1434 by ranausmanai
1.5207
PR #1457 by DilpreetBansi
1.1454
PR #1460 by resouer
1.0827
PR #1485 by ndokutovich
1.0679
PR #1487 by ndokutovich
1.0600
PR #1488 by ndokutovich
0.8265
PR #1489 by joshkmartinez
1.0736
PR #1492 by bigbag
1.0810
PR #1493 by bigbag RECORD
1.0810
PR #1501 by SPThole
1.1159
PR #1502 by SPThole
1.1147
PR #1517 by RulinShao
1.0632
PR #1532 by nogakeren
1.0803
PR #1539 by translatingthename
1.0587
PR #1544 by Abhishek8108
1.0283
PR #1545 by Abhishek8108
1.0283
PR #1550 by translatingthename
1.0587
PR #1553 by Abhishek8108
1.2097
PR #1580 by liveyourday
1.2286
PR #1627 by mike-ferguson
1.3246
PR #1636 by PapaFranku4647
1.2299
PR #1696 by kings-crown
1.1224
PR #1698 by arsenis-cmd
1.0099
PR #1711 by aamodbhatt
1.0098
PR #1714 by Anakintano
1.0857
PR #1722 by deborahnelson8788726
0.6580
PR #1731 by Victory963
1.0785
PR #1735 by AjAnubolu
1.0429
PR #1737 by sakthivarshans
1.0723
PR #1738 by alertcat
1.0354
PR #1758 by kilojoules
1.0277

Hyperparameters Across PRs

| pr_number | parameters |
|---|---|
| 435 | {"warmup":true} |
| 473 | {"across_chunks":true} |
| 481 | {"epochs":30} |
| 508 | {"over_actual_training_window":true,"chunks":200} |
| 509 | — |
| 517 | {"t_max":100,"eta_min":0.00001} |
| 518 | {"epochs":50,"formula":"lr *= 0.5 * (1 + cos(pi * progress))"} |
| 528 | — |
| 532 | {"ttt_epochs":10} |
| 537 | {"intra_chunk":true,"inter_chunk":true,"formula":"0.5 × (1 + cos(π × step / total_steps))"} |
| 545 | {"across_chunks":true} |
| 549 | {"warmdown_steps":3500} |
| 562 | {"per_step":true,"decay_to":0} |
| 568 | {"start_lr":0.01,"end_lr":0.001,"epochs":5} |
| 573 | {"to_zero":true,"warmdown_steps":3500} |
| 578 | {"fixed_window":200} |
| 581 | {"start_lr":0.0005,"end_lr":0.00002,"warmup_epochs":1} |
| 585 | {"across_chunks":true} |
| 589 | {"learning_rate":0.002,"applied_to":"TTT across chunks"} |
| 596 | {"min_lr_fraction":0.1,"within_ttt":true} |
| 606 | — |
| 607 | — |
| 611 | {"ttt_epochs":8} |
| 614 | {"per_step":true} |
| 642 | {"across_total_ttt_steps":true} |
| 653 | — |
| 661 | {"ttt_epochs":30} |
| 672 | {"phase":"TTT","epochs":30} |
| 685 | {"epochs":20} |
| 688 | {"adaptive_lr_max_mult":3} |
| 691 | {"epochs":30} |
| 702 | {"scheduler":"CosineAnnealingLR","t_max":100,"eta_min":0.00001} |
| 714 | {"warmdown_steps":3500} |
| 720 | {"warmdown":true} |
| 726 | {"across_chunks":true} |
| 741 | — |
| 752 | {"across_chunks":true} |
| 771 | {"epochs":30,"final_lr":0} |
| 809 | {"warmdown_steps":3500} |
| 826 | — |
| 834 | {"across_chunks":true} |
| 838 | {"warmdown_steps":3500} |
| 857 | {"phase":"test-time training","epochs":20} |
| 880 | {"adaptive":true} |
| 885 | {"across_chunks":true} |
| 914 | {"warmdown_iters":3000} |
| 953 | {"within_ttt":true} |
| 960 | {"start_lr":0.008,"end_lr":0.00001} |
| 967 | {"within_ttt":true} |
| 991 | — |
| 994 | {"warmup":true,"warmdown":true} |
| 995 | — |
| 999 | {"warmdown_steps":3500} |
| 1039 | {"ttt":true} |
| 1092 | {"warmdown_steps":3500} |
| 1127 | — |
| 1148 | {"warmdown_steps":3500} |
| 1186 | — |
| 1202 | {"applied_to":"TTT chunks"} |
| 1218 | — |
| 1227 | — |
| 1229 | {"start_lr":0.008,"end_lr":0.0008,"steps":16} |
| 1230 | {"used_for":"TTT"} |
| 1231 | {"used_for_ttt":true} |
| 1235 | {"warmup_steps":2,"main_scale":0.5,"min_scale":0.05,"gamma":0.8} |
| 1239 | — |
| 1257 | — |
| 1269 | {"warmdown_steps":3500} |
| 1280 | {"warmdown_iters":3500} |
| 1306 | — |
| 1313 | {"start_lr":0.012,"end_lr":0.001} |
| 1320 | {"per_chunk":true} |
| 1321 | {"start_lr":0.012,"end_lr":0.001} |
| 1324 | {"start":0.012,"end":0.001} |
| 1328 | {"start_lr":0.024,"end_lr":0.001} |
| 1341 | {"chunk_size":32768} |
| 1370 | — |
| 1380 | {"min_lr_frac":0.1} |
| 1388 | — |
| 1400 | {"epochs":10} |
| 1406 | {"applied_to":"TTT"} |
| 1408 | {"warmdown_steps":4000} |
| 1423 | — |
| 1430 | {"lr_start":0.432,"lr_end":0.001} |
| 1431 | {"used_for":"TTT","lr":0.003} |
| 1434 | — |
| 1457 | — |
| 1460 | — |
| 1485 | — |
| 1487 | — |
| 1488 | {"start":0.012,"end":0.001} |
| 1489 | — |
| 1492 | — |
| 1493 | — |
| 1501 | {"warmdown_start_step":2200} |
| 1502 | {"phase":"warmdown"} |
| 1517 | {"ttt":true} |
| 1532 | {"warmdown":0.72} |
| 1539 | {"final_lr_factor":0.1} |
| 1544 | {"warmup_steps":100} |
| 1545 | {"warmup_steps":100} |
| 1550 | {"final_multiplier":0.1} |
| 1553 | {"warmup_steps":100,"constant_after_warmup":true} |
| 1580 | {"warmup_steps":64,"decay_start_frac":0.65,"min_scale":0.15} |
| 1627 | — |
| 1636 | {"decay_steps":3000} |
| 1696 | {"applied_to":"TTT"} |
| 1698 | {"applied_to":"TTT learning rate"} |
| 1711 | — |
| 1714 | {"warmup_steps":20,"warmdown_fraction":0.72} |
| 1722 | {"start_lr":0.024,"end_lr":0.001} |
| 1731 | {"applied_to":"TTT"} |
| 1735 | {"scope":"epoch-level","t_max":21,"eta_min_ratio":0.1} |
| 1737 | {"across_chunks":true} |
| 1738 | {"scope":"epoch-level","t_max":21,"eta_min_ratio":0.1} |
| 1758 | {"epochs":21} |