← Back to LR Schedule

cosine decay

LR Schedule
Used in
116 PRs
Best BPB
0.1003
Avg BPB
1.0511

Submissions

PR #435by rthgit
1.6130
PR #473by abaybektursun
1.1214
PR #481by mrdavtan
1.0970
PR #508by newjordan
1.1215
PR #509by andrewbaggio1
1.1175
PR #517by lukacf
0.9789
PR #518by sofiabod
1.0622
PR #528by EthanYangTW
1.1195
PR #532by NotADevIAmaMeatPopsicle
1.0487
PR #537by Christopher-Lee-McClendon
1.1387
PR #545by EthanYangTW
1.1179
PR #549 by abaybektursun [RECORD]
1.1194
PR #562by bigbag
1.1354
PR #568by MatoTeziTanka
0.7853
PR #573by Sarimsaljook
1.0523
PR #578by newjordan
1.1215
PR #581by teddyoweh
1.0698
PR #585by EthanYangTW
1.1179
PR #589by RoyiRa
1.1178
PR #596by AriaAnima
0.6430
PR #606by EthanYangTW
1.1162
PR #607by Neopolita
1.4750
PR #611by teddyoweh
0.5601
PR #614by bigbag
0.6864
PR #642by minh-stakc
0.8173
PR #653by demirelo
1.1552
PR #661by andrewbaggio1
1.1175
PR #672by andrewbaggio1
1.0781
PR #685by andrewbaggio1
1.0366
PR #688by RoyiRa
1.0745
PR #691by xexyz
1.0988
PR #702by lukacf
1.0244
PR #714by Upsalla
1.1187
PR #720by agalimova
1.1078
PR #726by DeepReinforce
1.1147
PR #741by andrewbaggio1
0.9850
PR #752by Naazimsnh02
1.1182
PR #771by sunnypatneedi
1.0705
PR #809by AayushBaniya2006
0.2952
PR #826by himanshudongre
0.2951
PR #834by AnirudhRahul
0.1663
PR #838by aryanbhosale
1.1215
PR #857by aruniyer
1.1093
PR #880by RoyiRa
0.1003
PR #885by lolrazh
0.9958
PR #914by mkenney2
1.1873
PR #953by dexhunter
1.0722
PR #960by ADIITJ
1.1882
PR #967by dexhunter
1.0450
PR #991by ibarrajo
1.1145
PR #994by singhaikshitijjain
1.4315
PR #995by dexhunter
1.0362
PR #999by aamodbhatt
1.1179
PR #1039by yufengli-oai
1.1184
PR #1092by teddyoweh
1.1219
PR #1127by dentity007
1.1311
PR #1148by aamodbhatt
1.1179
PR #1186by andrewbaggio1
0.9850
PR #1202by VirajDeshwal
1.1412
PR #1218 by clarkkev [RECORD]
1.0978
PR #1227by himanshudongre
1.4841
PR #1229by resouer
0.9300
PR #1230by nestamidavaine
1.1163
PR #1231by nestamidavaine
1.1163
PR #1235by maksblu
1.3527
PR #1239by tmancino
1.5918
PR #1257by BoxiYu
1.0855
PR #1269by Jtss-ux
1.1194
PR #1280by aamodbhatt
1.1156
PR #1306by resouer
1.0846
PR #1313by anthony-maio
0.8637
PR #1320by jpfeiffe
1.1196
PR #1321by anthony-maio
0.7406
PR #1324by yahya010
0.8275
PR #1328by renqianluo
0.6361
PR #1341by himanshudongre
1.1000
PR #1370by Christopher-Lee-McClendon
1.0030
PR #1380by ranausmanai
1.1567
PR #1388by CiprianFlorin-Ifrim
1.5390
PR #1400by tmancino
1.1035
PR #1406by aamodbhatt
1.0887
PR #1408by aamodbhatt
1.0800
PR #1423by aryanbhosale
1.0791
PR #1430by renqianluo
0.3964
PR #1431by Idan3011
1.1266
PR #1434by ranausmanai
1.5207
PR #1457by DilpreetBansi
1.1454
PR #1460by resouer
1.0827
PR #1485by ndokutovich
1.0679
PR #1487by ndokutovich
1.0600
PR #1488by ndokutovich
0.8265
PR #1489by joshkmartinez
1.0736
PR #1492by bigbag
1.0810
PR #1493 by bigbag [RECORD]
1.0810
PR #1501by SPThole
1.1159
PR #1502by SPThole
1.1147
PR #1517by RulinShao
1.0632
PR #1532by nogakeren
1.0803
PR #1539by translatingthename
1.0587
PR #1544by Abhishek8108
1.0283
PR #1545by Abhishek8108
1.0283
PR #1550by translatingthename
1.0587
PR #1553by Abhishek8108
1.2097
PR #1580by liveyourday
1.2286
PR #1627by mike-ferguson
1.3246
PR #1636by PapaFranku4647
1.2299
PR #1696by kings-crown
1.1224
PR #1698by arsenis-cmd
1.0099
PR #1711by aamodbhatt
1.0098
PR #1714by Anakintano
1.0857
PR #1722by deborahnelson8788726
0.6580
PR #1731by Victory963
1.0785
PR #1735by AjAnubolu
1.0429
PR #1737by sakthivarshans
1.0723
PR #1738by alertcat
1.0354
PR #1758by kilojoules
1.0277

Hyperparameters Across PRs

pr_number | parameters
435{"warmup":true}
473{"across_chunks":true}
481{"epochs":30}
508{"over_actual_training_window":true,"chunks":200}
509
517{"t_max":100,"eta_min":0.00001}
518{"epochs":50,"formula":"lr *= 0.5 * (1 + cos(pi * progress))"}
528
532{"ttt_epochs":10}
537{"intra_chunk":true,"inter_chunk":true,"formula":"0.5 × (1 + cos(π × step / total_steps))"}
545{"across_chunks":true}
549{"warmdown_steps":3500}
562{"per_step":true,"decay_to":0}
568{"start_lr":0.01,"end_lr":0.001,"epochs":5}
573{"to_zero":true,"warmdown_steps":3500}
578{"fixed_window":200}
581{"start_lr":0.0005,"end_lr":0.00002,"warmup_epochs":1}
585{"across_chunks":true}
589{"learning_rate":0.002,"applied_to":"TTT across chunks"}
596{"min_lr_fraction":0.1,"within_ttt":true}
606
607
611{"ttt_epochs":8}
614{"per_step":true}
642{"across_total_ttt_steps":true}
653
661{"ttt_epochs":30}
672{"phase":"TTT","epochs":30}
685{"epochs":20}
688{"adaptive_lr_max_mult":3}
691{"epochs":30}
702{"scheduler":"CosineAnnealingLR","t_max":100,"eta_min":0.00001}
714{"warmdown_steps":3500}
720{"warmdown":true}
726{"across_chunks":true}
741
752{"across_chunks":true}
771{"epochs":30,"final_lr":0}
809{"warmdown_steps":3500}
826
834{"across_chunks":true}
838{"warmdown_steps":3500}
857{"phase":"test-time training","epochs":20}
880{"adaptive":true}
885{"across_chunks":true}
914{"warmdown_iters":3000}
953{"within_ttt":true}
960{"start_lr":0.008,"end_lr":0.00001}
967{"within_ttt":true}
991
994{"warmup":true,"warmdown":true}
995
999{"warmdown_steps":3500}
1039{"ttt":true}
1092{"warmdown_steps":3500}
1127
1148{"warmdown_steps":3500}
1186
1202{"applied_to":"TTT chunks"}
1218
1227
1229{"start_lr":0.008,"end_lr":0.0008,"steps":16}
1230{"used_for":"TTT"}
1231{"used_for_ttt":true}
1235{"warmup_steps":2,"main_scale":0.5,"min_scale":0.05,"gamma":0.8}
1239
1257
1269{"warmdown_steps":3500}
1280{"warmdown_iters":3500}
1306
1313{"start_lr":0.012,"end_lr":0.001}
1320{"per_chunk":true}
1321{"start_lr":0.012,"end_lr":0.001}
1324{"start":0.012,"end":0.001}
1328{"start_lr":0.024,"end_lr":0.001}
1341{"chunk_size":32768}
1370
1380{"min_lr_frac":0.1}
1388
1400{"epochs":10}
1406{"applied_to":"TTT"}
1408{"warmdown_steps":4000}
1423
1430{"lr_start":0.432,"lr_end":0.001}
1431{"used_for":"TTT","lr":0.003}
1434
1457
1460
1485
1487
1488{"start":0.012,"end":0.001}
1489
1492
1493
1501{"warmdown_start_step":2200}
1502{"phase":"warmdown"}
1517{"ttt":true}
1532{"warmdown":0.72}
1539{"final_lr_factor":0.1}
1544{"warmup_steps":100}
1545{"warmup_steps":100}
1550{"final_multiplier":0.1}
1553{"warmup_steps":100,"constant_after_warmup":true}
1580{"warmup_steps":64,"decay_start_frac":0.65,"min_scale":0.15}
1627
1636{"decay_steps":3000}
1696{"applied_to":"TTT"}
1698{"applied_to":"TTT learning rate"}
1711
1714{"warmup_steps":20,"warmdown_fraction":0.72}
1722{"start_lr":0.024,"end_lr":0.001}
1731{"applied_to":"TTT"}
1735{"scope":"epoch-level","t_max":21,"eta_min_ratio":0.1}
1737{"across_chunks":true}
1738{"scope":"epoch-level","t_max":21,"eta_min_ratio":0.1}
1758{"epochs":21}