← Back to Architecture
Partial RoPE
ArchitectureUsed in
335 PRs
Best BPB
0.0180
Avg BPB
1.0628
Submissions
PR #64by yesbhautik
1.1250PR #175by anthony-maio
1.1229PR #218by bopmite
1.1248PR #315by jfprincz
1.1248PR #327by Ananddna
1.1450PR #330by bopmite
1.1609PR #332by saml212
1.1320PR #334by nathon-lee
1.2207PR #344by aryanbhosale
1.1330PR #351by sp00mm
1.1659PR #352by sp00mm
1.1659PR #356by sjp611
1.8338PR #371by mrdavtan
1.1401PR #374by unnirRECORD
1.1246PR #376by anthony-maio
1.1399PR #383by joelnishanth
1.1320PR #388by ElliotSlusky
1.1231PR #389by trasnake87
1.1466PR #397by translatingthename
1.1364PR #398by felipe-parodi
1.1213PR #399by abaybektursun
1.1247PR #400by chanwoo-park-official
1.1296PR #401by newjordan
1.1243PR #410by EthanYangTW
1.1216PR #413by anantdgoel
1.4525PR #414by signalrush
1.1233PR #415by EthanYangTW
1.1216PR #417by EthanYangTW
1.1227PR #418by yashverms
1.1715PR #434by parinzee
1.1370PR #445by newjordan
1.1236PR #452by ofirkris
1.1366PR #453by Divyesh-Thirukonda
1.1248PR #455by kasimte
1.1299PR #458by ofirkris
1.1365PR #461by Christopher-Lee-McClendon
1.1446PR #462by JoeProAI
1.0672PR #469by cmcdnd
1.1418PR #473by abaybektursun
1.1214PR #477by harsha-gouru
1.1522PR #478by gowtham0992
1.1268PR #481by mrdavtan
1.0970PR #482by harsha-gouru
1.1522PR #485by harsha-gouru
1.1522PR #486by ndokutovich
1.1101PR #487by anantdgoel
1.1720PR #489by sofiabod
1.1327PR #492by Divyesh-Thirukonda
1.1591PR #493by parinzee
1.1309PR #498by newjordan
1.1478PR #499by newjordan
1.1478PR #503by EthanYangTW
1.1195PR #505by JoeProAI
1.1181PR #507by skarakulak
1.1558PR #508by newjordan
1.1215PR #509by andrewbaggio1
1.1175PR #516by Asukabot0
1.1428PR #518by sofiabod
1.0622PR #526by Christopher-Lee-McClendon
1.1425PR #528by EthanYangTW
1.1195PR #529by EthanYangTW
1.1195PR #532by NotADevIAmaMeatPopsicle
1.0487PR #533by newjordan
1.1207PR #534by rarce
1.1804PR #535by raahilshah
1.1204PR #537by Christopher-Lee-McClendon
1.1387PR #543by rarce
1.1804PR #545by EthanYangTW
1.1179PR #549by abaybektursunRECORD
1.1194PR #564by sadeghja1070
1.1270PR #573by Sarimsaljook
1.0523PR #576by cmcdnd
1.1164PR #577by newjordan
1.1207PR #585by EthanYangTW
1.1179PR #586by EaCognitive
1.1365PR #592by Skytuhua
1.1476PR #593by abaybektursun
1.1163PR #598by Christopher-Lee-McClendon
1.1334PR #601by anantdgoel
1.1418PR #606by EthanYangTW
1.1162PR #609by saml212
1.1154PR #612by Christopher-Lee-McClendon
1.1079PR #634by raahilshah
1.1171PR #635by aryanbhosale
1.1330PR #638by Asukabot0
1.1164PR #642by minh-stakc
0.8173PR #644by Christopher-Lee-McClendon
1.0944PR #645by FlynnCruse
1.8990PR #653by demirelo
1.1552PR #657by anthony-maio
1.1234PR #661by andrewbaggio1
1.1175PR #668by Christopher-Lee-McClendon
1.0920PR #672by andrewbaggio1
1.0781PR #682by gthgomez
1.1233PR #685by andrewbaggio1
1.0366PR #688by RoyiRa
1.0745PR #690by EthanYangTW
1.1186PR #692by EthanYangTW
1.1186PR #693by EthanYangTW
1.1186PR #695by 0xNoramiya
1.1360PR #698by hesong0222-dev
1.1642PR #703by Gusanidas
1.1176PR #710by Dhruba531
1.1240PR #714by Upsalla
1.1187PR #715by Asukabot0
1.0337PR #720by agalimova
1.1078PR #726by DeepReinforce
1.1147PR #727by Asukabot0
0.9674PR #728by abaybektursun
1.1142PR #734by Robby955
1.1198PR #740by resouer
1.0909PR #741by andrewbaggio1
0.9850PR #752by Naazimsnh02
1.1182PR #754by aryanbhosale
1.1253PR #761by Asukabot0
0.9581PR #768by mradassaad
1.1201PR #770by minh-stakc
0.6672PR #774by travispchen
0.9370PR #778by raahilshah
0.9605PR #779by deanbrr
0.6683PR #786by shinegami-2002
0.8128PR #794by jeremyschied
1.3346PR #796by Robby955
0.6567PR #802by Bortlesboat
0.9123PR #808by Naazimsnh02
0.6364PR #809by AayushBaniya2006
0.2952PR #816by jimliu741523
1.1194PR #826by himanshudongre
0.2951PR #827by Programmerryoki
1.3999PR #828by bigbag
0.9076PR #832by jfprincz
1.1903PR #836by autocode-rayes
1.1219PR #838by aryanbhosale
1.1215PR #841by someone114514
1.1157PR #849by dttdrv
1.1105PR #857by aruniyer
1.1093PR #864by aryanbhosale
0.2841PR #865by aryanbhosale
0.2841PR #871by greqone
0.8004PR #872by gowtham0992
1.0467PR #876by Bortlesboat
0.5863PR #887by anthony-maio
0.9642PR #889by anthony-maio
0.9642PR #890by sofiabod
0.4405PR #891by robbiebusinessacc
1.1428PR #892by robbiebusinessacc
1.1428PR #893by aryanbhosale
0.1310PR #896by MVPandey
1.1896PR #908by albertorkive
1.1734PR #909by sunnypatneedi
0.8609PR #912by Bortlesboat
0.3461PR #915by anthony-maio
0.9642PR #916by Bortlesboat
0.3461PR #918by haikosys
0.1653PR #921by TimPietrusky
0.0939PR #922by greqone
0.0972PR #926by NandhuRajRK
0.8705PR #932by anthony-maio
1.1580PR #937by mihir-s-05
1.4457PR #941by aptsalt
1.3620PR #945by TimPietrusky
0.0274PR #952by FlashyFlash3011
1.1144PR #953by dexhunter
1.0722PR #961by callithyia
0.0881PR #963by sunnypatneedi
0.8609PR #964by vivekvar-dl
1.3900PR #967by dexhunter
1.0450PR #974by anthony-maio
1.6542PR #975by Abhishek8108
1.1216PR #986by sofiabod
0.0830PR #991by ibarrajo
1.1145PR #995by dexhunter
1.0362PR #1004by ibarrajo
1.1182PR #1005by OnlyJundong
1.0853PR #1006by NewyorkDev
1.1085PR #1007by dillon-blake
1.2252PR #1008by monkeyKingProgrammer
1.1538PR #1033by Naazimsnh02
0.4311PR #1037by TimPietruskyRunPod
1.1179PR #1039by yufengli-oai
1.1184PR #1043by okezue
1.1261PR #1051by tejas-goyal
1.2826PR #1056by sofiabod
0.0180PR #1062by yaowubarbara
1.4508PR #1066by adityakm24
1.1259PR #1069by manfromnowhere143
1.1190PR #1070by manfromnowhere143
1.1190PR #1072by vimeto
1.1170PR #1077by malc3om
1.1130PR #1081by michaelwinczuk
1.1220PR #1084by AnubhavBharadwaaj
1.1185PR #1085by adityasasidhar
1.2831PR #1086by Omrigotlieb
1.1349PR #1087by Dhenenjay
1.1407PR #1089by mikeapedia
1.1086PR #1094by michaelwinczuk
0.4027PR #1098by adityakm24
1.1187PR #1099by Bortlesboat
1.1133PR #1101by amrayach
1.1290PR #1105by abaybektursun
1.2208PR #1108by DbBested
1.1502PR #1112by dillon-blake
1.2252PR #1113by gowtham0992
1.3705PR #1117by adityakm24
1.1187PR #1118by adityakm24
1.1187PR #1123by sisegod
1.1986PR #1125by jainpranjal97
1.1946PR #1126by AnirudhRahul
1.1091PR #1127by dentity007
1.1311PR #1128by AnubhavBharadwaaj
1.1154PR #1129by EthanYangTW
1.1174PR #1130by Gusanidas
1.1140PR #1144by inFaaa
1.3572PR #1148by aamodbhatt
1.1179PR #1150by sahiee-dev
1.1151PR #1166by Christopher-Lee-McClendon
1.1347PR #1170by Christopher-Lee-McClendon
1.1199PR #1171by EthanYangTW
1.1145PR #1182by adityakm24
1.1227PR #1184by icryo
0.9485PR #1185by skoustav35
0.9641PR #1209by andrewbaggio1
1.1064PR #1216by SoHarshh
1.1574PR #1221by amabito
1.1915PR #1228by meinlebenswerk
1.1527PR #1230by nestamidavaine
1.1163PR #1231by nestamidavaine
1.1163PR #1236by ibarrajo
1.1179PR #1237by ibarrajo
1.1198PR #1240by andrewbaggio1
1.1064PR #1244by monkeyKingProgrammer
1.1443PR #1246by deborahnelson8788726
0.9650PR #1247by fahmitech
1.2208PR #1252by ahmetdenizyilmaz
1.0713PR #1269by Jtss-ux
1.1194PR #1276by BiggerDABOSS
1.1100PR #1278by GitGeeks
1.1147PR #1284by tyrel-beede
1.1207PR #1289by MatoTeziTanka
1.0819PR #1296by aryanbhosale
1.0926PR #1298by Omrigotlieb
1.1043PR #1303by anthony-maio
0.9462PR #1311by htrung1105
1.1303PR #1313by anthony-maio
0.8637PR #1318by renqianluo
1.0095PR #1321by anthony-maio
0.7406PR #1324by yahya010
0.8275PR #1328by renqianluo
0.6361PR #1329by renqianluo
0.6361PR #1335by WeijieChen2017
1.1948PR #1361by jorge-asenjo
1.1220PR #1366by yunoshev
1.1371PR #1368by JKSNS
0.8503PR #1376by stukenov
0.7094PR #1378by Rajat123456789
1.1711PR #1386by Buld1n
1.1452PR #1389by Rome-1
1.7270PR #1399by AnubhavBharadwaaj
1.0898PR #1405by anthony-maio
1.0856PR #1408by aamodbhatt
1.0800PR #1413by dexhunterRECORD
1.0828PR #1414by Abhishek8108
0.7093PR #1427by kjahan
1.2092PR #1435by AbhayAnandUCSD
1.0980PR #1437by dexhunter
1.0780PR #1440by Mertyandimata
1.1026PR #1444by hypnoastic
1.3081PR #1446by LauraGomezjurado
1.0960PR #1450by andrewbaggio1
1.0848PR #1452by bsisduck
0.3509PR #1454by bsisduck
0.3509PR #1456by sisegod
1.1465PR #1457by DilpreetBansi
1.1454PR #1467by PhamPhuHoa-23
1.1056PR #1472by trhgbao
1.2066PR #1473by AVINASH0052
1.1156PR #1492by bigbag
1.0810PR #1493by bigbagRECORD
1.0810PR #1499by dippatel1994
1.6323PR #1512by Itssshikhar
1.1117PR #1514by dexhunter
1.0798PR #1515by dexhunter
1.0872PR #1520by taka6745
1.0824PR #1528by xiehuanyi
1.1104PR #1536by dexhunter
1.0775PR #1538by davie2009kh
1.1180PR #1539by translatingthename
1.0587PR #1541by bigbag
1.0778PR #1546by SPThole
1.0850PR #1548by dljr-github
1.3220PR #1549by dljr-github
1.3220PR #1550by translatingthename
1.0587PR #1555by andrewbaggio1
1.0764PR #1559by adityasasidhar
1.2498PR #1568by yuitokyouni
1.1639PR #1573by shivangbaveja
1.1464PR #1583by codemath3000
1.0801PR #1584by codemath3000
1.0752PR #1585by codemath3000
1.0639PR #1586by dexhunter
1.0749PR #1600by sayujshah
1.2781PR #1602by SPThole
1.0744PR #1612by seekerPrice
1.5096PR #1616by Vickyrrrrrr
1.4100PR #1617by adityasasidhar
1.2192PR #1619by AVINASH0052
1.1156PR #1621by mrbese
1.1531PR #1628by yu314-coder
1.1921PR #1630by KevinChunye
1.1412PR #1639by kunwar-vikrant
1.0832PR #1646by sergeevii123
1.0909PR #1658by AVINASH0052
1.0810PR #1661by anderamondarainh-stack
1.1444PR #1666by mrbese
1.1531PR #1667by MarioPaerle
1.0714PR #1670by dexhunter
1.0597PR #1672by andrewbaggio1
1.0119PR #1676by aazizyan
1.0788PR #1683by yunoshev
1.1280PR #1688by Buld1n
1.0809PR #1689by chris-colinsky
1.0822PR #1693by dexhunter
1.0573PR #1696by kings-crown
1.1224PR #1714by Anakintano
1.0857PR #1715by G3sparky
1.0809PR #1716by himanshudongre
1.0788PR #1720by kiyoaki
1.0818PR #1722by deborahnelson8788726
0.6580PR #1724by Unwindology
1.1803PR #1728by mikeapedia
1.0771PR #1731by Victory963
1.0785PR #1737by sakthivarshans
1.0723PR #1747by swapp1990
1.0820PR #1755by OE-GOD
1.0746PR #1759by yijieyuan
1.0799Hyperparameters Across PRs
| pr_number | parameters |
|---|---|
| 64 | {"dimensions":16,"total_dimensions":64} |
| 175 | {"train_length":null,"eval_length":null} |
| 218 | {"dimensions":16,"total_dimensions":64} |
| 315 | {"dimensions":16,"total_dimensions":64} |
| 327 | {"fraction":0.5} |
| 330 | {"dimensions":"16/64"} |
| 332 | {"dimensions":16} |
| 334 | {"dimensions":16,"total_head_dims":64} |
| 344 | {"dimensions":"16/64"} |
| 351 | {"dimensions":16,"total_dimensions":64} |
| 352 | {"dimensions":16,"total_dimensions":64} |
| 356 | {"dimensions":16,"total_dimensions":64} |
| 371 | {"dimensions":16} |
| 374 | {"dimensions":16,"total_dimensions":64} |
| 376 | {"rope_dims":16,"total_dims":64,"base":50000} |
| 383 | {"dimensions":16,"base_dimensions":64} |
| 388 | {"dimensions":16,"total_dimensions":64} |
| 389 | {"dimensions":16,"total_head_dims":64} |
| 397 | {"dimensions":16} |
| 398 | {"dimensions":16} |
| 399 | {"dimensions":16} |
| 400 | {"dimensions":16} |
| 401 | {"dimensions":16,"total_dimensions":64} |
| 410 | {"dimensions":"16/64"} |
| 413 | — |
| 414 | {"dimensions":"16/64"} |
| 415 | {"train_length":16,"eval_length":64} |
| 417 | {"train_fraction":16,"total_fraction":64} |
| 418 | {"dimensions":16,"total_dimensions":64} |
| 434 | {"head_dims_rotary":16,"head_dims_total":64,"fraction":0.25} |
| 445 | {"16/64":true} |
| 452 | {"dimensions":"16/64"} |
| 453 | {"dimensions":16,"total_dimensions":64} |
| 455 | {"dimensions":16,"base_dimensions":64} |
| 458 | {"dimensions":"16/64"} |
| 461 | {"dimensions":16,"total_dimensions":64} |
| 462 | {"dimensions":16} |
| 469 | {"dimensions":"16/64"} |
| 473 | {"dimensions":16,"base":64} |
| 477 | {"dimensions":16,"total_dimensions":64} |
| 478 | {"dimensions":16,"total_dimensions":64} |
| 481 | {"dimensions":"16/64"} |
| 482 | {"dimensions":16,"total_dimensions":64} |
| 485 | {"dimensions":16,"total_dimensions":64} |
| 486 | {"dimensions":"16/64"} |
| 487 | {"dimensions":16} |
| 489 | {"rotary_dims":16,"total_dims":64} |
| 492 | {"head_dims":"16/64"} |
| 493 | {"dims_used":16,"total_dims":64} |
| 498 | {"rope_dims":16,"total_dims":64} |
| 499 | {"rope_dims":16,"total_dims":64} |
| 503 | {"dimensions":"16/64"} |
| 505 | {"dimensions":16} |
| 507 | {"percentage":25} |
| 508 | {"dimensions":16,"base_dimensions":64} |
| 509 | {"dimensions":16} |
| 516 | {"dimensions":"16/64"} |
| 518 | {"dimensions":16,"total_dimensions":64} |
| 526 | {"dimensions":16,"total_dimensions":64} |
| 528 | {"dimensions":"16/64"} |
| 529 | {"dimensions":"16/64"} |
| 532 | {"dimensions":"16/64"} |
| 533 | {"numerator":16,"denominator":64} |
| 534 | {"dimensions":16,"total_dimensions":64} |
| 535 | {"dimensions":"16/64"} |
| 537 | {"dimensions":"16/64"} |
| 543 | {"rotary_dims":16,"total_dims":64,"position_free_ratio":0.75} |
| 545 | {"train_length":null,"eval_length":null} |
| 549 | {"dimensions":16} |
| 564 | {"dimensions":16,"total_dimensions":64} |
| 573 | {"dimensions":16,"total_head_dims":64} |
| 576 | {"ratio":"16/64"} |
| 577 | {"scaling":"16/64"} |
| 585 | {"ratio":"16/64"} |
| 586 | {"dimensions":16} |
| 592 | {"dimensions":16} |
| 593 | {"dimensions":16,"total_dimensions":64} |
| 598 | {"dims":16,"total_dims":64,"train_seq_len":1024} |
| 601 | {"dimensions":16} |
| 606 | {"partial_rope":"16/64"} |
| 609 | {"partial_rope":"16/64"} |
| 612 | {"dims":"16/64","train_seq":2048} |
| 634 | {"dimensions":"16/64"} |
| 635 | {"dimensions":"16/64"} |
| 638 | {"train_dims":16,"total_dims":64} |
| 642 | {"dimensions":"16/64"} |
| 644 | {"dims":"16/64","train_seq":2048} |
| 645 | — |
| 653 | {"dims":"16/64"} |
| 657 | {"dimensions":"16/64"} |
| 661 | {"dimensions":16} |
| 668 | {"dimensions":16} |
| 672 | {"dimensions":16} |
| 682 | {"dimensions":16} |
| 685 | — |
| 688 | {"dimensions":"16/64"} |
| 690 | {"train_length":16,"eval_length":64} |
| 692 | {"train_length":16,"eval_length":64} |
| 693 | {"dimensions":"16/64"} |
| 695 | {"dimensions":"16/64"} |
| 698 | {"dimensions":16} |
| 703 | {"dimensions":16} |
| 710 | {"dimensions":16,"total_dimensions":64} |
| 714 | {"dimensions":16,"total_dimensions":64} |
| 715 | {"dimensions":"16/64"} |
| 720 | {"dimensions":16,"total_dimensions":64} |
| 726 | {"dimensions":"16/64"} |
| 727 | {"dimensions":"16/64"} |
| 728 | {"dimensions":16,"base_dimensions":64} |
| 734 | {"dimensions":"16/64"} |
| 740 | {"percentage":25} |
| 741 | — |
| 752 | {"dimensions":16,"total_dimensions":64} |
| 754 | {"dimensions":"16/64"} |
| 761 | {"dimensions":16,"total_dimensions":64} |
| 768 | {"dimensions":[16,64]} |
| 770 | {"train_length":null,"eval_length":null} |
| 774 | {"dimensions":16} |
| 778 | {"train_or_eval":null,"dimensions":"16/64"} |
| 779 | {"dimensions":"16/64"} |
| 786 | {"dimensions":16} |
| 794 | {"dimensions":"16/64"} |
| 796 | {"rope_dims":16,"total_dims":64} |
| 802 | {"fraction":"16/64"} |
| 808 | {"dimensions":16} |
| 809 | {"dims":"16/64"} |
| 816 | {"dimensions":"16/64"} |
| 826 | {"dims":"16/64"} |
| 827 | {"dimensions":16,"total_dimensions":64} |
| 828 | {"dimensions":"16/64"} |
| 832 | {"dimensions":16} |
| 836 | {"dimensions":"16/64"} |
| 838 | {"dimensions":"16/64"} |
| 841 | — |
| 849 | {"dimensions":"16/64"} |
| 857 | {"train":16,"total":64} |
| 864 | {"dimensions":"16/64"} |
| 865 | {"dimensions":"16/64"} |
| 871 | {"train":16,"total":64} |
| 872 | {"dimensions":16,"total_dimensions":64} |
| 876 | — |
| 887 | {"train_length":16,"eval_length":64} |
| 889 | {"train":16,"eval":64} |
| 890 | {"dimensions":"16/64"} |
| 891 | {"dimensions":"16/64"} |
| 892 | {"dimensions":"16/64"} |
| 893 | {"16/64":true} |
| 896 | — |
| 908 | {"dimensions":16} |
| 909 | {"dimensions":"16/64"} |
| 912 | — |
| 915 | {"dimensions":"16/64"} |
| 916 | {"ratio":"16/64"} |
| 918 | {"dimensions":16} |
| 921 | {"dimensions":64} |
| 922 | {"train_eval_ratio":"16/64"} |
| 926 | — |
| 932 | {"train_length":64,"eval_length":16} |
| 937 | {"dimensions":32} |
| 941 | {"dimensions":16,"total_dimensions":64} |
| 945 | {"dimensions":16} |
| 952 | {"dimensions":"16/64"} |
| 953 | {"dimensions":"16/64"} |
| 961 | {"numerator":16,"denominator":64} |
| 963 | {"dimensions":"16/64"} |
| 964 | {"train":"16/64"} |
| 967 | {"dimensions":"16/64"} |
| 974 | — |
| 975 | {"dimensions":16,"base_dimensions":64} |
| 986 | {"fraction":"16/64"} |
| 991 | {"dimensions":16} |
| 995 | — |
| 1004 | {"dimensions":16} |
| 1005 | {"dimensions":16,"total_dimensions":64} |
| 1006 | {"dimensions":16} |
| 1007 | {"dimensions":16,"total_dimensions":64} |
| 1008 | {"dimensions":"16/64"} |
| 1033 | {"dimensions":16} |
| 1037 | {"dimensions":16} |
| 1039 | {"dimensions":16} |
| 1043 | {"dimensions":16,"total_dimensions":64} |
| 1051 | — |
| 1056 | {"dimensions":"16/64"} |
| 1062 | {"range":"16/64"} |
| 1066 | {"dimensions":16} |
| 1069 | {"partial":"16/64"} |
| 1070 | {"head_dims":16,"total_head_dims":64} |
| 1072 | {"dimensions":"16/64"} |
| 1077 | {"rope_dims":16,"total_dims":64} |
| 1081 | — |
| 1084 | {"dimensions":16} |
| 1085 | {"dimensions":16} |
| 1086 | {"rotated_dims":16,"total_dims":64} |
| 1087 | {"dimensions":16,"total_dimensions":64} |
| 1089 | {"dimensions":16} |
| 1094 | — |
| 1098 | {"rope_dims":16} |
| 1099 | {"dimensions":16} |
| 1101 | {"rotated_dims":16,"total_dims":64} |
| 1105 | {"partial":"16/64"} |
| 1108 | {"dimensions":16} |
| 1112 | {"dimensions":16,"total_dimensions":64} |
| 1113 | {"dimensions":"16/64"} |
| 1117 | {"rope_dims":16} |
| 1118 | {"dimensions":16} |
| 1123 | {"dimensions":16} |
| 1125 | {"dimensions":"16/64"} |
| 1126 | {"dimensions":16,"total_dimensions":64} |
| 1127 | {"dimensions":"16/64"} |
| 1128 | {"dimensions":16} |
| 1129 | {"train_fraction":16,"total_fraction":64} |
| 1130 | {"dimensions":"16/64"} |
| 1144 | {"dimensions":16,"total_dimensions":64,"fraction":0.25} |
| 1148 | {"dimensions":16} |
| 1150 | {"rope_dims":16} |
| 1166 | {"dimensions":16} |
| 1170 | {"dimensions":16,"base":10000} |
| 1171 | {"fraction":"16/64"} |
| 1182 | {"dimensions":16} |
| 1184 | {"dimensions":16} |
| 1185 | {"dimensions":"16/64"} |
| 1209 | {"dimensions":16} |
| 1216 | {"dimensions":"16/64"} |
| 1221 | {"dimensions":16} |
| 1228 | — |
| 1230 | {"dimensions":16} |
| 1231 | {"dimensions":"16/64"} |
| 1236 | {"dimensions":16} |
| 1237 | {"dimensions":"16/64"} |
| 1240 | {"dimensions":16} |
| 1244 | {"rope_dims":16,"total_dims":64} |
| 1246 | {"dimensions":16,"total_dimensions":96} |
| 1247 | {"dimensions":16} |
| 1252 | — |
| 1269 | {"dimensions":16,"total_dimensions":64} |
| 1276 | {"dimensions":16} |
| 1278 | — |
| 1284 | {"dims":"16/64"} |
| 1289 | {"dimensions":16} |
| 1296 | {"dimensions":16} |
| 1298 | {"dimensions":16} |
| 1303 | {"train_fraction":16,"total_fraction":64} |
| 1311 | {"dimensions":16,"total_dimensions":64} |
| 1313 | {"train_eval_ratio":"16/64"} |
| 1318 | {"dimensions":16} |
| 1321 | {"partial":"16/64"} |
| 1324 | {"train":16,"eval":64} |
| 1328 | {"dimensions":16} |
| 1329 | {"dimensions":16} |
| 1335 | {"dimensions":"16/64"} |
| 1361 | {"rotary_dims":16,"head_dims":64} |
| 1366 | {"percent":25} |
| 1368 | {"dimensions":16,"total_dimensions":64} |
| 1376 | {"partial":"16/64"} |
| 1378 | {"dimensions":16,"total_dimensions":64} |
| 1386 | {"dimensions":16} |
| 1389 | {"dimensions":"16/64"} |
| 1399 | {"dimensions":16} |
| 1405 | {"numerator":16,"denominator":64} |
| 1408 | {"dimensions":16} |
| 1413 | {"dimensions":16} |
| 1414 | {"dimensions":16,"base":64} |
| 1427 | {"dimensions":16,"head_dimensions":64} |
| 1435 | {"dims":"16/64"} |
| 1437 | {"dimensions":16,"total_dimensions":64} |
| 1440 | {"dimensions":16} |
| 1444 | {"ratio":"16/64"} |
| 1446 | {"dims":"16/64"} |
| 1450 | {"dimensions":16,"total_dimensions":64} |
| 1452 | {"dimensions":"16/64"} |
| 1454 | {"dimensions":16,"total_dimensions":64} |
| 1456 | {"rope_dims":16,"head_dims":64} |
| 1457 | {"dimensions":16} |
| 1467 | {"dimensions":16,"base_dimensions":64} |
| 1472 | {"dimensions":16,"total_dimensions":64} |
| 1473 | {"dimensions":16,"total_dimensions":64} |
| 1492 | {"dimensions":"16/64"} |
| 1493 | {"dimensions":"16/64"} |
| 1499 | {"dimensions":16} |
| 1512 | {"offset":1} |
| 1514 | {"dimensions":16} |
| 1515 | {"dimensions":"16/64"} |
| 1520 | {"dimensions":16,"base_dimensions":64} |
| 1528 | {"dimensions":16,"denominator":64} |
| 1536 | {"dimensions":16,"base_dimensions":64} |
| 1538 | {"dimensions":16,"total_dimensions":64} |
| 1539 | {"dimensions":16,"total_dimensions":64} |
| 1541 | {"dimensions":"16/64"} |
| 1546 | {"dimensions":"16/64"} |
| 1548 | {"dimensions":16} |
| 1549 | {"dimensions":16} |
| 1550 | {"dimensions":"16/64"} |
| 1555 | {"dimensions":"16/64"} |
| 1559 | {"dimensions":32} |
| 1568 | {"dimensions":16} |
| 1573 | {"dimensions":16,"total_dimensions":64} |
| 1583 | {"dimensions":16,"total_dimensions":64} |
| 1584 | {"dimensions":"16/64"} |
| 1585 | {"partial_ratio":"16/64"} |
| 1586 | {"dimensions":"16/64"} |
| 1600 | {"dimensions":16} |
| 1602 | {"rope_dims":16,"head_dims":64} |
| 1612 | {"dimensions":16} |
| 1616 | {"dimensions":16,"total_dimensions":64} |
| 1617 | {"dimensions":32} |
| 1619 | {"head_dims":16,"total_head_dims":64} |
| 1621 | {"dimensions":16} |
| 1628 | {"dimensions":16,"total_dimensions":64} |
| 1630 | {"dimensions":16,"total_dimensions":64} |
| 1639 | {"layers":"16/64"} |
| 1646 | {"ratio":"16/64"} |
| 1658 | {"dimensions":16,"total_dimensions":64} |
| 1661 | {"dimensions":16,"total_dimensions":64} |
| 1666 | {"dimensions":16} |
| 1667 | {"dimensions":"16/64"} |
| 1670 | {"dimensions":"16/64"} |
| 1672 | {"dimensions":"16/64"} |
| 1676 | {"dimensions":16,"total_dimensions":64} |
| 1683 | {"ratio":0.25} |
| 1688 | {"dimensions":"16/64"} |
| 1689 | {"dimensions":16,"total_dimensions":64} |
| 1693 | {"dimensions":16,"total_dimensions":64} |
| 1696 | {"dimensions":"16/64"} |
| 1714 | {"dimensions":16} |
| 1715 | {"dimensions":16,"total_dimensions":64} |
| 1716 | {"dimensions":"16/64"} |
| 1720 | {"dimensions":"16/64"} |
| 1722 | {"dimensions":"16/64"} |
| 1724 | — |
| 1728 | {"layers":[4,9,10],"rope_dims":16,"head_dim":64} |
| 1731 | {"dimensions":16} |
| 1737 | {"dimensions":16,"total_dimensions":64} |
| 1747 | {"dimensions":16,"head_dim":64} |
| 1755 | {"dimensions":16,"total_dimensions":64} |
| 1759 | {"dimensions":16,"total_dimensions":64} |