mixed int6/int8

pr_number	bits	scope
39	6	middle layers 3-6 int6; first/last 3 layers int8
70	6	int6 per-row on MLP and attention projection weights; int8 per-row on embeddings and other tensors
78	6	weights int6, embeddings int8
92	6	weights and embeddings
99	6	.mlp., .attn.c_q., .attn.c_v., .attn.proj. in int6; .attn.c_k. mostly grouped int8; selected late-layer c_k and tok_emb in fp16
104	6	all block matrices
120	—	transformer blocks and embeddings
131	6	transformer block weights; embeddings use int8
160	6	most tensors, with int8 token embedding
164	6	MLP and attention int6; embeddings and bigram int8; controls fp32
176	8	all weights by default, with middle blocks 3,4,5,6 forced to int6; embeddings and LM head kept fp16
198	6	MLP and attention int6; embeddings int8
222	6	layers 2-8 int6; layers 0/1/9 int8 per-row; embeddings fp16
223	—	MLP+Attn int6, embeddings int8
236	6	attention + MLP weights; int8 tok_emb
254	6	MLP+attention; embeddings int8; tied embeddings fp16
274	6	MLP, attention, tied embeddings
281	6	MLP+attention; embeddings int8
287	6	MLP and attention int6, embeddings int8
309	—	boundary layers int8, middle layers int6, tied embeddings fp16, control tensors fp32
312	6	MLP and attention int6; other large tensors int8
315	6	MLP and attention int6; embeddings int8
388	6	MLP and attention weights int6 per-row; embeddings int8 per-row
453	6	MLP and attention int6; embeddings int8
492	—	—
502	6	attn, mlp
534	6	layers 1-9 int6, layers 0 and 10 int8
570	—	all
598	—	int6 per-row for attention projections and MLP weights; int8 per-tensor for layer norms, value embeddings, biases, embedding tables
803	6	model weights
820	6	model weights
898	6	embeddings
958	—	all
1042	—	embeddings
1046	6	MLP and attention weights int6; embeddings and Markov table int8; control tensors fp16
1065	6	block weights and embeddings
1080	6	MLP and attention; embeddings on int8 path
1085	6	MLP and attention projections, plus smaller tensors in int8/fp16
1086	—	embeddings, MLP, attention
1101	6	MLP+attn int6; embeddings+other int8
1142	6	auxiliary embeddings + main trunk int8
1166	—	per-row weights
1204	—	model weights
1205	—	weights and embeddings
1389	—	shared layers int8, others int6
1474	6	most tensors with selected sensitive tensors promoted to int8
1487	—	weights and embeddings
1495	6	most tensors with selected sensitive tensors promoted to int8
1517	—	all
1559	—	model weights
1617	6	attention/MLP layers
1635	6	model weights
1647	6	model weights
1649	—	MLP+attention int6, embeddings int8
1697	—	attention and MLP matrices, embeddings
1698	6	matrices and embeddings
1716	6	matrices, embeddings, control tensors, small 2-D matrices
1720	—	int6 for attention and MLP, int8 for embeddings
1737	—	attention/MLP int6, embeddings int8
1754	—	weights and embeddings
1829	6	export
1889	—	all
1890	6	all weights with SSM dynamics rows at int8
1895	—	embeddings and selected attention projections
1918	—	top-1 salient 64-column group per matrix
1935	—	embeddings/body variants explored
1937	6	attn.proj.weight int6, elsewhere int8
1946	—	top-1 salient 64-column group per matrix
1978	—	model
2058	6	block weights
2106	—	weights and embeddings
2131	—	model weights