<?xml version="1.0" encoding="UTF-8"?>
<!-- This sitemap was dynamically generated on April 13, 2026 at 4:59 am by All in One SEO v4.9.5.1 - the original SEO plugin for WordPress. -->

<?xml-stylesheet type="text/xsl" href="https://logicnest.cc/default-sitemap.xsl"?>

<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>Logic Nest</title>
		<link><![CDATA[https://logicnest.cc]]></link>
		<description><![CDATA[Logic Nest]]></description>
		<lastBuildDate><![CDATA[Sun, 12 Apr 2026 05:42:54 +0000]]></lastBuildDate>
		<docs>https://validator.w3.org/feed/docs/rss2.html</docs>
		<atom:link href="https://logicnest.cc/sitemap.rss" rel="self" type="application/rss+xml" />
		<ttl><![CDATA[60]]></ttl>

		<item>
			<guid><![CDATA[https://logicnest.cc/can-self-supervised-vision-transformers-match-supervised-reasoning/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-self-supervised-vision-transformers-match-supervised-reasoning/]]></link>
			<title>Can Self-Supervised Vision Transformers Match Supervised Reasoning?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:42:54 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-limitations-of-visual-intelligence-on-small-datasets/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-limitations-of-visual-intelligence-on-small-datasets/]]></link>
			<title>Understanding the Limitations of Visual Intelligence on Small Datasets</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:42:06 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-positional-encoding-on-vision-transformer-performance/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-positional-encoding-on-vision-transformer-performance/]]></link>
			<title>The Impact of Positional Encoding on Vision Transformer Performance</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:41:33 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-large-vision-transformers-learn-better-global-features/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-large-vision-transformers-learn-better-global-features/]]></link>
			<title>Why Do Large Vision Transformers Learn Better Global Features?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:40:49 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-hybrid-cnn-transformer-architectures-win-again/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-hybrid-cnn-transformer-architectures-win-again/]]></link>
			<title>Can Hybrid CNN-Transformer Architectures Win Again?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:40:21 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-deit-distills-knowledge-from-convolutional-neural-networks-cnns/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-deit-distills-knowledge-from-convolutional-neural-networks-cnns/]]></link>
			<title>How DEIT Distills Knowledge from Convolutional Neural Networks (CNNs)</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:39:44 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-use-of-shifted-windows-in-swin-transformer/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-use-of-shifted-windows-in-swin-transformer/]]></link>
			<title>Understanding the Use of Shifted Windows in Swin Transformer</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:39:14 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-makes-the-vit-scale-better-with-data/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-makes-the-vit-scale-better-with-data/]]></link>
			<title>What Makes the VIT Scale Better with Data</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:38:42 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-patch-embeddings-and-their-inductive-bias-in-machine-learning/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-patch-embeddings-and-their-inductive-bias-in-machine-learning/]]></link>
			<title>Understanding Patch Embeddings and Their Inductive Bias in Machine Learning</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:38:09 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-how-patch-embeddings-provide-inductive-bias-in-machine-learning/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-how-patch-embeddings-provide-inductive-bias-in-machine-learning/]]></link>
			<title>Understanding How Patch Embeddings Provide Inductive Bias in Machine Learning</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:37:24 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-vision-transformers-generalize-better-than-cnns-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-vision-transformers-generalize-better-than-cnns-2/]]></link>
			<title>Why Vision Transformers Generalize Better than CNNs</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:32:18 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-vision-transformers-generalize-better-than-cnns/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-vision-transformers-generalize-better-than-cnns/]]></link>
			<title>Why Do Vision Transformers Generalize Better Than CNNs?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:31:25 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-tokenization-on-scaling-laws/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-tokenization-on-scaling-laws/]]></link>
			<title>The Impact of Tokenization on Scaling Laws</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:30:14 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-deduplication-improves-downstream-tasks/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-deduplication-improves-downstream-tasks/]]></link>
			<title>Why Deduplication Improves Downstream Tasks</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:29:19 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-curated-data-beat-web-scale-pre-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-curated-data-beat-web-scale-pre-training/]]></link>
			<title>Can Curated Data Beat Web-Scale Pre-Training?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:28:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-pre-training-diversity-creates-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-pre-training-diversity-creates-intelligence/]]></link>
			<title>How Pre-Training Diversity Creates Intelligence</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:28:04 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-shifts-in-chinchilla-optimal-ratios-in-2026/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-shifts-in-chinchilla-optimal-ratios-in-2026/]]></link>
			<title>Understanding the Shifts in Chinchilla-Optimal Ratios in 2026</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:26:29 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-to-filter-data-to-avoid-collapse/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-to-filter-data-to-avoid-collapse/]]></link>
			<title>How to Filter Data to Avoid Collapse</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:25:59 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-model-collapse-on-synthetic-data/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-model-collapse-on-synthetic-data/]]></link>
			<title>Understanding Model Collapse on Synthetic Data</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:25:10 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-synthetic-data-bend-scaling-curves-upward/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-synthetic-data-bend-scaling-curves-upward/]]></link>
			<title>Can Synthetic Data Bend Scaling Curves Upward?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:24:41 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-data-diversity-drives-scaling-exponents-unlocking-potential-through-varied-data-sets/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-data-diversity-drives-scaling-exponents-unlocking-potential-through-varied-data-sets/]]></link>
			<title>How Data Diversity Drives Scaling Exponents: Unlocking Potential Through Varied Data Sets</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:24:12 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-significance-of-locality-hashing-in-reformer-models/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-significance-of-locality-hashing-in-reformer-models/]]></link>
			<title>The Significance of Locality Hashing in Reformer Models</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:23:28 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-does-performer-kernel-approximate-attention/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-does-performer-kernel-approximate-attention/]]></link>
			<title>How Does Performer Kernel Approximate Attention?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:22:56 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-sparse-attention-recover-full-performance/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-sparse-attention-recover-full-performance/]]></link>
			<title>Can Sparse Attention Recover Full Performance?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:22:17 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-grouped-query-trades-quality-for-speed/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-grouped-query-trades-quality-for-speed/]]></link>
			<title>Why Grouped-Query Trades Quality for Speed</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:21:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/exploring-the-impact-of-multi-query-attention-on-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/exploring-the-impact-of-multi-query-attention-on-intelligence/]]></link>
			<title>Exploring the Impact of Multi-Query Attention on Intelligence</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:21:03 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-larger-models-develop-interpretable-heads/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-larger-models-develop-interpretable-heads/]]></link>
			<title>Understanding Why Larger Models Develop Interpretable Heads</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:20:35 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-we-surgically-edit-heads-to-boost-reasoning/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-we-surgically-edit-heads-to-boost-reasoning/]]></link>
			<title>Can We Surgically Edit Heads to Boost Reasoning?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:20:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-attention-specialization-across-heads-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-attention-specialization-across-heads-2/]]></link>
			<title>Understanding Attention Specialization Across Heads</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:19:41 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-attention-specialization-across-heads/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-attention-specialization-across-heads/]]></link>
			<title>Understanding Attention Specialization Across Heads</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:19:13 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-emergence-of-induction-heads-in-pre-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-emergence-of-induction-heads-in-pre-training/]]></link>
			<title>Understanding the Emergence of Induction Heads in Pre-Training</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:18:41 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-grokking-relates-to-circuit-discovery/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-grokking-relates-to-circuit-discovery/]]></link>
			<title>How Grokking Relates to Circuit Discovery</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:18:10 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-role-of-replay-in-grokking-a-comprehensive-guide/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-role-of-replay-in-grokking-a-comprehensive-guide/]]></link>
			<title>Understanding the Role of Replay in Grokking: A Comprehensive Guide</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:17:38 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-ability/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-ability/]]></link>
			<title>Can Grokking Predict Emergent Reasoning Ability?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:17:07 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-batch-size-on-grokking-dynamics/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-batch-size-on-grokking-dynamics/]]></link>
			<title>The Impact of Batch Size on Grokking Dynamics</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:15:23 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-rarity-of-grokking-in-natural-language-data/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-rarity-of-grokking-in-natural-language-data/]]></link>
			<title>Understanding the Rarity of Grokking in Natural Language Data</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:14:33 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-weight-decay-speed-grokking-convergence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-weight-decay-speed-grokking-convergence/]]></link>
			<title>Can Weight Decay Speed Grokking Convergence?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:14:02 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-phase-transitions-in-grokking-triggers-and-mechanisms/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-phase-transitions-in-grokking-triggers-and-mechanisms/]]></link>
			<title>Understanding Phase Transitions in Grokking: Triggers and Mechanisms</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:13:32 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-curriculum-learning-accelerates-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-curriculum-learning-accelerates-grokking/]]></link>
			<title>How Curriculum Learning Accelerates Grokking</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:12:07 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-necessity-of-extended-training-in-grokking-algorithms/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-necessity-of-extended-training-in-grokking-algorithms/]]></link>
			<title>The Necessity of Extended Training in Grokking Algorithms</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:11:30 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-wide-nets-show-weaker-double-descent/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-wide-nets-show-weaker-double-descent/]]></link>
			<title>Why Do Wide Nets Show Weaker Double Descent?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:11:01 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-ntk-predict-double-descent-in-transformers/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-ntk-predict-double-descent-in-transformers/]]></link>
			<title>Can NTK Predict Double Descent in Transformers?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:10:20 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-feature-learning-in-relation-to-late-descent/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-feature-learning-in-relation-to-late-descent/]]></link>
			<title>Understanding Feature Learning in Relation to Late Descent</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:09:42 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-interpolation/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-interpolation/]]></link>
			<title>Understanding the Drop in Test Error After Interpolation</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 05:09:01 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-adversarial-examples-exploit-sharp-minima/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-adversarial-examples-exploit-sharp-minima/]]></link>
			<title>Understanding Why Adversarial Examples Exploit Sharp Minima</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:53:06 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-flatter-minima-resist-distribution-shifts/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-flatter-minima-resist-distribution-shifts/]]></link>
			<title>Can Flatter Minima Resist Distribution Shifts?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:52:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-flat-minima-hypothesis-and-its-role-in-generalization-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-flat-minima-hypothesis-and-its-role-in-generalization-2/]]></link>
			<title>Understanding Flat-Minima Hypothesis and Its Role in Generalization</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:52:00 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-group-normalization-helps-with-small-batch-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-group-normalization-helps-with-small-batch-training/]]></link>
			<title>How Group Normalization Helps with Small-Batch Training</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:46:12 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-causes-of-gradient-vanishing-in-plain-networks-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-causes-of-gradient-vanishing-in-plain-networks-2/]]></link>
			<title>Understanding the Causes of Gradient Vanishing in Plain Networks</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:44:49 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/]]></guid>
			<link><![CDATA[https://logicnest.cc/]]></link>
			<title>Home</title>
			<pubDate><![CDATA[Fri, 13 Feb 2026 16:51:24 +0000]]></pubDate>
		</item>
				</channel>
</rss>
