<?xml version="1.0" encoding="UTF-8"?>
<!-- This sitemap was dynamically generated on April 12, 2026 at 9:33 am by All in One SEO v4.9.5.1 - the original SEO plugin for WordPress. -->

<?xml-stylesheet type="text/xsl" href="https://logicnest.cc/default-sitemap.xsl"?>

<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>Logic Nest</title>
		<link><![CDATA[https://logicnest.cc]]></link>
		<description><![CDATA[Logic Nest]]></description>
		<lastBuildDate><![CDATA[Sun, 12 Apr 2026 04:02:32 +0000]]></lastBuildDate>
		<docs>https://validator.w3.org/feed/docs/rss2.html</docs>
		<atom:link href="https://logicnest.cc/sitemap.rss" rel="self" type="application/rss+xml" />
		<ttl><![CDATA[60]]></ttl>

		<item>
			<guid><![CDATA[https://logicnest.cc/can-masked-modeling-surpass-contrastive-learning-in-reasoning-tasks/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-masked-modeling-surpass-contrastive-learning-in-reasoning-tasks/]]></link>
			<title>Can Masked Modeling Surpass Contrastive Learning in Reasoning Tasks?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:02:32 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-data-efficient-self-supervision-in-vision/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-data-efficient-self-supervision-in-vision/]]></link>
			<title>Understanding Data-Efficient Self-Supervision in Vision</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:01:56 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-dinov2-produces-emergent-object-boundaries/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-dinov2-produces-emergent-object-boundaries/]]></link>
			<title>Understanding Why DINOV2 Produces Emergent Object Boundaries</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:01:26 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-makes-siglip-more-stable-than-the-original-clip/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-makes-siglip-more-stable-than-the-original-clip/]]></link>
			<title>What Makes Siglip More Stable Than the Original Clip</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:00:48 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-beit-3-unifies-vision-language-representations/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-beit-3-unifies-vision-language-representations/]]></link>
			<title>How Beit-3 Unifies Vision-Language Representations</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 04:00:14 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-does-masked-image-modeling-learn-strong-semantics/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-does-masked-image-modeling-learn-strong-semantics/]]></link>
			<title>Why Does Masked Image Modeling Learn Strong Semantics?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:59:30 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/enhancing-long-sequence-intelligence-with-xpos/]]></guid>
			<link><![CDATA[https://logicnest.cc/enhancing-long-sequence-intelligence-with-xpos/]]></link>
			<title>Enhancing Long-Sequence Intelligence with XPOS</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:58:55 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-positional-interpolation-extend-context-without-fine-tuning/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-positional-interpolation-extend-context-without-fine-tuning/]]></link>
			<title>Can Positional Interpolation Extend Context Without Fine-Tuning?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:58:12 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-relative-positional-encodings-outperform-learned-positional-encodings/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-relative-positional-encodings-outperform-learned-positional-encodings/]]></link>
			<title>Why Relative Positional Encodings Outperform Learned Positional Encodings</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:57:35 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-makes-alibi-better-for-extrapolation-than-rope/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-makes-alibi-better-for-extrapolation-than-rope/]]></link>
			<title>What Makes Alibi Better for Extrapolation Than Rope</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:56:18 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-benefits-of-duplicate-token-heads-in-copying/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-benefits-of-duplicate-token-heads-in-copying/]]></link>
			<title>Understanding the Benefits of Duplicate Token Heads in Copying</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:55:26 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-we-edit-induction-heads-to-enhance-reasoning/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-we-edit-induction-heads-to-enhance-reasoning/]]></link>
			<title>Can We Edit Induction Heads to Enhance Reasoning?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:54:48 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-specialization-of-attention-heads-during-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-specialization-of-attention-heads-during-training/]]></link>
			<title>Understanding the Specialization of Attention Heads During Training</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:54:14 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-modularity-of-large-transformers-circuits/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-modularity-of-large-transformers-circuits/]]></link>
			<title>Understanding the Modularity of Large Transformers Circuits</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:53:35 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-deduplication-improves-downstream-performance/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-deduplication-improves-downstream-performance/]]></link>
			<title>Why Deduplication Improves Downstream Performance</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:52:14 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-curated-data-outperform-web-scale-pre-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-curated-data-outperform-web-scale-pre-training/]]></link>
			<title>Can Curated Data Outperform Web-Scale Pre-Training?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:51:38 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-role-of-pre-training-data-diversity-in-enhancing-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-role-of-pre-training-data-diversity-in-enhancing-intelligence/]]></link>
			<title>The Role of Pre-Training Data Diversity in Enhancing Intelligence</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:51:03 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-current-models-exceed-compute-optimal/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-current-models-exceed-compute-optimal/]]></link>
			<title>Why Current Models Exceed Compute-Optimal</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:50:24 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-causes-chinchilla-optimal-ratio-to-shift/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-causes-chinchilla-optimal-ratio-to-shift/]]></link>
			<title>What Causes Chinchilla-Optimal Ratio to Shift</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:49:28 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-model-collapse-in-ai-generated-data-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-model-collapse-in-ai-generated-data-2/]]></link>
			<title>Understanding Model Collapse in AI-Generated Data</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:48:16 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-synthetic-data-break-current-scaling-curves/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-synthetic-data-break-current-scaling-curves/]]></link>
			<title>Can Synthetic Data Break Current Scaling Curves?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:47:25 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-impact-of-data-quality-on-scaling-exponents/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-impact-of-data-quality-on-scaling-exponents/]]></link>
			<title>Understanding the Impact of Data Quality on Scaling Exponents</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:46:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-performer-kernels-approach-to-attention-mechanisms/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-performer-kernels-approach-to-attention-mechanisms/]]></link>
			<title>Understanding Performer Kernel&#8217;s Approach to Attention Mechanisms</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:41:21 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-sparse-attention-recover-full-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-sparse-attention-recover-full-intelligence/]]></link>
			<title>Can Sparse Attention Recover Full Intelligence?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:39:57 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-grouped-query-attention-and-its-quality-trade-offs/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-grouped-query-attention-and-its-quality-trade-offs/]]></link>
			<title>Understanding Grouped-Query Attention and Its Quality Trade-offs</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:39:19 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-impact-of-multi-query-attention-on-representation/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-impact-of-multi-query-attention-on-representation/]]></link>
			<title>Understanding the Impact of Multi-Query Attention on Representation</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:38:12 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-large-models-develop-more-interpretable-heads/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-large-models-develop-more-interpretable-heads/]]></link>
			<title>Why Do Large Models Develop More Interpretable Heads?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:37:16 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-we-edit-attention-heads-to-improve-reasoning/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-we-edit-attention-heads-to-improve-reasoning/]]></link>
			<title>Can We Edit Attention Heads to Improve Reasoning?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:36:35 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-causes-attention-patterns-to-specialize/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-causes-attention-patterns-to-specialize/]]></link>
			<title>What Causes Attention Patterns to Specialize</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:35:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-induction-heads-formation-during-pre-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-induction-heads-formation-during-pre-training/]]></link>
			<title>Understanding Induction Heads: Formation During Pre-Training</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:33:51 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-transformers-prefer-simpler-circuits-early/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-transformers-prefer-simpler-circuits-early/]]></link>
			<title>Why Transformers Prefer Simpler Circuits Early</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:32:38 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-grokking-and-its-connection-to-circuit-formation/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-grokking-and-its-connection-to-circuit-formation/]]></link>
			<title>Understanding Grokking and Its Connection to Circuit Formation</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:31:36 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-role-of-replay-buffer-in-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-role-of-replay-buffer-in-grokking/]]></link>
			<title>The Role of Replay Buffer in Grokking</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:31:02 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-networks-learn-modular-solutions-during-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-networks-learn-modular-solutions-during-grokking/]]></link>
			<title>Why Do Networks Learn Modular Solutions During Grokking?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:30:16 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-capabilities/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-capabilities/]]></link>
			<title>Can Grokking Predict Emergent Reasoning Capabilities?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:29:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-impact-of-batch-size-on-grokking-dynamics/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-impact-of-batch-size-on-grokking-dynamics/]]></link>
			<title>Understanding the Impact of Batch Size on Grokking Dynamics</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:28:39 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/exploring-the-rarity-of-grokking-in-natural-datasets/]]></guid>
			<link><![CDATA[https://logicnest.cc/exploring-the-rarity-of-grokking-in-natural-datasets/]]></link>
			<title>Exploring the Rarity of Grokking in Natural Datasets</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:27:54 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-weight-decay-speed-up-grokking-convergence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-weight-decay-speed-up-grokking-convergence/]]></link>
			<title>Can Weight Decay Speed Up Grokking Convergence?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:26:56 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-phase-transition-during-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-phase-transition-during-grokking/]]></link>
			<title>Understanding Phase Transition During Grokking</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:26:11 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/accelerating-grokking-through-curriculum-learning-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/accelerating-grokking-through-curriculum-learning-2/]]></link>
			<title>Accelerating Grokking Through Curriculum Learning</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:25:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-need-for-multiple-epochs-in-grokking-algorithmic-data/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-need-for-multiple-epochs-in-grokking-algorithmic-data/]]></link>
			<title>Understanding the Need for Multiple Epochs in Grokking Algorithmic Data</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:24:33 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-wide-networks-show-weaker-double-descent/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-wide-networks-show-weaker-double-descent/]]></link>
			<title>Why Do Wide Networks Show Weaker Double Descent?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:23:53 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-ntk-theory-predict-double-descent-in-transformers/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-ntk-theory-predict-double-descent-in-transformers/]]></link>
			<title>Can NTK Theory Predict Double Descent in Transformers?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:23:22 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-late-double-descent-through-feature-learning/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-late-double-descent-through-feature-learning/]]></link>
			<title>Understanding Late Double Descent Through Feature Learning</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:22:46 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point-2/]]></link>
			<title>Understanding the Drop in Test Error After the Interpolation Point</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:22:20 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point/]]></link>
			<title>Understanding the Drop in Test Error After the Interpolation Point</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:21:47 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-double-descent-in-modern-overparameterized-networks/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-double-descent-in-modern-overparameterized-networks/]]></link>
			<title>Understanding Double Descent in Modern Overparameterized Networks</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:20:45 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/navigating-loss-geometry-with-sam-optimizer/]]></guid>
			<link><![CDATA[https://logicnest.cc/navigating-loss-geometry-with-sam-optimizer/]]></link>
			<title>Navigating Loss Geometry with SAM Optimizer</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:19:58 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-adversarial-attacks-target-sharp-minima/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-adversarial-attacks-target-sharp-minima/]]></link>
			<title>Why Do Adversarial Attacks Target Sharp Minima?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:19:19 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/]]></guid>
			<link><![CDATA[https://logicnest.cc/]]></link>
			<title>Home</title>
			<pubDate><![CDATA[Fri, 13 Feb 2026 16:51:24 +0000]]></pubDate>
		</item>
				</channel>
</rss>
