<?xml version="1.0" encoding="UTF-8"?>
<!-- This sitemap was dynamically generated on April 29, 2026 at 4:12 pm by All in One SEO v4.9.6.2 - the original SEO plugin for WordPress. -->

<?xml-stylesheet type="text/xsl" href="https://logicnest.cc/default-sitemap.xsl"?>

<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>Logic Nest</title>
		<link><![CDATA[https://logicnest.cc]]></link>
		<description><![CDATA[Logic Nest]]></description>
		<lastBuildDate><![CDATA[Wed, 29 Apr 2026 03:38:51 +0000]]></lastBuildDate>
		<docs>https://validator.w3.org/feed/docs/rss2.html</docs>
		<atom:link href="https://logicnest.cc/sitemap.rss" rel="self" type="application/rss+xml" />
		<ttl><![CDATA[60]]></ttl>

		<item>
			<guid><![CDATA[https://logicnest.cc/can-self-supervised-vits-match-supervised-reasoning-quality/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-self-supervised-vits-match-supervised-reasoning-quality/]]></link>
			<title>Can Self-Supervised VITs Match Supervised Reasoning Quality?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:38:51 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-limitations-of-vision-transformers-vit-performance-on-small-datasets/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-limitations-of-vision-transformers-vit-performance-on-small-datasets/]]></link>
			<title>Understanding the Limitations of Vision Transformers (ViT) Performance on Small Datasets</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:37:25 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-positional-encoding-on-vision-transformers-generalization/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-positional-encoding-on-vision-transformers-generalization/]]></link>
			<title>The Impact of Positional Encoding on Vision Transformers&#8217; Generalization</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:36:55 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-large-vision-transformers-learn-stronger-global-features-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-large-vision-transformers-learn-stronger-global-features-2/]]></link>
			<title>Understanding Why Large Vision Transformers Learn Stronger Global Features</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:36:22 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-large-vision-transformers-learn-stronger-global-features/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-large-vision-transformers-learn-stronger-global-features/]]></link>
			<title>Understanding Why Large Vision Transformers Learn Stronger Global Features</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:35:48 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-hybrid-cnn-transformer-architectures-regain-dominance/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-hybrid-cnn-transformer-architectures-regain-dominance/]]></link>
			<title>Can Hybrid CNN-Transformer Architectures Regain Dominance?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:35:17 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-does-deit-distill-knowledge-from-cnn-teachers/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-does-deit-distill-knowledge-from-cnn-teachers/]]></link>
			<title>How Does DEIT Distill Knowledge from CNN Teachers?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:34:39 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-shifted-window-attention-in-swin-transformers/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-shifted-window-attention-in-swin-transformers/]]></link>
			<title>Understanding Shifted Window Attention in Swin Transformers</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:33:52 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-effectiveness-of-the-vit-scale-with-data-size/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-effectiveness-of-the-vit-scale-with-data-size/]]></link>
			<title>Understanding the Effectiveness of the Vit Scale with Data Size</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:33:24 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-inductive-bias-in-vision-transformers-through-patch-embeddings/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-inductive-bias-in-vision-transformers-through-patch-embeddings/]]></link>
			<title>Understanding Inductive Bias in Vision Transformers Through Patch Embeddings</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:32:54 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-vision-transformers-generalize-better-than-cnns-3/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-vision-transformers-generalize-better-than-cnns-3/]]></link>
			<title>Why Vision Transformers Generalize Better Than CNNs</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:32:24 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-tokenization-choices-on-scaling-laws/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-tokenization-choices-on-scaling-laws/]]></link>
			<title>The Impact of Tokenization Choices on Scaling Laws</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:31:51 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-deduplication-on-downstream-task-performance/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-deduplication-on-downstream-task-performance/]]></link>
			<title>The Impact of Deduplication on Downstream Task Performance</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:31:22 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-curated-high-quality-data-outperform-web-scale-pre-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-curated-high-quality-data-outperform-web-scale-pre-training/]]></link>
			<title>Can Curated High-Quality Data Outperform Web-Scale Pre-Training?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:30:45 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-pre-training-data-diversity-drives-emergent-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-pre-training-data-diversity-drives-emergent-intelligence/]]></link>
			<title>How Pre-Training Data Diversity Drives Emergent Intelligence</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:30:13 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-frontier-models-exceed-compute-optimal-scaling/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-frontier-models-exceed-compute-optimal-scaling/]]></link>
			<title>Why Do Frontier Models Exceed Compute-Optimal Scaling?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:29:39 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-factors-behind-the-shift-in-chinchilla-optimal-ratio-2026/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-factors-behind-the-shift-in-chinchilla-optimal-ratio-2026/]]></link>
			<title>Understanding the Factors Behind the Shift in Chinchilla-Optimal Ratio 2026</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:29:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-shift-in-chinchilla-optimal-ratio-2026/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-shift-in-chinchilla-optimal-ratio-2026/]]></link>
			<title>Understanding the Shift in Chinchilla-Optimal Ratio 2026</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:28:38 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-to-filter-datasets-to-prevent-model-collapse/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-to-filter-datasets-to-prevent-model-collapse/]]></link>
			<title>How to Filter Datasets to Prevent Model Collapse</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:27:55 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-model-collapse-in-synthetic-data-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-model-collapse-in-synthetic-data-training/]]></link>
			<title>Understanding Model Collapse in Synthetic Data Training</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:27:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-synthetic-data-bend-current-scaling-curves-upward/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-synthetic-data-bend-current-scaling-curves-upward/]]></link>
			<title>Can Synthetic Data Bend Current Scaling Curves Upward?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:26:38 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-data-diversity-influences-scaling-law-exponents/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-data-diversity-influences-scaling-law-exponents/]]></link>
			<title>How Data Diversity Influences Scaling Law Exponents</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:26:07 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-dependence-of-reformer-on-locality-sensitive-hashing/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-dependence-of-reformer-on-locality-sensitive-hashing/]]></link>
			<title>Understanding the Dependence of Reformer on Locality-Sensitive Hashing</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:25:36 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-how-performer-kernel-approximates-full-attention/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-how-performer-kernel-approximates-full-attention/]]></link>
			<title>Understanding How Performer Kernel Approximates Full Attention</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:25:01 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-sparse-attention-mechanisms-recover-full-transformer-performance/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-sparse-attention-mechanisms-recover-full-transformer-performance/]]></link>
			<title>Can Sparse Attention Mechanisms Recover Full Transformer Performance?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:24:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-grouped-query-attention-trades-quality-for-inference-speed/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-grouped-query-attention-trades-quality-for-inference-speed/]]></link>
			<title>Why Grouped-Query Attention Trades Quality for Inference Speed</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:23:07 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-impact-of-multi-query-attention-on-representation-quality/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-impact-of-multi-query-attention-on-representation-quality/]]></link>
			<title>The Impact of Multi-Query Attention on Representation Quality</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:22:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-larger-models-develop-more-interpretable-attention-heads/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-larger-models-develop-more-interpretable-attention-heads/]]></link>
			<title>Why Do Larger Models Develop More Interpretable Attention Heads?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:22:03 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/surgically-editing-attention-heads-a-path-to-enhanced-reasoning/]]></guid>
			<link><![CDATA[https://logicnest.cc/surgically-editing-attention-heads-a-path-to-enhanced-reasoning/]]></link>
			<title>Surgically Editing Attention Heads: A Path to Enhanced Reasoning</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:21:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-causes-of-specialized-attention-patterns-across-heads/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-causes-of-specialized-attention-patterns-across-heads/]]></link>
			<title>Understanding the Causes of Specialized Attention Patterns Across Heads</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:21:02 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-emergence-of-induction-heads-during-pre-training-phases/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-emergence-of-induction-heads-during-pre-training-phases/]]></link>
			<title>Understanding the Emergence of Induction Heads During Pre-Training Phases</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:20:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-transformers-prefer-simpler-circuits-in-early-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-transformers-prefer-simpler-circuits-in-early-training/]]></link>
			<title>Why Transformers Prefer Simpler Circuits in Early Training</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:19:49 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-grokking-and-its-role-in-automated-circuit-discovery/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-grokking-and-its-role-in-automated-circuit-discovery/]]></link>
			<title>Understanding Grokking and Its Role in Automated Circuit Discovery</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:19:12 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-role-of-experience-replay-in-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-role-of-experience-replay-in-grokking/]]></link>
			<title>The Role of Experience Replay in Grokking</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:18:36 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-networks-discover-modular-solutions-during-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-networks-discover-modular-solutions-during-grokking/]]></link>
			<title>Why Networks Discover Modular Solutions During Grokking</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:18:04 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-capabilities-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-capabilities-2/]]></link>
			<title>Can Grokking Predict Emergent Reasoning Capabilities?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:17:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-batch-size-influences-grokking-dynamics/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-batch-size-influences-grokking-dynamics/]]></link>
			<title>How Batch Size Influences Grokking Dynamics</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:17:03 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-rarity-of-grokking-in-natural-language-data-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-rarity-of-grokking-in-natural-language-data-2/]]></link>
			<title>Understanding the Rarity of Grokking in Natural Language Data</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:16:32 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-weight-decay-significantly-speed-up-grokking-convergence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-weight-decay-significantly-speed-up-grokking-convergence/]]></link>
			<title>Can Weight Decay Significantly Speed Up Grokking Convergence?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:15:59 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-sudden-phase-transitions-in-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-sudden-phase-transitions-in-grokking/]]></link>
			<title>Understanding Sudden Phase Transitions in Grokking</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:15:27 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/how-curriculum-learning-accelerates-grokking-speed/]]></guid>
			<link><![CDATA[https://logicnest.cc/how-curriculum-learning-accelerates-grokking-speed/]]></link>
			<title>How Curriculum Learning Accelerates Grokking Speed</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:14:36 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-grokking-why-algorithms-demand-thousands-of-epochs/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-grokking-why-algorithms-demand-thousands-of-epochs/]]></link>
			<title>Understanding Grokking: Why Algorithms Demand Thousands of Epochs</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:14:06 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-weaker-double-descent-phenomenon-in-very-wide-networks/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-weaker-double-descent-phenomenon-in-very-wide-networks/]]></link>
			<title>Understanding the Weaker Double Descent Phenomenon in Very Wide Networks</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:13:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-ntk-theory-predict-double-descent-in-transformers-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-ntk-theory-predict-double-descent-in-transformers-2/]]></link>
			<title>Can NTK Theory Predict Double Descent in Transformers?</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:13:00 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-feature-learning-and-the-late-double-descent-phenomenon/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-feature-learning-and-the-late-double-descent-phenomenon/]]></link>
			<title>Understanding Feature Learning and the Late Double Descent Phenomenon</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:12:24 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-threshold/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-threshold/]]></link>
			<title>Understanding the Drop in Test Error After the Interpolation Threshold</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:11:47 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-double-descent-in-modern-overparameterized-regimes/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-double-descent-in-modern-overparameterized-regimes/]]></link>
			<title>Understanding Double Descent in Modern Overparameterized Regimes</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:10:28 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/navigating-complex-loss-geometry-with-sam-optimizer/]]></guid>
			<link><![CDATA[https://logicnest.cc/navigating-complex-loss-geometry-with-sam-optimizer/]]></link>
			<title>Navigating Complex Loss Geometry with SAM Optimizer</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:09:51 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-adversarial-examples-the-role-of-sharp-loss-minima/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-adversarial-examples-the-role-of-sharp-loss-minima/]]></link>
			<title>Understanding Adversarial Examples: The Role of Sharp Loss Minima</title>
			<pubDate><![CDATA[Wed, 29 Apr 2026 03:09:14 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/]]></guid>
			<link><![CDATA[https://logicnest.cc/]]></link>
			<title>Home</title>
			<pubDate><![CDATA[Fri, 13 Feb 2026 16:51:24 +0000]]></pubDate>
		</item>
	</channel>
</rss>
