<?xml version="1.0" encoding="UTF-8"?>
<!-- This sitemap was dynamically generated on April 12, 2026 at 9:07 am by All in One SEO v4.9.5.1 - the original SEO plugin for WordPress. -->

<?xml-stylesheet type="text/xsl" href="https://logicnest.cc/default-sitemap.xsl"?>

<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<title>Logic Nest</title>
		<link><![CDATA[https://logicnest.cc]]></link>
		<description><![CDATA[Logic Nest]]></description>
		<lastBuildDate><![CDATA[Sun, 12 Apr 2026 03:37:16 +0000]]></lastBuildDate>
		<docs>https://validator.w3.org/feed/docs/rss2.html</docs>
		<atom:link href="https://logicnest.cc/sitemap.rss" rel="self" type="application/rss+xml" />
		<ttl><![CDATA[60]]></ttl>

		<item>
			<guid><![CDATA[https://logicnest.cc/why-do-large-models-develop-more-interpretable-heads/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-large-models-develop-more-interpretable-heads/]]></link>
			<title>Why Do Large Models Develop More Interpretable Heads?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:37:16 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-we-edit-attention-heads-to-improve-reasoning/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-we-edit-attention-heads-to-improve-reasoning/]]></link>
			<title>Can We Edit Attention Heads to Improve Reasoning?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:36:35 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-causes-attention-patterns-to-specialize/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-causes-attention-patterns-to-specialize/]]></link>
			<title>What Causes Attention Patterns to Specialize?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:35:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-induction-heads-formation-during-pre-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-induction-heads-formation-during-pre-training/]]></link>
			<title>Understanding Induction Heads: Formation During Pre-Training</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:33:51 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-transformers-prefer-simpler-circuits-early/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-transformers-prefer-simpler-circuits-early/]]></link>
			<title>Why Transformers Prefer Simpler Circuits Early</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:32:38 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-grokking-and-its-connection-to-circuit-formation/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-grokking-and-its-connection-to-circuit-formation/]]></link>
			<title>Understanding Grokking and Its Connection to Circuit Formation</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:31:36 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/the-role-of-replay-buffer-in-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/the-role-of-replay-buffer-in-grokking/]]></link>
			<title>The Role of Replay Buffer in Grokking</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:31:02 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-networks-learn-modular-solutions-during-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-networks-learn-modular-solutions-during-grokking/]]></link>
			<title>Why Do Networks Learn Modular Solutions During Grokking?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:30:16 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-capabilities/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-grokking-predict-emergent-reasoning-capabilities/]]></link>
			<title>Can Grokking Predict Emergent Reasoning Capabilities?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:29:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-impact-of-batch-size-on-grokking-dynamics/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-impact-of-batch-size-on-grokking-dynamics/]]></link>
			<title>Understanding the Impact of Batch Size on Grokking Dynamics</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:28:39 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/exploring-the-rarity-of-grokking-in-natural-datasets/]]></guid>
			<link><![CDATA[https://logicnest.cc/exploring-the-rarity-of-grokking-in-natural-datasets/]]></link>
			<title>Exploring the Rarity of Grokking in Natural Datasets</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:27:54 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-weight-decay-speed-up-grokking-convergence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-weight-decay-speed-up-grokking-convergence/]]></link>
			<title>Can Weight Decay Speed Up Grokking Convergence?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:26:56 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-phase-transition-during-grokking/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-phase-transition-during-grokking/]]></link>
			<title>Understanding Phase Transition During Grokking</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:26:11 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/accelerating-grokking-through-curriculum-learning-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/accelerating-grokking-through-curriculum-learning-2/]]></link>
			<title>Accelerating Grokking Through Curriculum Learning</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:25:31 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-need-for-multiple-epochs-in-grokking-algorithmic-data/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-need-for-multiple-epochs-in-grokking-algorithmic-data/]]></link>
			<title>Understanding the Need for Multiple Epochs in Grokking Algorithmic Data</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:24:33 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-wide-networks-show-weaker-double-descent/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-wide-networks-show-weaker-double-descent/]]></link>
			<title>Why Do Wide Networks Show Weaker Double Descent?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:23:53 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-ntk-theory-predict-double-descent-in-transformers/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-ntk-theory-predict-double-descent-in-transformers/]]></link>
			<title>Can NTK Theory Predict Double Descent in Transformers?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:23:22 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-late-double-descent-through-feature-learning/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-late-double-descent-through-feature-learning/]]></link>
			<title>Understanding Late Double Descent Through Feature Learning</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:22:46 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point-2/]]></link>
			<title>Understanding the Drop in Test Error After the Interpolation Point</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:22:20 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-the-drop-in-test-error-after-the-interpolation-point/]]></link>
			<title>Understanding the Drop in Test Error After the Interpolation Point</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:21:47 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-double-descent-in-modern-overparameterized-networks/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-double-descent-in-modern-overparameterized-networks/]]></link>
			<title>Understanding Double Descent in Modern Overparameterized Networks</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:20:45 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/navigating-loss-geometry-with-sam-optimizer/]]></guid>
			<link><![CDATA[https://logicnest.cc/navigating-loss-geometry-with-sam-optimizer/]]></link>
			<title>Navigating Loss Geometry with SAM Optimizer</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:19:58 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-adversarial-attacks-target-sharp-minima/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-adversarial-attacks-target-sharp-minima/]]></link>
			<title>Why Do Adversarial Attacks Target Sharp Minima?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:19:19 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-flatter-minima-improve-out-of-distribution-robustness/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-flatter-minima-improve-out-of-distribution-robustness/]]></link>
			<title>Can Flatter Minima Improve Out-of-Distribution Robustness?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:18:48 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-flat-minima-hypothesis-and-its-role-in-generalization/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-flat-minima-hypothesis-and-its-role-in-generalization/]]></link>
			<title>Understanding Flat-Minima Hypothesis and Its Role in Generalization</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:18:15 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-sharpness-aware-minimization-how-it-finds-better-minima/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-sharpness-aware-minimization-how-it-finds-better-minima/]]></link>
			<title>Understanding Sharpness-Aware Minimization: How It Finds Better Minima</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:17:40 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-rmsnorm-outperforms-layer-norm-in-transformers/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-rmsnorm-outperforms-layer-norm-in-transformers/]]></link>
			<title>Why RMSNorm Outperforms Layer Norm in Transformers</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:17:03 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-group-normalization-the-advantage-of-small-batches/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-group-normalization-the-advantage-of-small-batches/]]></link>
			<title>Understanding Group Normalization: The Advantage of Small Batches</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:16:34 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-gradient-explosion-in-unnormalized-layers/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-gradient-explosion-in-unnormalized-layers/]]></link>
			<title>Understanding Gradient Explosion in Unnormalized Layers</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:15:57 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-plain-deep-networks-collapse-without-normalization/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-plain-deep-networks-collapse-without-normalization/]]></link>
			<title>Why Do Plain Deep Networks Collapse Without Normalization?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:15:18 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-reversible-architectures-enable-infinite-depth-training/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-reversible-architectures-enable-infinite-depth-training/]]></link>
			<title>Can Reversible Architectures Enable Infinite-Depth Training?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:14:41 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-do-deep-equilibrium-models-converge-faster/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-do-deep-equilibrium-models-converge-faster/]]></link>
			<title>Why Do Deep Equilibrium Models Converge Faster?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:13:59 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-layer-normalization-and-its-interaction-with-residuals-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-layer-normalization-and-its-interaction-with-residuals-2/]]></link>
			<title>Understanding Layer Normalization and Its Interaction with Residuals</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:13:13 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-residual-connections-flatten-loss-landscapes/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-residual-connections-flatten-loss-landscapes/]]></link>
			<title>Understanding Why Residual Connections Flatten Loss Landscapes</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:12:40 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/exploring-the-impact-of-vector-quantization-on-emergent-representations/]]></guid>
			<link><![CDATA[https://logicnest.cc/exploring-the-impact-of-vector-quantization-on-emergent-representations/]]></link>
			<title>Exploring the Impact of Vector Quantization on Emergent Representations</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:12:04 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-vq-vae-the-path-to-discovering-discrete-meaningful-latents/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-vq-vae-the-path-to-discovering-discrete-meaningful-latents/]]></link>
			<title>Understanding VQ-VAE: The Path to Discovering Discrete Meaningful Latents</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:11:29 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-autoregressive-models-excel-in-likelihood-estimation/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-autoregressive-models-excel-in-likelihood-estimation/]]></link>
			<title>Why Autoregressive Models Excel in Likelihood Estimation</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:10:52 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-conditional-image-generation-with-biggan/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-conditional-image-generation-with-biggan/]]></link>
			<title>Understanding Conditional Image Generation with BigGAN</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:10:09 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-diffusion-models-surpass-gans-in-reasoning-tasks/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-diffusion-models-surpass-gans-in-reasoning-tasks/]]></link>
			<title>Can Diffusion Models Surpass GANs in Reasoning Tasks?</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:07:53 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-stylegan-achieves-better-disentanglement/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-stylegan-achieves-better-disentanglement/]]></link>
			<title>Understanding Why StyleGAN Achieves Better Disentanglement</title>
			<pubDate><![CDATA[Sun, 12 Apr 2026 03:06:47 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-why-progressive-gans-produce-coherent-high-resolution-images/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-why-progressive-gans-produce-coherent-high-resolution-images/]]></link>
			<title>Understanding Why Progressive GANs Produce Coherent High-Resolution Images</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 08:11:35 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/stabilizing-deep-gan-training-with-spectral-normalization/]]></guid>
			<link><![CDATA[https://logicnest.cc/stabilizing-deep-gan-training-with-spectral-normalization/]]></link>
			<title>Stabilizing Deep GAN Training with Spectral Normalization</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 08:10:08 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-elastic-weight-consolidation-preserve-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-elastic-weight-consolidation-preserve-intelligence/]]></link>
			<title>Can Elastic Weight Consolidation Preserve Intelligence?</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 08:08:52 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-catastrophic-forgetting-in-continual-deep-learning-2/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-catastrophic-forgetting-in-continual-deep-learning-2/]]></link>
			<title>Understanding Catastrophic Forgetting in Continual Deep Learning</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 08:08:00 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/can-adapter-fusion-create-multi-task-intelligence/]]></guid>
			<link><![CDATA[https://logicnest.cc/can-adapter-fusion-create-multi-task-intelligence/]]></link>
			<title>Can Adapter Fusion Create Multi-Task Intelligence?</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 08:05:07 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-prompt-tuning-scaling-with-large-language-models/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-prompt-tuning-scaling-with-large-language-models/]]></link>
			<title>Understanding Prompt Tuning: Scaling with Large Language Models</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 08:02:07 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/what-makes-dora-outperform-lora-in-low-rank-adaptation/]]></guid>
			<link><![CDATA[https://logicnest.cc/what-makes-dora-outperform-lora-in-low-rank-adaptation/]]></link>
			<title>What Makes DoRA Outperform LoRA in Low-Rank Adaptation</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 07:57:52 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/understanding-qlora-achieving-4-bit-fine-tuning-without-loss/]]></guid>
			<link><![CDATA[https://logicnest.cc/understanding-qlora-achieving-4-bit-fine-tuning-without-loss/]]></link>
			<title>Understanding QLoRA: Achieving 4-Bit Fine-Tuning Without Loss</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 07:56:56 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/why-does-lora-preserve-more-pre-trained-knowledge/]]></guid>
			<link><![CDATA[https://logicnest.cc/why-does-lora-preserve-more-pre-trained-knowledge/]]></link>
			<title>Why Does LoRA Preserve More Pre-trained Knowledge?</title>
			<pubDate><![CDATA[Sat, 11 Apr 2026 07:56:19 +0000]]></pubDate>
		</item>
					<item>
			<guid><![CDATA[https://logicnest.cc/]]></guid>
			<link><![CDATA[https://logicnest.cc/]]></link>
			<title>Home</title>
			<pubDate><![CDATA[Fri, 13 Feb 2026 16:51:24 +0000]]></pubDate>
		</item>
				</channel>
</rss>
