<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <title>音频场景理解 on 语音/音频论文速递</title>
    <link>https://nanless.github.io/audio-paper-digest-blog/tags/%E9%9F%B3%E9%A2%91%E5%9C%BA%E6%99%AF%E7%90%86%E8%A7%A3/</link>
    <description>Recent content in 音频场景理解 on 语音/音频论文速递</description>
    <generator>Hugo</generator>
    <language>zh-cn</language>
    <lastBuildDate>Wed, 29 Apr 2026 00:00:00 +0000</lastBuildDate>
    <atom:link href="https://nanless.github.io/audio-paper-digest-blog/tags/%E9%9F%B3%E9%A2%91%E5%9C%BA%E6%99%AF%E7%90%86%E8%A7%A3/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Acoustic Teleportation Via Disentangled Neural Audio Codec Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-teleportation-via-disentangled-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-teleportation-via-disentangled-neural/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>AUDIOGENIE-Reasoner: A Training-Free Multi-Agent Framework for Coarse-to-Fine Audio Deep Reasoning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiogenie-reasoner-a-training-free-multi-agent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiogenie-reasoner-a-training-free-multi-agent/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Can Large Audio Language Models Understand Audio Well? Speech, Scene and Events Understanding Benchmark for LALMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-can-large-audio-language-models-understand-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-can-large-audio-language-models-understand-audio/</guid>
      <description>基准测试 | 7.0/10</description>
    </item>
    <item>
      <title>Class-Aware Permutation-Invariant Signal-to-Distortion Ratio for Semantic Segmentation of Sound Scene with Same-Class Sources</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-class-aware-permutation-invariant-signal-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-class-aware-permutation-invariant-signal-to/</guid>
      <description>音频场景理解 | 7.5/10</description>
    </item>
    <item>
      <title>DSpAST: Disentangled Representations for Spatial Audio Reasoning with Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dspast-disentangled-representations-for-spatial/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dspast-disentangled-representations-for-spatial/</guid>
      <description>音频问答 | 8.0/10</description>
    </item>
    <item>
      <title>From Contrast to Commonality: Audio Commonality Captioning for Enhanced Audio-Text Cross-Modal Understanding in Multimodal LLMS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-contrast-to-commonality-audio-commonality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-contrast-to-commonality-audio-commonality/</guid>
      <description>音频场景理解 | 7.5/10</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频场景理解 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-121/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-121/</guid>
      <description>共 3 篇 ICASSP 2026 音频场景理解 方向论文</description>
    </item>
    <item>
      <title>LAMB: LLM-Based Audio Captioning with Modality Gap Bridging Via Cauchy-Schwarz Divergence</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lamb-llm-based-audio-captioning-with-modality-gap/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lamb-llm-based-audio-captioning-with-modality-gap/</guid>
      <description>音频描述 | 7.0/10</description>
    </item>
    <item>
      <title>Lightweight and Generalizable Acoustic Scene Representations Via Contrastive Fine-Tuning and Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-and-generalizable-acoustic-scene/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-and-generalizable-acoustic-scene/</guid>
      <description>音频场景理解 | 8.0/10</description>
    </item>
    <item>
      <title>Segmentwise Pruning in Audio-Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-segmentwise-pruning-in-audio-language-models/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-segmentwise-pruning-in-audio-language-models/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Teaching Audio Models to Reason: A Unified Framework for Source- and Layer-Wise Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teaching-audio-models-to-reason-a-unified/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teaching-audio-models-to-reason-a-unified/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Listening with Time: Precise Temporal Awareness for Long-Form Audio Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-28-listening-with-time-precise-temporal-awareness/</link>
      <pubDate>Tue, 28 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-28-listening-with-time-precise-temporal-awareness/</guid>
      <description>音频场景理解 | 8.0/10</description>
    </item>
    <item>
      <title>Listening with Time: Precise Temporal Awareness for Long-Form Audio Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-27-listening-with-time-precise-temporal-awareness/</link>
      <pubDate>Mon, 27 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-27-listening-with-time-precise-temporal-awareness/</guid>
      <description>音频场景理解 | 8.0/10</description>
    </item>
  </channel>
</rss>
