<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <title>注意力机制 on 语音/音频论文速递</title>
    <link>https://nanless.github.io/audio-paper-digest-blog/tags/%E6%B3%A8%E6%84%8F%E5%8A%9B%E6%9C%BA%E5%88%B6/</link>
    <description>Recent content in 注意力机制 on 语音/音频论文速递</description>
    <generator>Hugo</generator>
    <language>zh-cn</language>
    <lastBuildDate>Wed, 29 Apr 2026 00:00:00 +0000</lastBuildDate>
    <atom:link href="https://nanless.github.io/audio-paper-digest-blog/tags/%E6%B3%A8%E6%84%8F%E5%8A%9B%E6%9C%BA%E5%88%B6/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>A Consistent Learning Depression Detection Framework Integrating Multi-View Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-consistent-learning-depression-detection/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-consistent-learning-depression-detection/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>A Distribution Matching Approach to Neural Piano Transcription with Optimal Transport</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-distribution-matching-approach-to-neural-piano/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-distribution-matching-approach-to-neural-piano/</guid>
      <description>音乐转录 | 7.0/10</description>
    </item>
    <item>
      <title>Adversarial Rivalry Learning for Music Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-rivalry-learning-for-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-rivalry-learning-for-music/</guid>
      <description>音乐分类 | 6.5/10</description>
    </item>
    <item>
      <title>An Audio-Visual Speech Separation Network with Joint Cross-Attention and Iterative Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-audio-visual-speech-separation-network-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-audio-visual-speech-separation-network-with/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>Attentive AV-Fusionnet: Audio-Visual Quality Prediction with Hybrid Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attentive-av-fusionnet-audio-visual-quality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attentive-av-fusionnet-audio-visual-quality/</guid>
      <description>音视频 | 7.0/10</description>
    </item>
    <item>
      <title>Caption and Audio-Guided Video Representation Learning with Gated Attention for Partially Relevant Video Retrieval</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-caption-and-audio-guided-video-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-caption-and-audio-guided-video-representation/</guid>
      <description>视频检索 | 7.0/10</description>
    </item>
    <item>
      <title>Chunk-Wise Attention Transducers for Fast and Accurate Streaming Speech-to-Text</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-chunk-wise-attention-transducers-for-fast-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-chunk-wise-attention-transducers-for-fast-and/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Combining Multi-Order Attention and Multi-Resolution Discriminator for High-Fidelity Neural Vocoder</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-combining-multi-order-attention-and-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-combining-multi-order-attention-and-multi/</guid>
      <description>语音合成 | 6.5/10</description>
    </item>
    <item>
      <title>DAT-CFTNet: Speech Enhancement for Cochlear Implant Recipients using Attention-based Dual-Path Recurrent Neural Network</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dat-cftnet-speech-enhancement-for-cochlear/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dat-cftnet-speech-enhancement-for-cochlear/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Distilling Attention Knowledge for Speaker Verification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-distilling-attention-knowledge-for-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-distilling-attention-knowledge-for-speaker/</guid>
      <description>说话人验证 | 8.0/10</description>
    </item>
    <item>
      <title>Expressive Voice Conversion with Controllable Emotional Intensity</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-expressive-voice-conversion-with-controllable/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-expressive-voice-conversion-with-controllable/</guid>
      <description>语音转换 | 7.5/10</description>
    </item>
    <item>
      <title>FDCNet: Frequency Domain Channel Attention and Convolution for Lipreading</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fdcnet-frequency-domain-channel-attention-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fdcnet-frequency-domain-channel-attention-and/</guid>
      <description>视觉语音识别 | 8.5/10</description>
    </item>
    <item>
      <title>HarmoNet: Music Grounding by Short Video via Harmonic Resample and Dynamic Sparse Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-harmonet-music-grounding-by-short-video-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-harmonet-music-grounding-by-short-video-via/</guid>
      <description>音乐检索 | 7.0/10</description>
    </item>
    <item>
      <title>Learning What to Hear: Boosting Sound-Source Association for Robust Audiovisual Instance Segmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-what-to-hear-boosting-sound-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-what-to-hear-boosting-sound-source/</guid>
      <description>音视频实例分割 | 7.5/10</description>
    </item>
    <item>
      <title>MFF-RVRDI: Multimodal Fusion Framework for Robust Video Recording Device Identification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mff-rvrdi-multimodal-fusion-framework-for-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mff-rvrdi-multimodal-fusion-framework-for-robust/</guid>
      <description>视频设备识别 | 7.5/10</description>
    </item>
    <item>
      <title>MSCT: Differential Cross-Modal Attention for Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msct-differential-cross-modal-attention-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msct-differential-cross-modal-attention-for/</guid>
      <description>音频深度伪造检测 | 6.5/10</description>
    </item>
    <item>
      <title>Musicdetr: A Position-Aware Spectral Note Detection Model for Singing Transcription</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musicdetr-a-position-aware-spectral-note/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musicdetr-a-position-aware-spectral-note/</guid>
      <description>歌唱语音转录 | 8.5/10</description>
    </item>
    <item>
      <title>QFOCUS: Controllable Synthesis for Automated Speech Stress Editing to Deliver Human-Like Emphatic Intent</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qfocus-controllable-synthesis-for-automated/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qfocus-controllable-synthesis-for-automated/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Spiking Attention Network: A Hybrid Neuromorphic Approach to Underwater Acoustic Localization and Zero-Shot Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spiking-attention-network-a-hybrid-neuromorphic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spiking-attention-network-a-hybrid-neuromorphic/</guid>
      <description>声源定位 | 7.0/10</description>
    </item>
    <item>
      <title>TAG: Structured Temporal Audio Generation via LLM-Guided Manual Scription and Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tag-structured-temporal-audio-generation-via-llm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tag-structured-temporal-audio-generation-via-llm/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Time vs. Layer: Locating Predictive Cues for Dysarthric Speech Descriptors in Wav2vec 2.0</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-vs-layer-locating-predictive-cues-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-vs-layer-locating-predictive-cues-for/</guid>
      <description>语音质量评估 | 7.5/10</description>
    </item>
    <item>
      <title>Toward Robust And Efficient Beat Tracking Via Beat-Aware Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-toward-robust-and-efficient-beat-tracking-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-toward-robust-and-efficient-beat-tracking-via/</guid>
      <description>音乐理解 | 8.5/10</description>
    </item>
    <item>
      <title>Tri-Attention Fusion: Joint Temporal-Spectral and Bidirectional Modeling for Speech Spoofing Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tri-attention-fusion-joint-temporal-spectral-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tri-attention-fusion-joint-temporal-spectral-and/</guid>
      <description>语音伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Whisper-MLA: Reducing GPU Memory Consumption of ASR Models Based on MHA2MLA Conversion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-mla-reducing-gpu-memory-consumption-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-mla-reducing-gpu-memory-consumption-of/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
  </channel>
</rss>
