<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <title>Icassp-2026 on 语音/音频论文速递</title>
    <link>https://nanless.github.io/audio-paper-digest-blog/categories/icassp-2026/</link>
    <description>Recent content in Icassp-2026 on 语音/音频论文速递</description>
    <generator>Hugo</generator>
    <language>zh-cn</language>
    <lastBuildDate>Wed, 29 Apr 2026 00:00:00 +0000</lastBuildDate>
    <atom:link href="https://nanless.github.io/audio-paper-digest-blog/categories/icassp-2026/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>3D Mesh Grid Room Impulse Responses Measured with A Linear Microphone Array And Suppression of Frame Reflections</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-3d-mesh-grid-room-impulse-responses-measured-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-3d-mesh-grid-room-impulse-responses-measured-with/</guid>
      <description>空间音频 | 8.3/10</description>
    </item>
    <item>
      <title>A Bayesian Approach to Singing Skill Evaluation Using Semitone Pitch Histogram and MCMC-Based Generated Quantities</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-bayesian-approach-to-singing-skill-evaluation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-bayesian-approach-to-singing-skill-evaluation/</guid>
      <description>音乐理解 | 7.0/10</description>
    </item>
    <item>
      <title>A Bimodal Approach for Detecting Fatigue Using Speech and Personal Assessments in College Students</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-bimodal-approach-for-detecting-fatigue-using/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-bimodal-approach-for-detecting-fatigue-using/</guid>
      <description>A Bimodal Approach for Detecting Fatigue Using Speech and Personal Assessments in College Students</description>
    </item>
    <item>
      <title>A Consistent Learning Depression Detection Framework Integrating Multi-View Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-consistent-learning-depression-detection/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-consistent-learning-depression-detection/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>A Data-Driven Framework for Personal Sound Zone Control Addressing Loudspeaker Nonlinearities</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-data-driven-framework-for-personal-sound-zone/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-data-driven-framework-for-personal-sound-zone/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>A Dataset of Robot-Patient and Doctor-Patient Medical Dialogues for Spoken Language Processing Tasks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-dataset-of-robot-patient-and-doctor-patient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-dataset-of-robot-patient-and-doctor-patient/</guid>
      <description>语音对话系统 | 7.5/10</description>
    </item>
    <item>
      <title>A Distribution Matching Approach to Neural Piano Transcription with Optimal Transport</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-distribution-matching-approach-to-neural-piano/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-distribution-matching-approach-to-neural-piano/</guid>
      <description>音乐转录 | 7.0/10</description>
    </item>
    <item>
      <title>A Dynamic Gated Cross-Attention Framework for Audio-Text Apparent Personality Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-dynamic-gated-cross-attention-framework-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-dynamic-gated-cross-attention-framework-for/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>A Feature-Optimized Audio Watermarking Algorithm with Adaptive Embedding Strength</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-feature-optimized-audio-watermarking-algorithm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-feature-optimized-audio-watermarking-algorithm/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>A Framework for Controlled Multi-Speaker Audio Synthesis for Robustness Evaluation of Speaker Diarisation Systems</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-framework-for-controlled-multi-speaker-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-framework-for-controlled-multi-speaker-audio/</guid>
      <description>说话人日志 | 7.5/10</description>
    </item>
    <item>
      <title>A Generalization Strategy for Speech Quality Prediction: From Domain-Specific to Unified Datasets</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-generalization-strategy-for-speech-quality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-generalization-strategy-for-speech-quality/</guid>
      <description>语音质量评估 | 6.5/10</description>
    </item>
    <item>
      <title>A Generative-First Neural Audio Autoencoder</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-generative-first-neural-audio-autoencoder/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-generative-first-neural-audio-autoencoder/</guid>
      <description>音乐生成 | 8.5/10</description>
    </item>
    <item>
      <title>A Hybrid Convolution-Mamba Network with Tone-Octave Contrastive Learning for Stratified Semi-Supervised Singing Melody Extraction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-hybrid-convolution-mamba-network-with-tone/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-hybrid-convolution-mamba-network-with-tone/</guid>
      <description>歌唱旋律提取 | 7.5/10</description>
    </item>
    <item>
      <title>A Learning-Based Automotive Sound Field Reproduction Method Using Plane-Wave Decomposition and Multi-Position Constraint</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-learning-based-automotive-sound-field/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-learning-based-automotive-sound-field/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>A Lightweight Fourier-Based Network for Binaural Speech Enhancement with Spatial Cue Preservation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-lightweight-fourier-based-network-for-binaural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-lightweight-fourier-based-network-for-binaural/</guid>
      <description>语音增强 | 8.5/10</description>
    </item>
    <item>
      <title>A LLM-Driven Acoustic Semantic Enriched Framework for Underwater Acoustic Target Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-llm-driven-acoustic-semantic-enriched-framework/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-llm-driven-acoustic-semantic-enriched-framework/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>A Metric Learning Approach to Heart Murmur Detection from Phonocardiogram Recordings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-metric-learning-approach-to-heart-murmur/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-metric-learning-approach-to-heart-murmur/</guid>
      <description>音频分类 | 7.7/10</description>
    </item>
    <item>
      <title>A New Method and Dataset for Classroom Teaching Stage Segmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-new-method-and-dataset-for-classroom-teaching/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-new-method-and-dataset-for-classroom-teaching/</guid>
      <description>课堂阶段分割 | 6.5/10</description>
    </item>
    <item>
      <title>A Noniterative Phase Retrieval Considering the Zeros of STFT Magnitude</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-noniterative-phase-retrieval-considering-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-noniterative-phase-retrieval-considering-the/</guid>
      <description>信号处理 | 7.5/10</description>
    </item>
    <item>
      <title>A Noval Monte Carlo Gradient Method Based on Meta-Learning for Effective Step-Size Selection in Active Noise Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-noval-monte-carlo-gradient-method-based-on-meta/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-noval-monte-carlo-gradient-method-based-on-meta/</guid>
      <description>噪声控制 | 6.5/10</description>
    </item>
    <item>
      <title>A Parameter-Efficient Multi-Scale Convolutional Adapter for Synthetic Speech Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-parameter-efficient-multi-scale-convolutional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-parameter-efficient-multi-scale-convolutional/</guid>
      <description>A Parameter-Efficient Multi-Scale Convolutional Adapter for Synthetic Speech Detection</description>
    </item>
    <item>
      <title>A Personalized Real-Time Proactive Voice Memory Assistant</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-personalized-real-time-proactive-voice-memory/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-personalized-real-time-proactive-voice-memory/</guid>
      <description>实时处理 | 7.0/10</description>
    </item>
    <item>
      <title>A Robust KNN Approach for Multi-Class Laryngeal Disease Detection using MFCC Features</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-robust-knn-approach-for-multi-class-laryngeal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-robust-knn-approach-for-multi-class-laryngeal/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>A Robust Multi-Scale Framework with Test-Time Adaptation for sEEG-Based Speech Decoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-robust-multi-scale-framework-with-test-time/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-robust-multi-scale-framework-with-test-time/</guid>
      <description>语音解码 | 7.5/10</description>
    </item>
    <item>
      <title>A Speech-Driven Paradigm for Physics-Informed Modeling of Coupled Micro-Speakers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-speech-driven-paradigm-for-physics-informed/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-speech-driven-paradigm-for-physics-informed/</guid>
      <description>音频生成 | 7.0/10</description>
    </item>
    <item>
      <title>A Stabilized Hybrid Active Noise Control Algorithm of GFANC and FxNLMS with Online Clustering</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-stabilized-hybrid-active-noise-control/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-stabilized-hybrid-active-noise-control/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>A State-Dependent Markov Diffusion Process for Generative Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-state-dependent-markov-diffusion-process-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-state-dependent-markov-diffusion-process-for/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>A Study of Data Selection Strategies for Pre-Training Self-Supervised Speech Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-study-of-data-selection-strategies-for-pre/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-study-of-data-selection-strategies-for-pre/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>A Superb-Style Benchmark of Self-Supervised Speech Models for Audio Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-superb-style-benchmark-of-self-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-superb-style-benchmark-of-self-supervised/</guid>
      <description>音频深度伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>A Task-Aware Dual-Level Self-Supervised Learning Method for Effective Sound Event Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-task-aware-dual-level-self-supervised-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-task-aware-dual-level-self-supervised-learning/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>A Text-To-Text Alignment Algorithm for Better Evaluation of Modern Speech Recognition Systems</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-text-to-text-alignment-algorithm-for-better/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-text-to-text-alignment-algorithm-for-better/</guid>
      <description>模型评估 | 7.5/10</description>
    </item>
    <item>
      <title>A Unified SVD-Modal Solution for Sparse Sound Field Reconstruction with Hybrid Spherical-Linear Microphone Arrays</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-unified-svd-modal-solution-for-sparse-sound/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-unified-svd-modal-solution-for-sparse-sound/</guid>
      <description>声源定位 | 6.5/10</description>
    </item>
    <item>
      <title>A Unsupervised Domain Adaptation Framework For Semi-Supervised Melody Extraction Using Confidence Matrix Replace and Nearest Neighbour Supervision</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-unsupervised-domain-adaptation-framework-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-a-unsupervised-domain-adaptation-framework-for/</guid>
      <description>音乐信息检索 | 8.0/10</description>
    </item>
    <item>
      <title>ACAVCaps: Enabling Large-Scale Training for Fine-Grained and Diverse Audio Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acavcaps-enabling-large-scale-training-for-fine/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acavcaps-enabling-large-scale-training-for-fine/</guid>
      <description>音频分类 | 8.5/10</description>
    </item>
    <item>
      <title>AccLID: Accent-aware Language Identification for Robust Multilingual Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acclid-accent-aware-language-identification-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acclid-accent-aware-language-identification-for/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>ACIR-MACL: Effective Multimodal Sentiment Analysis via Attention-Based Causal Intervention Regularization and Multi-Aspect Contrastive Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acir-macl-effective-multimodal-sentiment-analysis/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acir-macl-effective-multimodal-sentiment-analysis/</guid>
      <description>情感分析 | 7.0/10</description>
    </item>
    <item>
      <title>Acoustic and Facial Markers of Perceived Conversational Success in Spontaneous Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-and-facial-markers-of-perceived/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-and-facial-markers-of-perceived/</guid>
      <description>语音情感识别 | 6.0/10</description>
    </item>
    <item>
      <title>Acoustic Feedback Cancellation in Hearing Aids Exploiting an Inertial Sensor</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-feedback-cancellation-in-hearing-aids/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-feedback-cancellation-in-hearing-aids/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Acoustic Non-Stationarity Objective Assessment with Hard Label Criteria for Supervised Learning Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-non-stationarity-objective-assessment/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-non-stationarity-objective-assessment/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Acoustic Teleportation Via Disentangled Neural Audio Codec Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-teleportation-via-disentangled-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-acoustic-teleportation-via-disentangled-neural/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Adapting Diarization-Conditioned Whisper for End-to-End Multi-Talker Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adapting-diarization-conditioned-whisper-for-end/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adapting-diarization-conditioned-whisper-for-end/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Adaptive Deterministic Flow Matching for Target Speaker Extraction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-deterministic-flow-matching-for-target/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-deterministic-flow-matching-for-target/</guid>
      <description>目标说话人提取 | 8.0/10</description>
    </item>
    <item>
      <title>Adaptive Embedding Fusion with Contrastive Learning for Robust Fully Few-Shot Class-Incremental Audio Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-embedding-fusion-with-contrastive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-embedding-fusion-with-contrastive/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Adaptive Per-Channel Energy Normalization Front-End for Robust Audio Signal Processing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-per-channel-energy-normalization-front/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-per-channel-energy-normalization-front/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Adaptive Rotary Steering with Joint Autoregression for Robust Extraction of Closely Moving Speakers in Dynamic Scenarios</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-rotary-steering-with-joint/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-rotary-steering-with-joint/</guid>
      <description>语音分离 | 8.5/10</description>
    </item>
    <item>
      <title>Adaptive Spectral Weighting in Sagittal-Plane Sound Localization: A Reliability-Driven Approach</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-spectral-weighting-in-sagittal-plane/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-spectral-weighting-in-sagittal-plane/</guid>
      <description>声源定位 | 6.5/10</description>
    </item>
    <item>
      <title>Adaptive Task-Incremental Learning For Underwater Acoustic Recognition Based on Mixture-of-Experts Adapter</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-task-incremental-learning-for-underwater/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adaptive-task-incremental-learning-for-underwater/</guid>
      <description>水下声学目标识别 | 7.0/10</description>
    </item>
    <item>
      <title>Addressing Gradient Misalignment in Data-Augmented Training for Robust Speech Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-addressing-gradient-misalignment-in-data/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-addressing-gradient-misalignment-in-data/</guid>
      <description>语音伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>ADH-VA: Adaptive Directed-Hypergraph Convolution with VA Contrastive Learning for Multimodal Conversational Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adh-va-adaptive-directed-hypergraph-convolution/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adh-va-adaptive-directed-hypergraph-convolution/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Advanced modeling of interlanguage speech intelligibility benefit with L1-L2 multi-task learning using differentiable K-means for accent-robust discrete token-based ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advanced-modeling-of-interlanguage-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advanced-modeling-of-interlanguage-speech/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Advancing LLM-Based Multi-Channel Multi-Speaker Speech Recognition with Global Cross-Channel Attention and Sentence-Ordered First-In First-Out Serialized Output Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-llm-based-multi-channel-multi-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-llm-based-multi-channel-multi-speaker/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Advancing Semi-Supervised Child Speech Recognition with Omni-Temporal Classification under Label Noise</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-semi-supervised-child-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-semi-supervised-child-speech/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Advancing Speech Summarization in Multi-Modal LLMs with Reinforcement Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-speech-summarization-in-multi-modal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-speech-summarization-in-multi-modal/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Advancing Speech Understanding in Speech-Aware Language Models with GRPO</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-speech-understanding-in-speech-aware/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-advancing-speech-understanding-in-speech-aware/</guid>
      <description>语音问答 | 7.0/10</description>
    </item>
    <item>
      <title>Adversarial Defense via Generative Speech Enhancement Module</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-defense-via-generative-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-defense-via-generative-speech/</guid>
      <description>语音增强 对抗防御 | 7.5/10</description>
    </item>
    <item>
      <title>Adversarial Fine-Tuning on Speech Foundation Model with Vulnerable Attention Consistency Regularization for Robust Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-fine-tuning-on-speech-foundation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-fine-tuning-on-speech-foundation/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Adversarial Rivalry Learning for Music Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-rivalry-learning-for-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-adversarial-rivalry-learning-for-music/</guid>
      <description>音乐分类 | 6.5/10</description>
    </item>
    <item>
      <title>Affect-Jigsaw: Integrating Core and Peripheral Emotions for Harmonious Fine-Grained Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-affect-jigsaw-integrating-core-and-peripheral/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-affect-jigsaw-integrating-core-and-peripheral/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>AFT: An Exemplar-Free Class Incremental Learning Method for Environmental Sound Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aft-an-exemplar-free-class-incremental-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aft-an-exemplar-free-class-incremental-learning/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>AI-Generated Music Detection in Broadcast Monitoring</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ai-generated-music-detection-in-broadcast/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ai-generated-music-detection-in-broadcast/</guid>
      <description>音频深度伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Ailive Mixer: A Deep Learning Based Zero Latency Automatic Music Mixer for Live Music Performances</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ailive-mixer-a-deep-learning-based-zero-latency/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ailive-mixer-a-deep-learning-based-zero-latency/</guid>
      <description>音乐混合 | 7.0/10</description>
    </item>
    <item>
      <title>AISHELL6-Whisper: A Chinese Mandarin Audio-Visual Whisper Speech Dataset with Speech Recognition Baselines</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aishell6-whisper-a-chinese-mandarin-audio-visual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aishell6-whisper-a-chinese-mandarin-audio-visual/</guid>
      <description>语音识别 | 8.3/10</description>
    </item>
    <item>
      <title>Aligning Generative Speech Enhancement with Perceptual Feedback</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aligning-generative-speech-enhancement-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aligning-generative-speech-enhancement-with/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Aligning Language Models for Lyric-to-Melody Generation with Rule-Based Musical Constraints</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aligning-language-models-for-lyric-to-melody/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aligning-language-models-for-lyric-to-melody/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>ALMA-Chor: Leveraging Audio-Lyric Alignment with Mamba for Chorus Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-alma-chor-leveraging-audio-lyric-alignment-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-alma-chor-leveraging-audio-lyric-alignment-with/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>AMBER2: Dual Ambiguity-Aware Emotion Recognition Applied to Speech and Text</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-amber2-dual-ambiguity-aware-emotion-recognition/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-amber2-dual-ambiguity-aware-emotion-recognition/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>AmbiDrop: Array-Agnostic Speech Enhancement Using Ambisonics Encoding and Dropout-Based Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ambidrop-array-agnostic-speech-enhancement-using/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ambidrop-array-agnostic-speech-enhancement-using/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>AMBISONIC-DML: A Benchmark Dataset for Dynamic Higher-Order Ambisonics Music with Motion-Aligned Stems</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ambisonic-dml-a-benchmark-dataset-for-dynamic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ambisonic-dml-a-benchmark-dataset-for-dynamic/</guid>
      <description>数据集 | 7.5/10</description>
    </item>
    <item>
      <title>An Anomaly-Aware and Audio-Enhanced Dual-Pathway Framework for Alzheimer’s Disease Progression Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-anomaly-aware-and-audio-enhanced-dual-pathway/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-anomaly-aware-and-audio-enhanced-dual-pathway/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>An Audio-Visual Speech Separation Network with Joint Cross-Attention and Iterative Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-audio-visual-speech-separation-network-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-audio-visual-speech-separation-network-with/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>An Efficient Neural Network for Modeling Human Auditory Neurograms for Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-efficient-neural-network-for-modeling-human/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-efficient-neural-network-for-modeling-human/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>An End-to-End Multimodal System for Subtitle Recognition and Chinese-Japanese Translation in Short Dramas</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-end-to-end-multimodal-system-for-subtitle/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-end-to-end-multimodal-system-for-subtitle/</guid>
      <description>多模态模型 | 7.0/10</description>
    </item>
    <item>
      <title>An Envelope Separation Aided Multi-Task Learning Model for Blind Source Counting and Localization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-envelope-separation-aided-multi-task-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-envelope-separation-aided-multi-task-learning/</guid>
      <description>声源定位 | 6.5/10</description>
    </item>
    <item>
      <title>An Event-Based Sequence Modeling Approach to Recognizing Non-Triad Chords with Oversegmentation Minimization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-event-based-sequence-modeling-approach-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-event-based-sequence-modeling-approach-to/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>An Unsupervised Alignment Feature Fusion System for Spoken Language-Based Dementia Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-unsupervised-alignment-feature-fusion-system/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-an-unsupervised-alignment-feature-fusion-system/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>Aneural Forward Filtering for Speaker-Image Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aneural-forward-filtering-for-speaker-image/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aneural-forward-filtering-for-speaker-image/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>AnimalCLAP: Taxonomy-Aware Language-Audio Pretraining for Species Recognition and Trait Inference</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-animalclap-taxonomy-aware-language-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-animalclap-taxonomy-aware-language-audio/</guid>
      <description>音频分类 | 8.0/10</description>
    </item>
    <item>
      <title>AnyAccomp: Generalizable Accompaniment Generation Via Quantized Melodic Bottleneck</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-anyaccomp-generalizable-accompaniment-generation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-anyaccomp-generalizable-accompaniment-generation/</guid>
      <description>音乐生成 | 8.0/10</description>
    </item>
    <item>
      <title>AnyRIR: Robust Non-Intrusive Room Impulse Response Estimation in the Wild</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-anyrir-robust-non-intrusive-room-impulse-response/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-anyrir-robust-non-intrusive-room-impulse-response/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>APKD: Aligned And Paced Knowledge Distillation Towards Lightweight Heterogeneous Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-apkd-aligned-and-paced-knowledge-distillation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-apkd-aligned-and-paced-knowledge-distillation/</guid>
      <description>情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>AQUA-Bench: Beyond finding answers to knowing when there are None in Audio Question Answering</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aqua-bench-beyond-finding-answers-to-knowing-when/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aqua-bench-beyond-finding-answers-to-knowing-when/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>AR-BSNet: Towards Ultra-Low Complexity Autoregressive Target Speaker Extraction With Band-Split Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ar-bsnet-towards-ultra-low-complexity/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ar-bsnet-towards-ultra-low-complexity/</guid>
      <description>语音分离 | 7.0/10</description>
    </item>
    <item>
      <title>AR&amp;D: A Framework for Retrieving and Describing Concepts for Interpreting AudioLLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ard-a-framework-for-retrieving-and-describing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ard-a-framework-for-retrieving-and-describing/</guid>
      <description>音频大模型 | 6.5/10</description>
    </item>
    <item>
      <title>Ara-BEST-RQ: Multi Dialectal Arabic SSL</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ara-best-rq-multi-dialectal-arabic-ssl/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ara-best-rq-multi-dialectal-arabic-ssl/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Arbitrarily Settable Frame Rate Neural Speech Codec with Content Adaptive Variable Length Segmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-arbitrarily-settable-frame-rate-neural-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-arbitrarily-settable-frame-rate-neural-speech/</guid>
      <description>音频生成 | 7.0/10</description>
    </item>
    <item>
      <title>ARCHI-TTS: A Flow-Matching-Based Text-to-Speech Model with Self-Supervised Semantic Aligner and Accelerated Inference</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-archi-tts-a-flow-matching-based-text-to-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-archi-tts-a-flow-matching-based-text-to-speech/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Are Modern Speech Enhancement Systems Vulnerable to Adversarial Attacks?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-are-modern-speech-enhancement-systems-vulnerable/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-are-modern-speech-enhancement-systems-vulnerable/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Assessing Identity Leakage in Talking Face Generation: Metrics and Evaluation Framework</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-assessing-identity-leakage-in-talking-face/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-assessing-identity-leakage-in-talking-face/</guid>
      <description>说话人脸生成 | 7.5/10</description>
    </item>
    <item>
      <title>Assessing the Impact of Speaker Identity in Speech Spoofing Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-assessing-the-impact-of-speaker-identity-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-assessing-the-impact-of-speaker-identity-in/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>Assessing The Perceptual Impact of Low-Altitude Aircraft Noise in Cities: An Auralization Framework Using Gaussian Beam Tracing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-assessing-the-perceptual-impact-of-low-altitude/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-assessing-the-perceptual-impact-of-low-altitude/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>Asynchrony-Aware Decoupled Multimodal Control for Cued Speech Video Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-asynchrony-aware-decoupled-multimodal-control-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-asynchrony-aware-decoupled-multimodal-control-for/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>ATOM: Adaptive Token-Level Optimal Transport Mixup for Speech Translation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-atom-adaptive-token-level-optimal-transport-mixup/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-atom-adaptive-token-level-optimal-transport-mixup/</guid>
      <description>语音翻译 | 8.0/10</description>
    </item>
    <item>
      <title>Atomic Norm Minimization Revisited: Progressive Atom Identification And Refinement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-atomic-norm-minimization-revisited-progressive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-atomic-norm-minimization-revisited-progressive/</guid>
      <description>声源定位 | 7.5/10</description>
    </item>
    <item>
      <title>Attention-Based Encoder-Decoder Target-Speaker Voice Activity Detection for Robust Speaker Diarization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attention-based-encoder-decoder-target-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attention-based-encoder-decoder-target-speaker/</guid>
      <description>说话人分离 | 8.0/10</description>
    </item>
    <item>
      <title>Attention-Weighted Centered Kernel Alignment for Knowledge Distillation in Large Audio-Language Models Applied To Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attention-weighted-centered-kernel-alignment-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attention-weighted-centered-kernel-alignment-for/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Attention2Probability: Attention-Driven Terminology Probability Estimation for Robust Speech-to-text System</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attention2probability-attention-driven/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attention2probability-attention-driven/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Attentive AV-Fusionnet: Audio-Visual Quality Prediction with Hybrid Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attentive-av-fusionnet-audio-visual-quality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attentive-av-fusionnet-audio-visual-quality/</guid>
      <description>音视频 | 7.0/10</description>
    </item>
    <item>
      <title>Attentive Masked Self-Distillation for Respiratory Sound Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attentive-masked-self-distillation-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-attentive-masked-self-distillation-for/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Auden-Voice: General-Purpose Voice Encoder for Speech and Language Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auden-voice-general-purpose-voice-encoder-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auden-voice-general-purpose-voice-encoder-for/</guid>
      <description>语音编码器 | 7.5/10</description>
    </item>
    <item>
      <title>Audience-Aware Co-speech Gesture Generation in Public Speaking via Anticipation Tokens</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audience-aware-co-speech-gesture-generation-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audience-aware-co-speech-gesture-generation-in/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>Audio Classification Models are Vulnerable to Filter Perturbations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-classification-models-are-vulnerable-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-classification-models-are-vulnerable-to/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Audio Deepfake Detection at the First Greeting: &#34;Hi!&#34;</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-deepfake-detection-at-the-first-greeting-hi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-deepfake-detection-at-the-first-greeting-hi/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>Audio Effect Estimation with DNN-Based Prediction and Search Algorithm</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-effect-estimation-with-dnn-based-prediction/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-effect-estimation-with-dnn-based-prediction/</guid>
      <description>音频效果估计 | 7.0/10</description>
    </item>
    <item>
      <title>Audio-Conditioned Diffusion LLMs for ASR and Deliberation Processing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-conditioned-diffusion-llms-for-asr-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-conditioned-diffusion-llms-for-asr-and/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Audio-Guided Multimodal Approach for Fine-Grained Alignment and Boundary Modeling in Active Speaker Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-guided-multimodal-approach-for-fine-grained/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-guided-multimodal-approach-for-fine-grained/</guid>
      <description>说话人检测 | 7.5/10</description>
    </item>
    <item>
      <title>Audio-Text Jailbreak Attack on Large Audio-Language Models: Towards Generality and Stealthiness</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-text-jailbreak-attack-on-large-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-text-jailbreak-attack-on-large-audio/</guid>
      <description>音频安全 | 7.0/10</description>
    </item>
    <item>
      <title>Audio-to-Score Jazz Solo Transcription with the Rhythm Perceiver</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-to-score-jazz-solo-transcription-with-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-to-score-jazz-solo-transcription-with-the/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Audio-Visual Deepfake Generation and Detection: An Exploratory Survey</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-visual-deepfake-generation-and-detection-an/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-visual-deepfake-generation-and-detection-an/</guid>
      <description>音频深度伪造检测 | 6.5/10</description>
    </item>
    <item>
      <title>Audio-Visual Feature Fusion for Calibrating Relevance Scores of Video Moment Retrieval</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-visual-feature-fusion-for-calibrating/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audio-visual-feature-fusion-for-calibrating/</guid>
      <description>视频片段检索 | 7.0/10</description>
    </item>
    <item>
      <title>AUDIOCARDS: Structured Metadata Improves Audio Language Models for Sound Design</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiocards-structured-metadata-improves-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiocards-structured-metadata-improves-audio/</guid>
      <description>音频检索 | 7.5/10</description>
    </item>
    <item>
      <title>AudioFuse: Unified Spectral-Temporal Learning Via A Hybrid VIT-1D CNN Architecture for Phonocardiogram Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiofuse-unified-spectral-temporal-learning-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiofuse-unified-spectral-temporal-learning-via/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>AudioGen-Omni: A Unified Multimodal Diffusion Transformer for Video-Synchronized Audio, Speech, and Song Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiogen-omni-a-unified-multimodal-diffusion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiogen-omni-a-unified-multimodal-diffusion/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>AUDIOGENIE-Reasoner: A Training-Free Multi-Agent Framework for Coarse-to-Fine Audio Deep Reasoning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiogenie-reasoner-a-training-free-multi-agent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-audiogenie-reasoner-a-training-free-multi-agent/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Auditory Illusion Benchmark for Large Audio Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auditory-illusion-benchmark-for-large-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auditory-illusion-benchmark-for-large-audio/</guid>
      <description>模型评估 | 7.0/10</description>
    </item>
    <item>
      <title>Auditory-Inspired Transformer for Binaural Speech Enhancement and Spatial Cue Preservation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auditory-inspired-transformer-for-binaural-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auditory-inspired-transformer-for-binaural-speech/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>AURA: A Stegaformer-Based Scalable Deep Audio Watermark with Extreme Robustness</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aura-a-stegaformer-based-scalable-deep-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-aura-a-stegaformer-based-scalable-deep-audio/</guid>
      <description>音频水印 | 7.5/10</description>
    </item>
    <item>
      <title>Auto-MatchCut: An Audio-Visual Retrieval Framework for Seamless Match Cutting</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auto-matchcut-an-audio-visual-retrieval-framework/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auto-matchcut-an-audio-visual-retrieval-framework/</guid>
      <description>跨模态检索 | 7.0/10</description>
    </item>
    <item>
      <title>Automated Dysphagia Screening Using Noninvasive Neck Acoustic Sensing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automated-dysphagia-screening-using-noninvasive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automated-dysphagia-screening-using-noninvasive/</guid>
      <description>音频分类 | 8.0/10</description>
    </item>
    <item>
      <title>Automatic Estimation of Speaker Diarization Error Rate Based on Features of Audio Quality and Speaker Discriminability</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automatic-estimation-of-speaker-diarization-error/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automatic-estimation-of-speaker-diarization-error/</guid>
      <description>说话人分离 | 7.5/10</description>
    </item>
    <item>
      <title>Automatic Music Mixing Using a Generative Model of Effect Embeddings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automatic-music-mixing-using-a-generative-model/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automatic-music-mixing-using-a-generative-model/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Automatic Music Sample Identification with Multi-Track Contrastive Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automatic-music-sample-identification-with-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-automatic-music-sample-identification-with-multi/</guid>
      <description>音频检索 | 7.5/10</description>
    </item>
    <item>
      <title>AUV: Teaching Audio Universal Vector Quantization with Single Nested Codebook</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auv-teaching-audio-universal-vector-quantization/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auv-teaching-audio-universal-vector-quantization/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>Auxiliary Multi-Label Training For Improving the Robustness of Audio Deepfake Detection on AI-Processed Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auxiliary-multi-label-training-for-improving-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-auxiliary-multi-label-training-for-improving-the/</guid>
      <description>音频深度伪造检测 | 6.5/10</description>
    </item>
    <item>
      <title>AVATAR: Audio-Visual Adaptive Fusion via Trained Agent Reinforcement for Multimodal Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-avatar-audio-visual-adaptive-fusion-via-trained/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-avatar-audio-visual-adaptive-fusion-via-trained/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>AVO-65: A Large-Scale Hierarchical Audio-Visual Object Dataset</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-avo-65-a-large-scale-hierarchical-audio-visual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-avo-65-a-large-scale-hierarchical-audio-visual/</guid>
      <description>音视频 | 7.0/10</description>
    </item>
    <item>
      <title>B-GRPO: Unsupervised Speech Emotion Recognition Based on Batched-Group Relative Policy Optimization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-b-grpo-unsupervised-speech-emotion-recognition/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-b-grpo-unsupervised-speech-emotion-recognition/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>BACHI: Boundary-Aware Symbolic Chord Recognition Through Masked Iterative Decoding on POP and Classical Music</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bachi-boundary-aware-symbolic-chord-recognition/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bachi-boundary-aware-symbolic-chord-recognition/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Bayesian Low-Rank Factorization for Robust Model Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bayesian-low-rank-factorization-for-robust-model/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bayesian-low-rank-factorization-for-robust-model/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Bayesian Signal Separation Via Plug-and-Play Diffusion-Within-Gibbs Sampling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bayesian-signal-separation-via-plug-and-play/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bayesian-signal-separation-via-plug-and-play/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>BBPE16: UTF-16-Based Byte-Level Byte-Pair Encoding for Improved Multilingual Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bbpe16-utf-16-based-byte-level-byte-pair-encoding/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bbpe16-utf-16-based-byte-level-byte-pair-encoding/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Beamforming Using Virtual Microphones for Hearing Aid Applications</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beamforming-using-virtual-microphones-for-hearing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beamforming-using-virtual-microphones-for-hearing/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Beat and Downbeat Detection: A Reformulated Approach</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beat-and-downbeat-detection-a-reformulated/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beat-and-downbeat-detection-a-reformulated/</guid>
      <description>音乐理解 | 7.5/10</description>
    </item>
    <item>
      <title>BeatMamba: Bidirectional Selective State-Space Modeling for Efficient Beat Tracking</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beatmamba-bidirectional-selective-state-space/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beatmamba-bidirectional-selective-state-space/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Behind the Scenes: Mechanistic Interpretability of Lora-Adapted Whisper for Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-behind-the-scenes-mechanistic-interpretability-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-behind-the-scenes-mechanistic-interpretability-of/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Benchmarking Humans And Machines On Complex Multilingual Speech Understanding Tasks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-benchmarking-humans-and-machines-on-complex/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-benchmarking-humans-and-machines-on-complex/</guid>
      <description>音频问答 | 7.5/10</description>
    </item>
    <item>
      <title>Benchmarking Music Autotagging with MGPHot Expert Annotations vs. Generic Tag Datasets</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-benchmarking-music-autotagging-with-mgphot-expert/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-benchmarking-music-autotagging-with-mgphot-expert/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>BEST-RQ-based Self-Supervised Learning for Whisper Domain Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-best-rq-based-self-supervised-learning-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-best-rq-based-self-supervised-learning-for/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>BEST-STD 2.0: Balanced and Efficient Speech Tokenizer for Spoken Term Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-best-std-20-balanced-and-efficient-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-best-std-20-balanced-and-efficient-speech/</guid>
      <description>音频检索 | 7.5/10</description>
    </item>
    <item>
      <title>Beyond Face Swapping: A Diffusion-Based Digital Human Benchmark for Multimodal Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beyond-face-swapping-a-diffusion-based-digital/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beyond-face-swapping-a-diffusion-based-digital/</guid>
      <description>音频深度伪造检测 | 8.1/10</description>
    </item>
    <item>
      <title>Beyond Global Emotion: Fine-Grained Emotional Speech Synthesis with Dynamic Word-Level Modulation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beyond-global-emotion-fine-grained-emotional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beyond-global-emotion-fine-grained-emotional/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Beyond Mapping: Domain-Invariant Representations via Spectral Embedding of Optimal Transport Plans</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beyond-mapping-domain-invariant-representations/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-beyond-mapping-domain-invariant-representations/</guid>
      <description>领域适应 | 7.5/10</description>
    </item>
    <item>
      <title>Bimodal Fusion Framework for Dynamic Facial Expression Recognition In-The-Wild</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bimodal-fusion-framework-for-dynamic-facial/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bimodal-fusion-framework-for-dynamic-facial/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>BioSEN: A Bio-Acoustic Signal Enhancement Network for Animal Vocalizations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-biosen-a-bio-acoustic-signal-enhancement-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-biosen-a-bio-acoustic-signal-enhancement-network/</guid>
      <description>生物声学 | 7.5/10</description>
    </item>
    <item>
      <title>BiRQ: Bi-Level Self-Labeling Random Quantization for Self-Supervised Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-birq-bi-level-self-labeling-random-quantization/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-birq-bi-level-self-labeling-random-quantization/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Bleed No More: Generative Interference Reduction for Musical Recordings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bleed-no-more-generative-interference-reduction/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bleed-no-more-generative-interference-reduction/</guid>
      <description>音乐源分离 | 7.0/10</description>
    </item>
    <item>
      <title>Bloodroot: When Watermarking Turns Poisonous for Stealthy Backdoor</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bloodroot-when-watermarking-turns-poisonous-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bloodroot-when-watermarking-turns-poisonous-for/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>Bone-Conduction Guided Multimodal Speech Enhancement with Conditional Diffusion Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bone-conduction-guided-multimodal-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bone-conduction-guided-multimodal-speech/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Brainprint-Modulated Target Speaker Extraction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-brainprint-modulated-target-speaker-extraction/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-brainprint-modulated-target-speaker-extraction/</guid>
      <description>语音分离 | 8.0/10</description>
    </item>
    <item>
      <title>Break-the-Beat! Controllable MIDI-to-Drum audio synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-break-the-beat-controllable-midi-to-drum-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-break-the-beat-controllable-midi-to-drum-audio/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>BridgeCode: A Dual Speech Representation Paradigm for Autoregressive Zero-Shot Text-to-Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridgecode-a-dual-speech-representation-paradigm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridgecode-a-dual-speech-representation-paradigm/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Bridging the Front-End and Back-End for Robust ASR via Cross-Attention-Based U-Net</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridging-the-front-end-and-back-end-for-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridging-the-front-end-and-back-end-for-robust/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Bridging the Measurement–Simulation Gap in Room Acoustics with Real2sim Diffusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridging-the-measurementsimulation-gap-in-room/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridging-the-measurementsimulation-gap-in-room/</guid>
      <description>声源定位 | 8.5/10</description>
    </item>
    <item>
      <title>Bridging the Semantic Gap: Cross-Attentive Fusion for Joint Acoustic-Semantic Speech Quality Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridging-the-semantic-gap-cross-attentive-fusion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bridging-the-semantic-gap-cross-attentive-fusion/</guid>
      <description>语音质量评估 | 8.5/10</description>
    </item>
    <item>
      <title>BSMP-SENet:Band-Split Magnitude-Phase Network for Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bsmp-senetband-split-magnitude-phase-network-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-bsmp-senetband-split-magnitude-phase-network-for/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>CALM: Joint Contextual Acoustic-Linguistic Modeling for Personalization of Multi-Speaker ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-calm-joint-contextual-acoustic-linguistic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-calm-joint-contextual-acoustic-linguistic/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>CaMoD: Causal-Aware Modality Denoising for Multimodal Dialogue Intent Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-camod-causal-aware-modality-denoising-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-camod-causal-aware-modality-denoising-for/</guid>
      <description>多模态对话意图识别 | 7.5/10</description>
    </item>
    <item>
      <title>Can Hierarchical Cross-Modal Fusion Predict Human Perception of AI Dubbed Content?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-can-hierarchical-cross-modal-fusion-predict-human/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-can-hierarchical-cross-modal-fusion-predict-human/</guid>
      <description>模型评估 | 6.0/10</description>
    </item>
    <item>
      <title>Can Large Audio Language Models Understand Audio Well? Speech, Scene and Events Understanding Benchmark for LALMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-can-large-audio-language-models-understand-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-can-large-audio-language-models-understand-audio/</guid>
      <description>基准测试 | 7.0/10</description>
    </item>
    <item>
      <title>Caption and Audio-Guided Video Representation Learning with Gated Attention for Partially Relevant Video Retrieval</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-caption-and-audio-guided-video-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-caption-and-audio-guided-video-representation/</guid>
      <description>视频检索 | 7.0/10</description>
    </item>
    <item>
      <title>Cardiobridge-DM: Bridging Cross-Cohort Heart Sound Synthesis via Rhythm-Aware Semi-Supervised Diffusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cardiobridge-dm-bridging-cross-cohort-heart-sound/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cardiobridge-dm-bridging-cross-cohort-heart-sound/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>CASTELLA: Long Audio Dataset with Captions and Temporal Boundaries</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-castella-long-audio-dataset-with-captions-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-castella-long-audio-dataset-with-captions-and/</guid>
      <description>音频检索 | 8.5/10</description>
    </item>
    <item>
      <title>CCST: Cross-Modal and Consistency-Aware Self-Training for Source-Free Unsupervised Domain Adaptation in Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ccst-cross-modal-and-consistency-aware-self/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ccst-cross-modal-and-consistency-aware-self/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Chunk-Wise Attention Transducers for Fast and Accurate Streaming Speech-to-Text</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-chunk-wise-attention-transducers-for-fast-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-chunk-wise-attention-transducers-for-fast-and/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Chunkwise Aligners for Streaming Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-chunkwise-aligners-for-streaming-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-chunkwise-aligners-for-streaming-speech/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Class-Aware Permutation-Invariant Signal-to-Distortion Ratio for Semantic Segmentation of Sound Scene with Same-Class Sources</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-class-aware-permutation-invariant-signal-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-class-aware-permutation-invariant-signal-to/</guid>
      <description>音频场景理解 | 7.5/10</description>
    </item>
    <item>
      <title>Clue2Emo: A Brain-Inspired Framework for Open-Vocabulary Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-clue2emo-a-brain-inspired-framework-for-open/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-clue2emo-a-brain-inspired-framework-for-open/</guid>
      <description>语音情感识别 | 8.5/10</description>
    </item>
    <item>
      <title>CMSA-Mamba: Hierarchical State Space Modeling for Audio-Based Depression Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cmsa-mamba-hierarchical-state-space-modeling-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cmsa-mamba-hierarchical-state-space-modeling-for/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>Co-Initialization of Control Filter and Secondary Path via Meta-Learning for Active Noise Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-co-initialization-of-control-filter-and-secondary/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-co-initialization-of-control-filter-and-secondary/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>CodecSlime: Temporal Redundancy Compression of Neural Speech Codec via Dynamic Frame Rate</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-codecslime-temporal-redundancy-compression-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-codecslime-temporal-redundancy-compression-of/</guid>
      <description>语音编码 | 7.5/10</description>
    </item>
    <item>
      <title>CodeSep: Low-Bitrate Codec-Driven Speech Separation with Base-Token Disentanglement and Auxiliary-Token Serial Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-codesep-low-bitrate-codec-driven-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-codesep-low-bitrate-codec-driven-speech/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>Combining Multi-Order Attention and Multi-Resolution Discriminator for High-Fidelity Neural Vocoder</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-combining-multi-order-attention-and-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-combining-multi-order-attention-and-multi/</guid>
      <description>语音合成 | 6.5/10</description>
    </item>
    <item>
      <title>Combining SSL Speech Features, Contextual Transformers and Mamba Models for Realistic Audio Spoofing Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-combining-ssl-speech-features-contextual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-combining-ssl-speech-features-contextual/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>Compression meets Sampling: LZ78-SPA for Efficient Symbolic Music Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-compression-meets-sampling-lz78-spa-for-efficient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-compression-meets-sampling-lz78-spa-for-efficient/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>CompSpoof: A Dataset and Joint Learning Framework for Component-Level Audio Anti-Spoofing Countermeasures</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-compspoof-a-dataset-and-joint-learning-framework/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-compspoof-a-dataset-and-joint-learning-framework/</guid>
      <description>音频深度伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Condition-Invariant fMRI decoding of speech intelligibility with deep state space model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-condition-invariant-fmri-decoding-of-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-condition-invariant-fmri-decoding-of-speech/</guid>
      <description>神经解码 | 7.0/10</description>
    </item>
    <item>
      <title>Conditional Diffusion Models for Mental Health-Preserving Voice Conversion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-conditional-diffusion-models-for-mental-health/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-conditional-diffusion-models-for-mental-health/</guid>
      <description>语音转换 | 8.0/10</description>
    </item>
    <item>
      <title>Confidence-Based Filtering for Speech Dataset Curation with Generative Speech Enhancement Using Discrete Tokens</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-confidence-based-filtering-for-speech-dataset/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-confidence-based-filtering-for-speech-dataset/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>Confidence-Guided Error Correction for Disordered Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-confidence-guided-error-correction-for-disordered/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-confidence-guided-error-correction-for-disordered/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Connecting Layer-Wise Representation of Wavlm with Spectro-Temporal Modulation on Speaker Verification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-connecting-layer-wise-representation-of-wavlm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-connecting-layer-wise-representation-of-wavlm/</guid>
      <description>说话人验证 | 6.0/10</description>
    </item>
    <item>
      <title>Constraint Optimized Multichannel Mixer-Limiter Design</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-constraint-optimized-multichannel-mixer-limiter/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-constraint-optimized-multichannel-mixer-limiter/</guid>
      <description>多通道 | 7.0/10</description>
    </item>
    <item>
      <title>Constructing Composite Features for Interpretable Music-Tagging</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-constructing-composite-features-for-interpretable/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-constructing-composite-features-for-interpretable/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Content Anonymization for Privacy in Long-Form Audio</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-content-anonymization-for-privacy-in-long-form/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-content-anonymization-for-privacy-in-long-form/</guid>
      <description>语音匿名化 | 7.5/10</description>
    </item>
    <item>
      <title>Content Leakage in Librispeech and its Impact on the Privacy Evaluation of Speaker Anonymization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-content-leakage-in-librispeech-and-its-impact-on/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-content-leakage-in-librispeech-and-its-impact-on/</guid>
      <description>语音匿名化 | 7.5/10</description>
    </item>
    <item>
      <title>Content-Preserving Speech Representation Learning Via Adaptive Segment-Level Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-content-preserving-speech-representation-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-content-preserving-speech-representation-learning/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Context-Aware Dynamic Graph Learning for Multimodal Emotion Recognition with Missing Modalities</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-context-aware-dynamic-graph-learning-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-context-aware-dynamic-graph-learning-for/</guid>
      <description>语音情感识别 | 8.8/10</description>
    </item>
    <item>
      <title>Contextual Biasing for ASR in Speech LLM with Common Word Cues and Bias Word Position Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-contextual-biasing-for-asr-in-speech-llm-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-contextual-biasing-for-asr-in-speech-llm-with/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Continuation Method for Feedback Delay Network Modal Decomposition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-continuation-method-for-feedback-delay-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-continuation-method-for-feedback-delay-network/</guid>
      <description>空间音频 | 6.5/10</description>
    </item>
    <item>
      <title>Continuous-Token Diffusion for Speaker-Referenced TTS in Multimodal LLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-continuous-token-diffusion-for-speaker-referenced/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-continuous-token-diffusion-for-speaker-referenced/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Contrastive Timbre Representations for Musical Instrument And Synthesizer Retrieval</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-contrastive-timbre-representations-for-musical/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-contrastive-timbre-representations-for-musical/</guid>
      <description>音频检索 | 7.5/10</description>
    </item>
    <item>
      <title>Controllable Embedding Transformation for Mood-Guided Music Retrieval</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-controllable-embedding-transformation-for-mood/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-controllable-embedding-transformation-for-mood/</guid>
      <description>音乐检索 | 7.5/10</description>
    </item>
    <item>
      <title>Cooperative Multi-Agent Reinforcement Learning for Adaptive Aggregation in Semi-Supervised Federated Learning with non-IID Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cooperative-multi-agent-reinforcement-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cooperative-multi-agent-reinforcement-learning/</guid>
      <description>联邦学习 | 7.0/10</description>
    </item>
    <item>
      <title>CosyAccent: Duration-Controllable Accent Normalization using Source-Synthesis Training Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cosyaccent-duration-controllable-accent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cosyaccent-duration-controllable-accent/</guid>
      <description>语音转换 | 7.8/10</description>
    </item>
    <item>
      <title>Coupling Acoustic Geometry and Visual Semantics for Robust Depth Estimation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-coupling-acoustic-geometry-and-visual-semantics/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-coupling-acoustic-geometry-and-visual-semantics/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>CoVA: Text-Guided Composed Video Retrieval for Audio-Visual Content</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cova-text-guided-composed-video-retrieval-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cova-text-guided-composed-video-retrieval-for/</guid>
      <description>跨模态检索 | 6.5/10</description>
    </item>
    <item>
      <title>Cross-Architecture Knowledge Distillation of WavLM for Lightweight Speaker Verification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-architecture-knowledge-distillation-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-architecture-knowledge-distillation-of/</guid>
      <description>说话人验证 | 8.0/10</description>
    </item>
    <item>
      <title>Cross-Cultural Bias in Mel-Scale Representations: Evidence and Alternatives from Speech and Music</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-cultural-bias-in-mel-scale-representations/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-cultural-bias-in-mel-scale-representations/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Cross-Domain Contrastive Learning with Dynamic Threshold Calibration for Source Speaker Tracing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-domain-contrastive-learning-with-dynamic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-domain-contrastive-learning-with-dynamic/</guid>
      <description>说话人验证 | 8.0/10</description>
    </item>
    <item>
      <title>Cross-Lingual Alzheimer’s Disease Detection with Multimodal LLMs via Speech Cue-Augmented Prompting and Instruction Tuning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-lingual-alzheimers-disease-detection-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-lingual-alzheimers-disease-detection-with/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>Cross-Lingual F5-TTS: Towards Language-Agnostic Voice Cloning and Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-lingual-f5-tts-towards-language-agnostic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-lingual-f5-tts-towards-language-agnostic/</guid>
      <description>语音克隆 | 7.5/10</description>
    </item>
    <item>
      <title>Cross-Lingual Interleaving for Speech Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-lingual-interleaving-for-speech-language/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-lingual-interleaving-for-speech-language/</guid>
      <description>语音大模型 | 7.5/10</description>
    </item>
    <item>
      <title>Cross-Modal Bottleneck Fusion for Noise Robust Audio-Visual Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-modal-bottleneck-fusion-for-noise-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-modal-bottleneck-fusion-for-noise-robust/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Cross-Modal Knowledge Distillation for Speech Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-modal-knowledge-distillation-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-cross-modal-knowledge-distillation-for-speech/</guid>
      <description>语音大模型 | 7.0/10</description>
    </item>
    <item>
      <title>CTC-DID: CTC-Based Arabic Dialect Identification for Streaming Applications</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ctc-did-ctc-based-arabic-dialect-identification/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ctc-did-ctc-based-arabic-dialect-identification/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Curriculum Learning with Contrastive Loss for Lightweight Speaker Verification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-curriculum-learning-with-contrastive-loss-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-curriculum-learning-with-contrastive-loss-for/</guid>
      <description>说话人验证 | 6.5/10</description>
    </item>
    <item>
      <title>D3PIA: A Discrete Denoising Diffusion Model for Piano Accompaniment Generation from Lead Sheet</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-d3pia-a-discrete-denoising-diffusion-model-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-d3pia-a-discrete-denoising-diffusion-model-for/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>DAIEN-TTS: Disentangled Audio Infilling for Environment-Aware Text-to-Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-daien-tts-disentangled-audio-infilling-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-daien-tts-disentangled-audio-infilling-for/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>DAMO: A Data-Efficient Multimodal Orchestrator for Temporal Reasoning with Video LLMS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-damo-a-data-efficient-multimodal-orchestrator-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-damo-a-data-efficient-multimodal-orchestrator-for/</guid>
      <description>视频问答 | 7.0/10</description>
    </item>
    <item>
      <title>DAT-CFTNet: Speech Enhancement for Cochlear Implant Recipients using Attention-based Dual-Path Recurrent Neural Network</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dat-cftnet-speech-enhancement-for-cochlear/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dat-cftnet-speech-enhancement-for-cochlear/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>DBFT-SD: Weakly Supervised Multimodal Detection of Sensitive Audio-Visual Content</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dbft-sd-weakly-supervised-multimodal-detection-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dbft-sd-weakly-supervised-multimodal-detection-of/</guid>
      <description>音频事件检测 | 8.0/10</description>
    </item>
    <item>
      <title>DDSC: Dynamic Dual-Signal Curriculum for Data-Efficient Acoustic Scene Classification Under Domain Shift</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ddsc-dynamic-dual-signal-curriculum-for-data/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ddsc-dynamic-dual-signal-curriculum-for-data/</guid>
      <description>音频场景分类 | 7.0/10</description>
    </item>
    <item>
      <title>DDSR-Net: Robust Multimodal Sentiment Analysis via Dynamic Modality Reliability Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ddsr-net-robust-multimodal-sentiment-analysis-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ddsr-net-robust-multimodal-sentiment-analysis-via/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>DECAF: Dynamic Envelope Context-Aware Fusion for Speech-Envelope Reconstruction from EEG</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-decaf-dynamic-envelope-context-aware-fusion-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-decaf-dynamic-envelope-context-aware-fusion-for/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Decoder-Only Conformer with Modality-Aware Sparse Mixtures of Experts for ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-decoder-only-conformer-with-modality-aware-sparse/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-decoder-only-conformer-with-modality-aware-sparse/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Decorrelation-Enhanced Multiband Subband Adaptive Filtering for RIR Tracking in Sound Field Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-decorrelation-enhanced-multiband-subband-adaptive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-decorrelation-enhanced-multiband-subband-adaptive/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>Deep Dubbing: End-to-End Auto-Audiobook System with Text-to-Timbre and Context-Aware Instruct-TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deep-dubbing-end-to-end-auto-audiobook-system/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deep-dubbing-end-to-end-auto-audiobook-system/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Deep Learning-Based Joint Optimization of Adaptive Feedback Cancellation and Residual Feedback Suppression for Hearing Aids</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deep-learning-based-joint-optimization-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deep-learning-based-joint-optimization-of/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Deep Spatial Clue Informed Ambisonic Encoding for Irregular Microphone Arrays</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deep-spatial-clue-informed-ambisonic-encoding-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deep-spatial-clue-informed-ambisonic-encoding-for/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>Deepaq: A Perceptual Audio Quality Metric Based on Foundational Models and Weakly Supervised Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deepaq-a-perceptual-audio-quality-metric-based-on/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-deepaq-a-perceptual-audio-quality-metric-based-on/</guid>
      <description>音频质量评估 | 7.5/10</description>
    </item>
    <item>
      <title>Denoising Of Stochastic Ray Tracing Room Impulse Responses</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-denoising-of-stochastic-ray-tracing-room-impulse/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-denoising-of-stochastic-ray-tracing-room-impulse/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>DepthTalk: Few-Shot Talking Head Generation with Depth-Aware 3D Gaussian Field Motion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-depthtalk-few-shot-talking-head-generation-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-depthtalk-few-shot-talking-head-generation-with/</guid>
      <description>说话人生成 | 7.0/10</description>
    </item>
    <item>
      <title>Detecting and Attributing Synthetic Spanish Speech: The HISPASpoof Dataset</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-detecting-and-attributing-synthetic-spanish/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-detecting-and-attributing-synthetic-spanish/</guid>
      <description>语音伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>DGSDNet: Dual-Graph Spectral Diffusion Network for Incomplete Multimodal Emotion Recognition in Conversations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dgsdnet-dual-graph-spectral-diffusion-network-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dgsdnet-dual-graph-spectral-diffusion-network-for/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Diff-vs: Efficient Audio-Aware Diffusion U-Net for Vocals Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diff-vs-efficient-audio-aware-diffusion-u-net-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diff-vs-efficient-audio-aware-diffusion-u-net-for/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>Diffemotalk: Audio-Driven Facial Animation with Fine-Grained Emotion Control via Diffusion Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diffemotalk-audio-driven-facial-animation-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diffemotalk-audio-driven-facial-animation-with/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Differentiable Grouped Feedback Delay Networks for Learning Direction and Position-Dependent Late Reverberation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-differentiable-grouped-feedback-delay-networks/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-differentiable-grouped-feedback-delay-networks/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>Differentiable Pulsetable Synthesis for Wind Instrument Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-differentiable-pulsetable-synthesis-for-wind/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-differentiable-pulsetable-synthesis-for-wind/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Diffusion Timbre Transfer via Mutual Information Guided Inpainting</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diffusion-timbre-transfer-via-mutual-information/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diffusion-timbre-transfer-via-mutual-information/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Direct Preference Optimization For Speech Autoregressive Diffusion Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-direct-preference-optimization-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-direct-preference-optimization-for-speech/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Direct Simultaneous Translation Activation for Large Audio-Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-direct-simultaneous-translation-activation-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-direct-simultaneous-translation-activation-for/</guid>
      <description>语音翻译 | 6.0/10</description>
    </item>
    <item>
      <title>Direct Transfer of Prosody in Speech-to-speech Translation using Disentangled Speech Tokens</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-direct-transfer-of-prosody-in-speech-to-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-direct-transfer-of-prosody-in-speech-to-speech/</guid>
      <description>语音翻译 | 7.5/10</description>
    </item>
    <item>
      <title>Directly Trained Spiking Neural Networks with Adaptive Phase Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-directly-trained-spiking-neural-networks-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-directly-trained-spiking-neural-networks-with/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>DisContSE: Single-Step Diffusion Speech Enhancement based on Joint Discrete and Continuous Embeddings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-discontse-single-step-diffusion-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-discontse-single-step-diffusion-speech/</guid>
      <description>语音增强 | 8.5/10</description>
    </item>
    <item>
      <title>Discrete Diffusion for Generative Modeling of Text-Aligned Speech Tokens</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-discrete-diffusion-for-generative-modeling-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-discrete-diffusion-for-generative-modeling-of/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Discrete-Continuous Fusion With Adaptive Hierarchical Features For Audio Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-discrete-continuous-fusion-with-adaptive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-discrete-continuous-fusion-with-adaptive/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>Disentangled Authenticity Representation for Partially Deepfake Audio Localization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-disentangled-authenticity-representation-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-disentangled-authenticity-representation-for/</guid>
      <description>音频深度伪造检测 | 6.5/10</description>
    </item>
    <item>
      <title>Disentangling Physiology from Fidelity: Latent-Guided Diffusion Models for Cross-Modal Cardiac Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-disentangling-physiology-from-fidelity-latent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-disentangling-physiology-from-fidelity-latent/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Dissecting Performance Degradation in Audio Source Separation under Sampling Frequency Mismatch</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dissecting-performance-degradation-in-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dissecting-performance-degradation-in-audio/</guid>
      <description>音乐源分离 | 7.5/10</description>
    </item>
    <item>
      <title>DISSR: Disentangling Speech Representation for Degradation-Prior Guided Cross-Domain Speech Restoration</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dissr-disentangling-speech-representation-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dissr-disentangling-speech-representation-for/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Distilling Attention Knowledge for Speaker Verification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-distilling-attention-knowledge-for-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-distilling-attention-knowledge-for-speaker/</guid>
      <description>说话人验证 | 8.0/10</description>
    </item>
    <item>
      <title>Distributed Multichannel Active Noise Control with Asynchronous Communication</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-distributed-multichannel-active-noise-control/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-distributed-multichannel-active-noise-control/</guid>
      <description>信号处理 | 8.0/10</description>
    </item>
    <item>
      <title>DiTSE: High-Fidelity Generative Speech Enhancement via Latent Diffusion Transformers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ditse-high-fidelity-generative-speech-enhancement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ditse-high-fidelity-generative-speech-enhancement/</guid>
      <description>语音增强 | 8.5/10</description>
    </item>
    <item>
      <title>DiTSinger: Scaling Singing Voice Synthesis with Diffusion Transformer and Implicit Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ditsinger-scaling-singing-voice-synthesis-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ditsinger-scaling-singing-voice-synthesis-with/</guid>
      <description>歌唱语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Diverse and Few-Step Audio Captioning via Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diverse-and-few-step-audio-captioning-via-flow/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-diverse-and-few-step-audio-captioning-via-flow/</guid>
      <description>音频字幕生成 | 6.5/10</description>
    </item>
    <item>
      <title>DMP-TTS: Disentangled Multi-Modal Prompting for Controllable Text-to-Speech with Chained Guidance</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dmp-tts-disentangled-multi-modal-prompting-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dmp-tts-disentangled-multi-modal-prompting-for/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Do Bias Benchmarks Generalise? Evidence from Voice-Based Evaluation of Gender Bias in Speechllms</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-bias-benchmarks-generalise-evidence-from-voice/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-bias-benchmarks-generalise-evidence-from-voice/</guid>
      <description>模型评估 | 8.0/10</description>
    </item>
    <item>
      <title>Do Foundational Audio Encoders Understand Music Structure?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-foundational-audio-encoders-understand-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-foundational-audio-encoders-understand-music/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Do Speech LLMs Learn Crossmodal Embedding Spaces?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-speech-llms-learn-crossmodal-embedding-spaces/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-speech-llms-learn-crossmodal-embedding-spaces/</guid>
      <description>音频检索 | 6.5/10</description>
    </item>
    <item>
      <title>Do We Need EMA for Diffusion-Based Speech Enhancement? Toward A Magnitude-Preserving Network Architecture</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-we-need-ema-for-diffusion-based-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-we-need-ema-for-diffusion-based-speech/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Do we really need self-attention for streaming automatic speech recognition?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-we-really-need-self-attention-for-streaming/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-we-really-need-self-attention-for-streaming/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Do You Hear What I Mean? Quantifying the Instruction-Perception GAP in Instruction-Guided Expressive Text-to-Speech Systems</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-you-hear-what-i-mean-quantifying-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-do-you-hear-what-i-mean-quantifying-the/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Does the Pre-Training of an Embedding Influence its Encoding of Age?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-does-the-pre-training-of-an-embedding-influence/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-does-the-pre-training-of-an-embedding-influence/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>DOMA: Leveraging Diffusion Language Models with Adaptive Prior for Intent Classification and Slot Filling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-doma-leveraging-diffusion-language-models-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-doma-leveraging-diffusion-language-models-with/</guid>
      <description>语音对话系统 | 8.5/10</description>
    </item>
    <item>
      <title>Domain Partitioning Meets Parameter-Efficient Fine-Tuning: A Novel Method for Improved Language-Queried Audio Source Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-domain-partitioning-meets-parameter-efficient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-domain-partitioning-meets-parameter-efficient/</guid>
      <description>音频分离 | 7.5/10</description>
    </item>
    <item>
      <title>Domain-Aware Scheduling for ASR Fine-Tuning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-domain-aware-scheduling-for-asr-fine-tuning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-domain-aware-scheduling-for-asr-fine-tuning/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Domain-Invariant Representation Learning of Bird Sounds</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-domain-invariant-representation-learning-of-bird/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-domain-invariant-representation-learning-of-bird/</guid>
      <description>生物声学 | 6.5/10</description>
    </item>
    <item>
      <title>DPO-Regularized Regression for Age Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dpo-regularized-regression-for-age-prediction/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dpo-regularized-regression-for-age-prediction/</guid>
      <description>说话人识别 | 7.5/10</description>
    </item>
    <item>
      <title>DPT-Net: Dual-Path Transformer Network with Hierarchical Fusion for EEG-based Envelope Reconstruction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dpt-net-dual-path-transformer-network-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dpt-net-dual-path-transformer-network-with/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>DSpAST: Disentangled Representations for Spatial Audio Reasoning with Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dspast-disentangled-representations-for-spatial/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dspast-disentangled-representations-for-spatial/</guid>
      <description>音频问答 | 8.0/10</description>
    </item>
    <item>
      <title>DSRMS-TransUnet: A Decentralized Non-Shifted Transunet for Shallow Water Acoustic Source Range Estimation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dsrms-transunet-a-decentralized-non-shifted/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dsrms-transunet-a-decentralized-non-shifted/</guid>
      <description>声源定位 | 8.0/10</description>
    </item>
    <item>
      <title>DSSR: Decoupling Salient and Subtle Representations Under Missing Modalities for Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dssr-decoupling-salient-and-subtle/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dssr-decoupling-salient-and-subtle/</guid>
      <description>情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Dual Contrastive Learning for Semi-Supervised Domain Adaptation in Bi-Modal Depression Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-contrastive-learning-for-semi-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-contrastive-learning-for-semi-supervised/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>Dual Data Scaling for Robust Two-Stage User-Defined Keyword Spotting</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-data-scaling-for-robust-two-stage-user/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-data-scaling-for-robust-two-stage-user/</guid>
      <description>语音活动检测 | 7.5/10</description>
    </item>
    <item>
      <title>Dual-Perspective Multimodal Sentiment Analysis with MoE Fusion: Representation Learning via Semantic Resonance and Divergence</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-perspective-multimodal-sentiment-analysis/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-perspective-multimodal-sentiment-analysis/</guid>
      <description>多模态情感分析 | 7.0/10</description>
    </item>
    <item>
      <title>Dual-Strategy-Enhanced Conbimamba for Neural Speaker Diarization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-strategy-enhanced-conbimamba-for-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dual-strategy-enhanced-conbimamba-for-neural/</guid>
      <description>说话人分离 | 8.0/10</description>
    </item>
    <item>
      <title>Dynamic Balanced Cross-Modal Attention with Gated Sequence Restoration: Towards Robust Multimodal Sentiment Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamic-balanced-cross-modal-attention-with-gated/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamic-balanced-cross-modal-attention-with-gated/</guid>
      <description>跨模态 | 7.5/10</description>
    </item>
    <item>
      <title>Dynamic Noise-Aware Multi Lora Framework Towards Real-World Audio Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamic-noise-aware-multi-lora-framework-towards/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamic-noise-aware-multi-lora-framework-towards/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>Dynamic Spectrogram Analysis with Local-Aware Graph Networks for Audio Anti-Spoofing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamic-spectrogram-analysis-with-local-aware/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamic-spectrogram-analysis-with-local-aware/</guid>
      <description>音频深度伪造检测 | 8.5/10</description>
    </item>
    <item>
      <title>Dynamically Slimmable Speech Enhancement Network with Metric-Guided Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamically-slimmable-speech-enhancement-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-dynamically-slimmable-speech-enhancement-network/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>E2E-AEC: Implementing An End-To-End Neural Network Learning Approach for Acoustic Echo Cancellation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-e2e-aec-implementing-an-end-to-end-neural-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-e2e-aec-implementing-an-end-to-end-neural-network/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Easy Turn: Integrating Acoustic and Linguistic Modalities for Robust Turn-Taking in Full-Duplex Spoken Dialogue Systems</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-easy-turn-integrating-acoustic-and-linguistic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-easy-turn-integrating-acoustic-and-linguistic/</guid>
      <description>语音对话系统 | 7.0/10</description>
    </item>
    <item>
      <title>ECHO: Frequency-Aware Hierarchical Encoding for Variable-Length Signals</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-echo-frequency-aware-hierarchical-encoding-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-echo-frequency-aware-hierarchical-encoding-for/</guid>
      <description>音频分类 | 9.5/10</description>
    </item>
    <item>
      <title>EchoFake: A Replay-Aware Dataset For Practical Speech Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-echofake-a-replay-aware-dataset-for-practical/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-echofake-a-replay-aware-dataset-for-practical/</guid>
      <description>音频深度伪造检测 | 8.5/10</description>
    </item>
    <item>
      <title>EchoRAG: A Two-Stage Framework for Audio-Text Retrieval and Temporal Grounding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-echorag-a-two-stage-framework-for-audio-text/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-echorag-a-two-stage-framework-for-audio-text/</guid>
      <description>音频检索 | 7.5/10</description>
    </item>
    <item>
      <title>ECSA: Dual-Branch Emotion Compensation for Emotion-Consistent Speaker Anonymization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ecsa-dual-branch-emotion-compensation-for-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ecsa-dual-branch-emotion-compensation-for-emotion/</guid>
      <description>语音匿名化 | 8.5/10</description>
    </item>
    <item>
      <title>EdgeSpot: Efficient and High-Performance Few-Shot Model for Keyword Spotting</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-edgespot-efficient-and-high-performance-few-shot/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-edgespot-efficient-and-high-performance-few-shot/</guid>
      <description>语音活动检测 | 7.5/10</description>
    </item>
    <item>
      <title>EEG and Eye-Tracking Driven Dynamic Target Speaker Extraction with Spontaneous Attention Switching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-eeg-and-eye-tracking-driven-dynamic-target/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-eeg-and-eye-tracking-driven-dynamic-target/</guid>
      <description>语音分离 | 7.0/10</description>
    </item>
    <item>
      <title>EEND-SAA: Enrollment-Less Main Speaker Voice Activity Detection Using Self-Attention Attractors</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-eend-saa-enrollment-less-main-speaker-voice/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-eend-saa-enrollment-less-main-speaker-voice/</guid>
      <description>语音活动检测 | 7.5/10</description>
    </item>
    <item>
      <title>Efficient Audio-Visual Inference Via Token Clustering And Modality Fusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-efficient-audio-visual-inference-via-token/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-efficient-audio-visual-inference-via-token/</guid>
      <description>音频问答 | 7.5/10</description>
    </item>
    <item>
      <title>Efficient Depression Detection from Speech via Language-Independent Prompt-Driven Reprogramming</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-efficient-depression-detection-from-speech-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-efficient-depression-detection-from-speech-via/</guid>
      <description>语音生物标志物 | 7.5/10</description>
    </item>
    <item>
      <title>Efficient Solutions for Mitigating Initialization Bias in Unsupervised Self-Adaptive Auditory Attention Decoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-efficient-solutions-for-mitigating-initialization/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-efficient-solutions-for-mitigating-initialization/</guid>
      <description>听觉注意解码 | 8.5/10</description>
    </item>
    <item>
      <title>EMG-to-Speech with Fewer Channels</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emg-to-speech-with-fewer-channels/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emg-to-speech-with-fewer-channels/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Emilia-NV: A Non-Verbal Speech Dataset with Word-Level Annotation for Human-Like Speech Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emilia-nv-a-non-verbal-speech-dataset-with-word/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emilia-nv-a-non-verbal-speech-dataset-with-word/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Emo-TTA: Improving Test-Time Adaptation of Audio-Language Models for Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emo-tta-improving-test-time-adaptation-of-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emo-tta-improving-test-time-adaptation-of-audio/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>EMORL-TTS: Reinforcement Learning for Fine-Grained Emotion Control in LLM-based TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emorl-tts-reinforcement-learning-for-fine-grained/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emorl-tts-reinforcement-learning-for-fine-grained/</guid>
      <description>语音合成 | 8.5/10</description>
    </item>
    <item>
      <title>EmoShift: Lightweight Activation Steering for Enhanced Emotion-Aware Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emoshift-lightweight-activation-steering-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emoshift-lightweight-activation-steering-for/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Emotion-Aligned Generation in Diffusion Text to Speech Models Via Preference-Guided Optimization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotion-aligned-generation-in-diffusion-text-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotion-aligned-generation-in-diffusion-text-to/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Emotional Damage: Investigating Safety Vulnerabilities of Large Audio-Language Models Under Speaker Emotional Variations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotional-damage-investigating-safety/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotional-damage-investigating-safety/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>Emotional Dimension Control in Language Model-Based Text-To-Speech: Spanning a Broad Spectrum of Human Emotions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotional-dimension-control-in-language-model/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotional-dimension-control-in-language-model/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>EmoTri-RL: Emotion- and Cause-Aware Reinforcement Learning for Multi-Modal Empathetic Dialogue</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotri-rl-emotion-and-cause-aware-reinforcement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-emotri-rl-emotion-and-cause-aware-reinforcement/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>Empowering Multimodal Respiratory Sound Classification with Counterfactual Adversarial Debiasing for Out-of-Distribution Robustness</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-empowering-multimodal-respiratory-sound/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-empowering-multimodal-respiratory-sound/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Enabling Multi-Species Bird Classification on Low-Power Bioacoustic Loggers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enabling-multi-species-bird-classification-on-low/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enabling-multi-species-bird-classification-on-low/</guid>
      <description>生物声学 | 8.0/10</description>
    </item>
    <item>
      <title>Encoding Emotion Through Self-Supervised Eye Movement Reconstruction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-encoding-emotion-through-self-supervised-eye/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-encoding-emotion-through-self-supervised-eye/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Enhanced Generative Machine Listener</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhanced-generative-machine-listener/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhanced-generative-machine-listener/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Enhancing Audio Question-Answering Performance Through Log-Likelihood Guided Reward Functions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-audio-question-answering-performance/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-audio-question-answering-performance/</guid>
      <description>音频问答 | 8.5/10</description>
    </item>
    <item>
      <title>Enhancing Automatic Drum Transcription with Online Dynamic Few-Shot Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-automatic-drum-transcription-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-automatic-drum-transcription-with/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Enhancing Dialogue-Related Speech Tasks with Generated Spoken Dialogues</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-dialogue-related-speech-tasks-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-dialogue-related-speech-tasks-with/</guid>
      <description>语音对话系统 | 6.5/10</description>
    </item>
    <item>
      <title>Enhancing Noise Robustness for Neural Speech Codecs Through Resource-Efficient Progressive Quantization Perturbation Simulation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-noise-robustness-for-neural-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-noise-robustness-for-neural-speech/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Enhancing Speaker Verification with w2v-BERT 2.0 and Knowledge Distillation Guided Structured Pruning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-speaker-verification-with-w2v-bert-20/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-speaker-verification-with-w2v-bert-20/</guid>
      <description>说话人验证 | 7.5/10</description>
    </item>
    <item>
      <title>Enhancing Speech Intelligibility Prediction for Hearing Aids with Complementary Speech Foundation Model Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-speech-intelligibility-prediction-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-enhancing-speech-intelligibility-prediction-for/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Entropy-Guided GRVQ for Ultra-Low Bitrate Neural Speech Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-entropy-guided-grvq-for-ultra-low-bitrate-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-entropy-guided-grvq-for-ultra-low-bitrate-neural/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Equipping Large Language Model with Directional Speech Understanding Capabilities</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-equipping-large-language-model-with-directional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-equipping-large-language-model-with-directional/</guid>
      <description>语音识别 语音翻译 | 7.0/10</description>
    </item>
    <item>
      <title>Erasing Your Voice Before it’s Heard: Training-Free Speaker Unlearning for Zero-Shot Text-to-Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-erasing-your-voice-before-its-heard-training-free/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-erasing-your-voice-before-its-heard-training-free/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Estimating Hand-Related Features from Speech Using Machine Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-estimating-hand-related-features-from-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-estimating-hand-related-features-from-speech/</guid>
      <description>语音生物标志物 | 5.0/10</description>
    </item>
    <item>
      <title>Estimating Respiratory Effort from Nocturnal Breathing Sounds for Obstructive Sleep Apnoea Screening</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-estimating-respiratory-effort-from-nocturnal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-estimating-respiratory-effort-from-nocturnal/</guid>
      <description>音频分类 | 6.5/10</description>
    </item>
    <item>
      <title>Etude: Piano Cover Generation with a Three-Stage Approach — Extract, Structuralize, and Decode</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-etude-piano-cover-generation-with-a-three-stage/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-etude-piano-cover-generation-with-a-three-stage/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>EuleroDec: A Complex-Valued RVQ-VAE for Efficient and Robust Audio Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-eulerodec-a-complex-valued-rvq-vae-for-efficient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-eulerodec-a-complex-valued-rvq-vae-for-efficient/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>Evaluating Bias in Spoken Dialogue LLMs for Real-World Decisions and Recommendations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-bias-in-spoken-dialogue-llms-for-real/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-bias-in-spoken-dialogue-llms-for-real/</guid>
      <description>模型评估 | 7.0/10</description>
    </item>
    <item>
      <title>Evaluating Compositional Structure in Audio Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-compositional-structure-in-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-compositional-structure-in-audio/</guid>
      <description>模型评估 | 7.0/10</description>
    </item>
    <item>
      <title>Evaluating Disentangled Representations for Controllable Music Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-disentangled-representations-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-disentangled-representations-for/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Evaluating Emotion Recognition in Spoken Language Models on Emotionally Incongruent Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-emotion-recognition-in-spoken-language/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-emotion-recognition-in-spoken-language/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Evaluating High-Resolution Piano Sustain Pedal Depth Estimation with Musically Informed Metrics</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-high-resolution-piano-sustain-pedal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-high-resolution-piano-sustain-pedal/</guid>
      <description>音乐信息检索 | 8.0/10</description>
    </item>
    <item>
      <title>Evaluating Pretrained Speech Embedding Systems for Dysarthria Detection Across Heterogenous Datasets</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-pretrained-speech-embedding-systems/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-evaluating-pretrained-speech-embedding-systems/</guid>
      <description>语音生物标志物 | 7.5/10</description>
    </item>
    <item>
      <title>Event Classification by Physics-Informed Inpainting for Distributed Multichannel Acoustic Sensor with Partially Degraded Channels</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-event-classification-by-physics-informed/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-event-classification-by-physics-informed/</guid>
      <description>音频事件检测 | 8.0/10</description>
    </item>
    <item>
      <title>Exploring Fine-Tuning Of Large Audio Language Models For Spoken Language Understanding Under Limited Speech Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-fine-tuning-of-large-audio-language/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-fine-tuning-of-large-audio-language/</guid>
      <description>语音理解 | 8.0/10</description>
    </item>
    <item>
      <title>Exploring How Audio Effects Alter Emotion with Foundation Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-how-audio-effects-alter-emotion-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-how-audio-effects-alter-emotion-with/</guid>
      <description>音乐理解 | 7.0/10</description>
    </item>
    <item>
      <title>Exploring Resolution-Wise Shared Attention in Hybrid Mamba-U-Nets for Improved Cross-Corpus Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-resolution-wise-shared-attention-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-resolution-wise-shared-attention-in/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Exploring SSL Discrete Tokens for Multilingual Automatic Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-ssl-discrete-tokens-for-multilingual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exploring-ssl-discrete-tokens-for-multilingual/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Expressive Voice Conversion with Controllable Emotional Intensity</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-expressive-voice-conversion-with-controllable/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-expressive-voice-conversion-with-controllable/</guid>
      <description>语音转换 | 7.5/10</description>
    </item>
    <item>
      <title>Exterior Sound Field Estimation Based on Physics-Constrained Kernel</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exterior-sound-field-estimation-based-on-physics/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-exterior-sound-field-estimation-based-on-physics/</guid>
      <description>空间音频 | 6.5/10</description>
    </item>
    <item>
      <title>FAC-FACodec: Controllable Zero-Shot Foreign Accent Conversion with Factorized Speech Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fac-facodec-controllable-zero-shot-foreign-accent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fac-facodec-controllable-zero-shot-foreign-accent/</guid>
      <description>语音转换 | 8.0/10</description>
    </item>
    <item>
      <title>Face-Voice Association with Inductive Bias for Maximum Class Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-face-voice-association-with-inductive-bias-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-face-voice-association-with-inductive-bias-for/</guid>
      <description>说话人验证 | 7.0/10</description>
    </item>
    <item>
      <title>Fake Speech Wild: Detecting Deepfake Speech on Social Media Platform</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fake-speech-wild-detecting-deepfake-speech-on/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fake-speech-wild-detecting-deepfake-speech-on/</guid>
      <description>语音伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Fast-ULCNet: A Fast and Ultra Low Complexity Network for Single-Channel Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fast-ulcnet-a-fast-and-ultra-low-complexity/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fast-ulcnet-a-fast-and-ultra-low-complexity/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>FastAV: Efficient Token Pruning for Audio-Visual Large Language Model Inference</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fastav-efficient-token-pruning-for-audio-visual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fastav-efficient-token-pruning-for-audio-visual/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>FastEnhancer: Speed-Optimized Streaming Neural Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fastenhancer-speed-optimized-streaming-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fastenhancer-speed-optimized-streaming-neural/</guid>
      <description>语音增强 | 8.5/10</description>
    </item>
    <item>
      <title>FD-ARL: Feature Disentanglement with Adversarial-Reconstruction Learning for Cross-Subject Auditory Attention Decoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fd-arl-feature-disentanglement-with-adversarial/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fd-arl-feature-disentanglement-with-adversarial/</guid>
      <description>听觉注意力解码 | 7.5/10</description>
    </item>
    <item>
      <title>FDCNet: Frequency Domain Channel Attention and Convolution for Lipreading</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fdcnet-frequency-domain-channel-attention-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fdcnet-frequency-domain-channel-attention-and/</guid>
      <description>视觉语音识别 | 8.5/10</description>
    </item>
    <item>
      <title>FED-PISA: Federated Voice Cloning Via Personalized Identity-Style Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fed-pisa-federated-voice-cloning-via-personalized/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fed-pisa-federated-voice-cloning-via-personalized/</guid>
      <description>语音克隆 | 8.0/10</description>
    </item>
    <item>
      <title>Feedback-Driven Retrieval-Augmented Audio Generation with Large Audio Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-feedback-driven-retrieval-augmented-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-feedback-driven-retrieval-augmented-audio/</guid>
      <description>音频生成 | 6.5/10</description>
    </item>
    <item>
      <title>Few-Shot Recognition of Audio Deepfake Generators using Graph-Based Prototype Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-few-shot-recognition-of-audio-deepfake-generators/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-few-shot-recognition-of-audio-deepfake-generators/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>FIDIC:Fine-Grained Conversational Emotion Recognition via Individual Differences in Inertia and Contagion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fidicfine-grained-conversational-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fidicfine-grained-conversational-emotion/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Fine-Grained Frame Modeling in Multi-Head Self-Attention for Speech Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-grained-frame-modeling-in-multi-head-self/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-grained-frame-modeling-in-multi-head-self/</guid>
      <description>语音伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>Fine-Tuning Bigvgan-V2 for Robust Musical Tuning Preservation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-tuning-bigvgan-v2-for-robust-musical-tuning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-tuning-bigvgan-v2-for-robust-musical-tuning/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Fine-Tuning Large Audio-Language Models with Lora for Precise Temporal Localization of Prolonged Exposure Therapy Elements</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-tuning-large-audio-language-models-with-lora/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-tuning-large-audio-language-models-with-lora/</guid>
      <description>音频事件检测 | 6.5/10</description>
    </item>
    <item>
      <title>Fine-Tuning Large Multimodal Models for Automatic Pronunciation Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-tuning-large-multimodal-models-for-automatic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fine-tuning-large-multimodal-models-for-automatic/</guid>
      <description>语音评估 | 7.0/10</description>
    </item>
    <item>
      <title>FinHuBERT: Hierarchical Feature Imitating Networks for Low-Resource Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-finhubert-hierarchical-feature-imitating-networks/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-finhubert-hierarchical-feature-imitating-networks/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>FlashFoley: Fast Interactive Sketch2audio Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flashfoley-fast-interactive-sketch2audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flashfoley-fast-interactive-sketch2audio/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Flexi-LoRA with Input-Adaptive Ranks: Efficient Finetuning for Speech and Reasoning Tasks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flexi-lora-with-input-adaptive-ranks-efficient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flexi-lora-with-input-adaptive-ranks-efficient/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Flexio: Flexible Single- and Multi-Channel Speech Separation and Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flexio-flexible-single-and-multi-channel-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flexio-flexible-single-and-multi-channel-speech/</guid>
      <description>语音分离 | 8.0/10</description>
    </item>
    <item>
      <title>FlowSE-GRPO: Training Flow Matching Speech Enhancement via Online Reinforcement Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flowse-grpo-training-flow-matching-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-flowse-grpo-training-flow-matching-speech/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>FOCA: Multimodal Malware Classification via Hyperbolic Cross-Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-foca-multimodal-malware-classification-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-foca-multimodal-malware-classification-via/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>FocalCodec-Stream: Streaming Low-Bitrate Speech Coding via Causal Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-focalcodec-stream-streaming-low-bitrate-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-focalcodec-stream-streaming-low-bitrate-speech/</guid>
      <description>语音编码 | 8.0/10</description>
    </item>
    <item>
      <title>FODGE : High-Fidelity Dance Generation via Full-Body Optimization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fodge-high-fidelity-dance-generation-via-full/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fodge-high-fidelity-dance-generation-via-full/</guid>
      <description>音频生成 | 6.5/10</description>
    </item>
    <item>
      <title>FoleyBench: A Benchmark for Video-to-Audio Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-foleybench-a-benchmark-for-video-to-audio-models/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-foleybench-a-benchmark-for-video-to-audio-models/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Forward Convolutive Prediction for Frame Online Monaural Speech Dereverberation based on Kronecker Product Decomposition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-forward-convolutive-prediction-for-frame-online/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-forward-convolutive-prediction-for-frame-online/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Frame-Stacked Local Transformers for Efficient Multi-Codebook Speech Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-frame-stacked-local-transformers-for-efficient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-frame-stacked-local-transformers-for-efficient/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Frequency-Independent Ambisonics Upscaling Using Deep Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-frequency-independent-ambisonics-upscaling-using/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-frequency-independent-ambisonics-upscaling-using/</guid>
      <description>空间音频 | 6.5/10</description>
    </item>
    <item>
      <title>From Contrast to Commonality: Audio Commonality Captioning for Enhanced Audio-Text Cross-Modal Understanding in Multimodal LLMS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-contrast-to-commonality-audio-commonality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-contrast-to-commonality-audio-commonality/</guid>
      <description>音频场景理解 | 7.5/10</description>
    </item>
    <item>
      <title>From Diet to Free Lunch: Estimating Auxiliary Signal Properties Using Dynamic Pruning Masks in Speech Enhancement Networks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-diet-to-free-lunch-estimating-auxiliary/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-diet-to-free-lunch-estimating-auxiliary/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>From Hallucination to Articulation: Language Model-Driven Losses for Ultra Low-Bitrate Neural Speech Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-hallucination-to-articulation-language-model/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-hallucination-to-articulation-language-model/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>From Human Speech to Ocean Signals: Transferring Speech Large Models for Underwater Acoustic Target Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-human-speech-to-ocean-signals-transferring/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-from-human-speech-to-ocean-signals-transferring/</guid>
      <description>水下声学目标识别 | 7.0/10</description>
    </item>
    <item>
      <title>Frontend Token Enhancement for Token-Based Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-frontend-token-enhancement-for-token-based-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-frontend-token-enhancement-for-token-based-speech/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Full Band Denoising of Room Impulse Response in the Wavelet Domain with Dictionary Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-full-band-denoising-of-room-impulse-response-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-full-band-denoising-of-room-impulse-response-in/</guid>
      <description>房间脉冲响应去噪 | 7.5/10</description>
    </item>
    <item>
      <title>FUN-SSL: Full-Band Layer Followed by U-Net With Narrow-Band Layers for Multiple Moving Sound Source Localization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fun-ssl-full-band-layer-followed-by-u-net-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fun-ssl-full-band-layer-followed-by-u-net-with/</guid>
      <description>声源定位 | 8.0/10</description>
    </item>
    <item>
      <title>FUSEMOS: Perceptual Evaluation of Text-to-Music Generation with Dual-Encoder Fusion and Ranking-Aware Composite Loss</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fusemos-perceptual-evaluation-of-text-to-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fusemos-perceptual-evaluation-of-text-to-music/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Fusion of Multimodal Estimations by Extended State Hidden Markov Model: Application to Fetal Heart Rate Monitoring</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fusion-of-multimodal-estimations-by-extended/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fusion-of-multimodal-estimations-by-extended/</guid>
      <description>生物声学 | 7.0/10</description>
    </item>
    <item>
      <title>FxSearcher: Gradient-Free Text-Driven Audio Transformation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fxsearcher-gradient-free-text-driven-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-fxsearcher-gradient-free-text-driven-audio/</guid>
      <description>音频生成 | 7.0/10</description>
    </item>
    <item>
      <title>Game-Time: Evaluating Temporal Dynamics in Spoken Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-game-time-evaluating-temporal-dynamics-in-spoken/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-game-time-evaluating-temporal-dynamics-in-spoken/</guid>
      <description>语音对话系统 | 7.5/10</description>
    </item>
    <item>
      <title>Gdiffuse: Diffusion-Based Speech Enhancement with Noise Model Guidance</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gdiffuse-diffusion-based-speech-enhancement-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gdiffuse-diffusion-based-speech-enhancement-with/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Gelina: Unified Speech and Gesture Synthesis Via Interleaved Token Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gelina-unified-speech-and-gesture-synthesis-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gelina-unified-speech-and-gesture-synthesis-via/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Gen-SER: When the Generative Model Meets Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gen-ser-when-the-generative-model-meets-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gen-ser-when-the-generative-model-meets-speech/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>Generalizability of Predictive and Generative Speech Enhancement Models to Pathological Speakers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generalizability-of-predictive-and-generative/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generalizability-of-predictive-and-generative/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Generating Localized Audible Zones Using a Single-Channel Parametric Loudspeaker</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generating-localized-audible-zones-using-a-single/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generating-localized-audible-zones-using-a-single/</guid>
      <description>空间音频 | 6.5/10</description>
    </item>
    <item>
      <title>Generating Moving 3d Soundscapes with Latent Diffusion Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generating-moving-3d-soundscapes-with-latent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generating-moving-3d-soundscapes-with-latent/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>Generative Audio Extension and Morphing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generative-audio-extension-and-morphing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-generative-audio-extension-and-morphing/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>GLA-GRAD&#43;&#43;: An Improved Griffin-Lim Guided Diffusion Model for Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gla-grad-an-improved-griffin-lim-guided-diffusion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gla-grad-an-improved-griffin-lim-guided-diffusion/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>GLAP: General Contrastive Audio-Text Pretraining Across Domains and Languages</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-glap-general-contrastive-audio-text-pretraining/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-glap-general-contrastive-audio-text-pretraining/</guid>
      <description>音频检索 | 8.5/10</description>
    </item>
    <item>
      <title>GLoRIA: Gated Low-Rank Interpretable Adaptation for Dialectal ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gloria-gated-low-rank-interpretable-adaptation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gloria-gated-low-rank-interpretable-adaptation/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>GLUE: Gradient-free Learning to Unify Experts</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-glue-gradient-free-learning-to-unify-experts/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-glue-gradient-free-learning-to-unify-experts/</guid>
      <description>迁移学习 | 6.5/10</description>
    </item>
    <item>
      <title>GMS-CAVP: Improving Audio-Video Correspondence with Multi-Scale Constrative and Generative Pretraining</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gms-cavp-improving-audio-video-correspondence/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-gms-cavp-improving-audio-video-correspondence/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Graph-Based Emotion Consensus Perception Learning for Multimodal Emotion Recognition in Conversation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-graph-based-emotion-consensus-perception-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-graph-based-emotion-consensus-perception-learning/</guid>
      <description>多模态情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Graph-based Modality Alignment for Robustness in Conversational Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-graph-based-modality-alignment-for-robustness-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-graph-based-modality-alignment-for-robustness-in/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Graph-Biased EEG Transformers for Silent Speech Decoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-graph-biased-eeg-transformers-for-silent-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-graph-biased-eeg-transformers-for-silent-speech/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>Grey-Box Prompt Tuning With Graph Alignment for Speech-Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-grey-box-prompt-tuning-with-graph-alignment-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-grey-box-prompt-tuning-with-graph-alignment-for/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>GRNet: Graph Reconstruction Network for Robust Multimodal Sentiment Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-grnet-graph-reconstruction-network-for-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-grnet-graph-reconstruction-network-for-robust/</guid>
      <description>多模态情感分析 | 7.5/10</description>
    </item>
    <item>
      <title>Group Relative Policy Optimization for Text-to-Speech with Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-group-relative-policy-optimization-for-text-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-group-relative-policy-optimization-for-text-to/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Group-Sparse Gaussian Process Regression for Inhomogeneous Sound Field Estimation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-group-sparse-gaussian-process-regression-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-group-sparse-gaussian-process-regression-for/</guid>
      <description>声场估计 | 7.5/10</description>
    </item>
    <item>
      <title>H-nnPBFDAF: Hierarchical Neural Network Partitioned Block Frequency Domain Adaptive Filter with Novel Block Activation Probability</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-h-nnpbfdaf-hierarchical-neural-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-h-nnpbfdaf-hierarchical-neural-network/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Hair Noise Analysis and Mitigation for Smart Glasses Audio Captures</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hair-noise-analysis-and-mitigation-for-smart/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hair-noise-analysis-and-mitigation-for-smart/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Hanui: Harnessing Distributional Discrepancies for Singing Voice Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hanui-harnessing-distributional-discrepancies-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hanui-harnessing-distributional-discrepancies-for/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>HarmoNet: Music Grounding by Short Video via Harmonic Resample and Dynamic Sparse Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-harmonet-music-grounding-by-short-video-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-harmonet-music-grounding-by-short-video-via/</guid>
      <description>音乐检索 | 7.0/10</description>
    </item>
    <item>
      <title>Hashing-Baseline: Rethinking Hashing in the Age of Pretrained Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hashing-baseline-rethinking-hashing-in-the-age-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hashing-baseline-rethinking-hashing-in-the-age-of/</guid>
      <description>音频检索 音频分类 | 8.0/10</description>
    </item>
    <item>
      <title>HAVT-IVD: Heterogeneity-Aware Cross-Modal Network for Audio-Visual Surveillance: Idling Vehicles Detection with Multichannel Audio and Multiscale Visual Cues</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-havt-ivd-heterogeneity-aware-cross-modal-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-havt-ivd-heterogeneity-aware-cross-modal-network/</guid>
      <description>音频事件检测 | 8.0/10</description>
    </item>
    <item>
      <title>HCGAN: Harmonic-Coupled Generative Adversarial Network for Speech Super-Resolution in Low-Bandwidth Scenarios</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hcgan-harmonic-coupled-generative-adversarial/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hcgan-harmonic-coupled-generative-adversarial/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>HD-PPT: Hierarchical Decoding of Content- and Prompt-Preference Tokens for Instruction-Based TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hd-ppt-hierarchical-decoding-of-content-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hd-ppt-hierarchical-decoding-of-content-and/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>HergNet: A Fast Neural Surrogate Model for Sound Field Predictions Via Superposition of Plane Waves</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hergnet-a-fast-neural-surrogate-model-for-sound/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hergnet-a-fast-neural-surrogate-model-for-sound/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>HFSQVAE: Hierarchical Vector Quantization with Residuals for Frequency-Specific Embedding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hfsqvae-hierarchical-vector-quantization-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hfsqvae-hierarchical-vector-quantization-with/</guid>
      <description>音频生成 | 7.0/10</description>
    </item>
    <item>
      <title>Hierarchical Activity Recognition and Captioning from Long-Form Audio</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hierarchical-activity-recognition-and-captioning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hierarchical-activity-recognition-and-captioning/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>Hierarchical Discrete Flow Matching For Multi-Codebook Codec-Based Text-To-Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hierarchical-discrete-flow-matching-for-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hierarchical-discrete-flow-matching-for-multi/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Hierarchical Tokenization of Multimodal Music Data for Generative Music Retrieval</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hierarchical-tokenization-of-multimodal-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hierarchical-tokenization-of-multimodal-music/</guid>
      <description>音乐检索 | 7.0/10</description>
    </item>
    <item>
      <title>HiFi-HARP: A High-Fidelity 7th-Order Ambisonic Room Impulse Response Dataset</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hifi-harp-a-high-fidelity-7th-order-ambisonic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hifi-harp-a-high-fidelity-7th-order-ambisonic/</guid>
      <description>数据集 | 7.5/10</description>
    </item>
    <item>
      <title>High-Fidelity Speech Enhancement Via Discrete Audio Tokens</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-high-fidelity-speech-enhancement-via-discrete/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-high-fidelity-speech-enhancement-via-discrete/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>How Far Do SSL Speech Models Listen for Tone? Temporal Focus of Tone Representation under Low-Resource Transfer</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-how-far-do-ssl-speech-models-listen-for-tone/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-how-far-do-ssl-speech-models-listen-for-tone/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>How to Label Resynthesized Audio: The Dual Role of Neural Audio Codecs in Audio Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-how-to-label-resynthesized-audio-the-dual-role-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-how-to-label-resynthesized-audio-the-dual-role-of/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>HVAC-EAR: Eavesdropping Human Speech Using HVAC Systems</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hvac-ear-eavesdropping-human-speech-using-hvac/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hvac-ear-eavesdropping-human-speech-using-hvac/</guid>
      <description>音频安全 | 8.5/10</description>
    </item>
    <item>
      <title>Hybrid Pruning: In-Situ Compression of Self-Supervised Speech Models for Speaker Verification and Anti-Spoofing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hybrid-pruning-in-situ-compression-of-self/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hybrid-pruning-in-situ-compression-of-self/</guid>
      <description>说话人验证 | 8.0/10</description>
    </item>
    <item>
      <title>HyFlowSE: Hybrid End-To-End Flow-Matching Speech Enhancement via Generative-Discriminative Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hyflowse-hybrid-end-to-end-flow-matching-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-hyflowse-hybrid-end-to-end-flow-matching-speech/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>I-DCCRN-VAE: An Improved Deep Representation Learning Framework for Complex VAE-Based Single-Channel Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-i-dccrn-vae-an-improved-deep-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-i-dccrn-vae-an-improved-deep-representation/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>IBPCodec : A Low-Bitrate Lightweight Speech Codec With Inter-Band Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ibpcodec-a-low-bitrate-lightweight-speech-codec/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ibpcodec-a-low-bitrate-lightweight-speech-codec/</guid>
      <description>语音编码 | 7.0/10</description>
    </item>
    <item>
      <title>ICASSP 2026 - 主动噪声控制 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-000/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-000/</guid>
      <description>共 1 篇 ICASSP 2026 主动噪声控制 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 主动降噪 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-001/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-001/</guid>
      <description>共 1 篇 ICASSP 2026 主动降噪 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 主题建模 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-002/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-002/</guid>
      <description>共 1 篇 ICASSP 2026 主题建模 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 信号处理 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-003/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-003/</guid>
      <description>共 2 篇 ICASSP 2026 信号处理 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 关键词检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-004/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-004/</guid>
      <description>共 2 篇 ICASSP 2026 关键词检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 医疗AI 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-005/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-005/</guid>
      <description>共 1 篇 ICASSP 2026 医疗AI 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 听觉注意力解码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-006/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-006/</guid>
      <description>共 2 篇 ICASSP 2026 听觉注意力解码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 听觉注意解码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-007/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-007/</guid>
      <description>共 1 篇 ICASSP 2026 听觉注意解码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 噪声控制 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-008/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-008/</guid>
      <description>共 1 篇 ICASSP 2026 噪声控制 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 回声消除 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-009/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-009/</guid>
      <description>共 1 篇 ICASSP 2026 回声消除 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 基准测试 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-010/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-010/</guid>
      <description>共 5 篇 ICASSP 2026 基准测试 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 基频估计 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-011/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-011/</guid>
      <description>共 1 篇 ICASSP 2026 基频估计 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 声场估计 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-012/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-012/</guid>
      <description>共 1 篇 ICASSP 2026 声场估计 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 声学建模 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-013/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-013/</guid>
      <description>共 1 篇 ICASSP 2026 声学建模 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 声源定位 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-014/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-014/</guid>
      <description>共 15 篇 ICASSP 2026 声源定位 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多模态学习 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-015/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-015/</guid>
      <description>共 1 篇 ICASSP 2026 多模态学习 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多模态对话意图识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-016/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-016/</guid>
      <description>共 1 篇 ICASSP 2026 多模态对话意图识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多模态情感分析 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-017/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-017/</guid>
      <description>共 2 篇 ICASSP 2026 多模态情感分析 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多模态情感识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-018/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-018/</guid>
      <description>共 2 篇 ICASSP 2026 多模态情感识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多模态模型 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-019/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-019/</guid>
      <description>共 6 篇 ICASSP 2026 多模态模型 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多通道 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-020/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-020/</guid>
      <description>共 1 篇 ICASSP 2026 多通道 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 多音高估计 #音符跟踪 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-021/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-021/</guid>
      <description>共 1 篇 ICASSP 2026 多音高估计 #音符跟踪 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 实体消歧 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-022/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-022/</guid>
      <description>共 1 篇 ICASSP 2026 实体消歧 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 实时处理 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-023/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-023/</guid>
      <description>共 1 篇 ICASSP 2026 实时处理 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 对抗样本 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-024/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-024/</guid>
      <description>共 1 篇 ICASSP 2026 对抗样本 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 异常声音检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-025/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-025/</guid>
      <description>共 1 篇 ICASSP 2026 异常声音检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 情感分析 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-026/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-026/</guid>
      <description>共 3 篇 ICASSP 2026 情感分析 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 情感识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-027/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-027/</guid>
      <description>共 2 篇 ICASSP 2026 情感识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 房间脉冲响应 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-028/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-028/</guid>
      <description>共 1 篇 ICASSP 2026 房间脉冲响应 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 房间脉冲响应去噪 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-029/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-029/</guid>
      <description>共 1 篇 ICASSP 2026 房间脉冲响应去噪 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 数据集 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-030/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-030/</guid>
      <description>共 3 篇 ICASSP 2026 数据集 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 数据集对齐 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-031/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-031/</guid>
      <description>共 1 篇 ICASSP 2026 数据集对齐 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 槽填充 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-032/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-032/</guid>
      <description>共 1 篇 ICASSP 2026 槽填充 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 模型评估 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-033/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-033/</guid>
      <description>共 16 篇 ICASSP 2026 模型评估 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 歌唱旋律提取 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-034/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-034/</guid>
      <description>共 1 篇 ICASSP 2026 歌唱旋律提取 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 歌唱语音合成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-035/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-035/</guid>
      <description>共 5 篇 ICASSP 2026 歌唱语音合成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 歌唱语音转录 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-036/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-036/</guid>
      <description>共 1 篇 ICASSP 2026 歌唱语音转录 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 歌唱语音转换 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-037/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-037/</guid>
      <description>共 3 篇 ICASSP 2026 歌唱语音转换 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 水下声学目标识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-038/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-038/</guid>
      <description>共 2 篇 ICASSP 2026 水下声学目标识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 生物声学 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-039/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-039/</guid>
      <description>共 12 篇 ICASSP 2026 生物声学 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 目标说话人提取 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-040/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-040/</guid>
      <description>共 1 篇 ICASSP 2026 目标说话人提取 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 神经解码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-041/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-041/</guid>
      <description>共 1 篇 ICASSP 2026 神经解码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 空间音频 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-042/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-042/</guid>
      <description>共 31 篇 ICASSP 2026 空间音频 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 联邦学习 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-043/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-043/</guid>
      <description>共 1 篇 ICASSP 2026 联邦学习 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 脑信号编码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-044/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-044/</guid>
      <description>共 1 篇 ICASSP 2026 脑信号编码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 脑机接口 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-045/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-045/</guid>
      <description>共 1 篇 ICASSP 2026 脑机接口 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 舞蹈生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-046/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-046/</guid>
      <description>共 1 篇 ICASSP 2026 舞蹈生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视觉语音识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-047/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-047/</guid>
      <description>共 2 篇 ICASSP 2026 视觉语音识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频到音频生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-048/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-048/</guid>
      <description>共 1 篇 ICASSP 2026 视频到音频生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频检索 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-049/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-049/</guid>
      <description>共 1 篇 ICASSP 2026 视频检索 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频片段检索 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-050/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-050/</guid>
      <description>共 1 篇 ICASSP 2026 视频片段检索 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频理解 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-051/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-051/</guid>
      <description>共 1 篇 ICASSP 2026 视频理解 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-052/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-052/</guid>
      <description>共 2 篇 ICASSP 2026 视频生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频设备识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-053/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-053/</guid>
      <description>共 1 篇 ICASSP 2026 视频设备识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频问答 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-054/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-054/</guid>
      <description>共 1 篇 ICASSP 2026 视频问答 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 视频高光检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-055/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-055/</guid>
      <description>共 1 篇 ICASSP 2026 视频高光检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音伪造检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-056/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-056/</guid>
      <description>共 8 篇 ICASSP 2026 语音伪造检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音克隆 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-057/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-057/</guid>
      <description>共 4 篇 ICASSP 2026 语音克隆 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音分离 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-058/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-058/</guid>
      <description>共 25 篇 ICASSP 2026 语音分离 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音匿名化 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-059/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-059/</guid>
      <description>共 10 篇 ICASSP 2026 语音匿名化 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音发现 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-060/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-060/</guid>
      <description>共 1 篇 ICASSP 2026 语音发现 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音合成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-061/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-061/</guid>
      <description>共 63 篇 ICASSP 2026 语音合成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音增强 #对抗防御 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-063/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-063/</guid>
      <description>共 1 篇 ICASSP 2026 语音增强 #对抗防御 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音增强 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-062/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-062/</guid>
      <description>共 75 篇 ICASSP 2026 语音增强 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音大模型 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-064/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-064/</guid>
      <description>共 3 篇 ICASSP 2026 语音大模型 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音对话系统 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-065/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-065/</guid>
      <description>共 10 篇 ICASSP 2026 语音对话系统 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音情感识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-066/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-066/</guid>
      <description>共 49 篇 ICASSP 2026 语音情感识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音摘要 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-067/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-067/</guid>
      <description>共 1 篇 ICASSP 2026 语音摘要 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音活动检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-068/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-068/</guid>
      <description>共 5 篇 ICASSP 2026 语音活动检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音理解 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-069/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-069/</guid>
      <description>共 2 篇 ICASSP 2026 语音理解 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-070/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-070/</guid>
      <description>共 1 篇 ICASSP 2026 语音生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音生物标志物 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-071/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-071/</guid>
      <description>共 24 篇 ICASSP 2026 语音生物标志物 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音编码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-072/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-072/</guid>
      <description>共 5 篇 ICASSP 2026 语音编码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音编码器 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-073/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-073/</guid>
      <description>共 1 篇 ICASSP 2026 语音编码器 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音翻译 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-074/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-074/</guid>
      <description>共 8 篇 ICASSP 2026 语音翻译 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音表示学习 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-075/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-075/</guid>
      <description>共 1 篇 ICASSP 2026 语音表示学习 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音解码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-076/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-076/</guid>
      <description>共 1 篇 ICASSP 2026 语音解码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音评估 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-077/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-077/</guid>
      <description>共 5 篇 ICASSP 2026 语音评估 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音识别 #语音合成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-079/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-079/</guid>
      <description>共 1 篇 ICASSP 2026 语音识别 #语音合成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音识别 #语音翻译 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-080/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-080/</guid>
      <description>共 3 篇 ICASSP 2026 语音识别 #语音翻译 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-078/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-078/</guid>
      <description>共 102 篇 ICASSP 2026 语音识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音质量评估 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-081/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-081/</guid>
      <description>共 8 篇 ICASSP 2026 语音质量评估 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音转换 #语音增强 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-083/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-083/</guid>
      <description>共 1 篇 ICASSP 2026 语音转换 #语音增强 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音转换 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-082/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-082/</guid>
      <description>共 9 篇 ICASSP 2026 语音转换 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音问答 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-084/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-084/</guid>
      <description>共 3 篇 ICASSP 2026 语音问答 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 语音驱动动作生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-085/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-085/</guid>
      <description>共 1 篇 ICASSP 2026 语音驱动动作生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人分离 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-086/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-086/</guid>
      <description>共 9 篇 ICASSP 2026 说话人分离 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人合成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-087/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-087/</guid>
      <description>共 1 篇 ICASSP 2026 说话人合成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人日志 #语音分离 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-089/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-089/</guid>
      <description>共 1 篇 ICASSP 2026 说话人日志 #语音分离 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人日志 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-088/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-088/</guid>
      <description>共 2 篇 ICASSP 2026 说话人日志 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-090/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-090/</guid>
      <description>共 1 篇 ICASSP 2026 说话人检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-091/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-091/</guid>
      <description>共 1 篇 ICASSP 2026 说话人生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人脸生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-092/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-092/</guid>
      <description>共 1 篇 ICASSP 2026 说话人脸生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人识别 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-093/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-093/</guid>
      <description>共 1 篇 ICASSP 2026 说话人识别 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 说话人验证 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-094/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-094/</guid>
      <description>共 10 篇 ICASSP 2026 说话人验证 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 课堂阶段分割 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-095/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-095/</guid>
      <description>共 1 篇 ICASSP 2026 课堂阶段分割 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 跨模态 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-096/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-096/</guid>
      <description>共 2 篇 ICASSP 2026 跨模态 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 跨模态检索 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-097/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-097/</guid>
      <description>共 2 篇 ICASSP 2026 跨模态检索 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 轻度认知障碍检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-098/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-098/</guid>
      <description>共 1 篇 ICASSP 2026 轻度认知障碍检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 迁移学习 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-099/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-099/</guid>
      <description>共 1 篇 ICASSP 2026 迁移学习 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 零样本关键词检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-100/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-100/</guid>
      <description>共 1 篇 ICASSP 2026 零样本关键词检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐信息检索 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-101/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-101/</guid>
      <description>共 26 篇 ICASSP 2026 音乐信息检索 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐分离 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-102/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-102/</guid>
      <description>共 1 篇 ICASSP 2026 音乐分离 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐分类 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-103/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-103/</guid>
      <description>共 1 篇 ICASSP 2026 音乐分类 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐推荐 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-104/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-104/</guid>
      <description>共 1 篇 ICASSP 2026 音乐推荐 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐检索 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-105/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-105/</guid>
      <description>共 3 篇 ICASSP 2026 音乐检索 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐混合 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-106/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-106/</guid>
      <description>共 1 篇 ICASSP 2026 音乐混合 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐源分离 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-107/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-107/</guid>
      <description>共 2 篇 ICASSP 2026 音乐源分离 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐源提取 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-108/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-108/</guid>
      <description>共 1 篇 ICASSP 2026 音乐源提取 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐理解 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-109/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-109/</guid>
      <description>共 11 篇 ICASSP 2026 音乐理解 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-110/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-110/</guid>
      <description>共 31 篇 ICASSP 2026 音乐生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音乐转录 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-111/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-111/</guid>
      <description>共 1 篇 ICASSP 2026 音乐转录 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音视频 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-112/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-112/</guid>
      <description>共 6 篇 ICASSP 2026 音视频 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音视频实例分割 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-113/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-113/</guid>
      <description>共 1 篇 ICASSP 2026 音视频实例分割 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频事件检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-114/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-114/</guid>
      <description>共 21 篇 ICASSP 2026 音频事件检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频信号处理 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-115/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-115/</guid>
      <description>共 1 篇 ICASSP 2026 音频信号处理 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频分离 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-116/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-116/</guid>
      <description>共 1 篇 ICASSP 2026 音频分离 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频分类 #零样本学习 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-118/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-118/</guid>
      <description>共 1 篇 ICASSP 2026 音频分类 #零样本学习 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频分类 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-117/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-117/</guid>
      <description>共 39 篇 ICASSP 2026 音频分类 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频压缩 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-119/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-119/</guid>
      <description>共 2 篇 ICASSP 2026 音频压缩 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频场景分类 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-120/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-120/</guid>
      <description>共 1 篇 ICASSP 2026 音频场景分类 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频场景理解 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-121/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-121/</guid>
      <description>共 3 篇 ICASSP 2026 音频场景理解 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频增强 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-122/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-122/</guid>
      <description>共 3 篇 ICASSP 2026 音频增强 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频大模型 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-123/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-123/</guid>
      <description>共 1 篇 ICASSP 2026 音频大模型 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频字幕生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-124/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-124/</guid>
      <description>共 1 篇 ICASSP 2026 音频字幕生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频安全 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-125/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-125/</guid>
      <description>共 11 篇 ICASSP 2026 音频安全 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频描述 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-126/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-126/</guid>
      <description>共 1 篇 ICASSP 2026 音频描述 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频效果估计 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-127/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-127/</guid>
      <description>共 1 篇 ICASSP 2026 音频效果估计 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频无损编码 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-128/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-128/</guid>
      <description>共 1 篇 ICASSP 2026 音频无损编码 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频检索 #音频分类 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-130/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-130/</guid>
      <description>共 1 篇 ICASSP 2026 音频检索 #音频分类 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频检索 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-129/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-129/</guid>
      <description>共 11 篇 ICASSP 2026 音频检索 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频水印 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-131/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-131/</guid>
      <description>共 1 篇 ICASSP 2026 音频水印 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频深度伪造检测 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-132/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-132/</guid>
      <description>共 29 篇 ICASSP 2026 音频深度伪造检测 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频生成 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-133/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-133/</guid>
      <description>共 39 篇 ICASSP 2026 音频生成 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频编辑 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-134/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-134/</guid>
      <description>共 1 篇 ICASSP 2026 音频编辑 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频质量评估 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-135/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-135/</guid>
      <description>共 1 篇 ICASSP 2026 音频质量评估 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频超分辨率 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-136/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-136/</guid>
      <description>共 1 篇 ICASSP 2026 音频超分辨率 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 音频问答 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-137/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-137/</guid>
      <description>共 15 篇 ICASSP 2026 音频问答 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 预训练 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-138/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-138/</guid>
      <description>共 1 篇 ICASSP 2026 预训练 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 - 领域适应 论文列表</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-139/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-task-139/</guid>
      <description>共 2 篇 ICASSP 2026 领域适应 方向论文</description>
    </item>
    <item>
      <title>ICASSP 2026 语音/音频论文详细分析</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-summary/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/icassp2026-summary/</guid>
      <description>共分析 898 篇 ICASSP 2026 论文</description>
    </item>
    <item>
      <title>Identifying Birdsong Syllables without Labelled Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-identifying-birdsong-syllables-without-labelled/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-identifying-birdsong-syllables-without-labelled/</guid>
      <description>生物声学 | 7.0/10</description>
    </item>
    <item>
      <title>Identifying the Minimal and Maximal Phonetic Subspace of Speech Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-identifying-the-minimal-and-maximal-phonetic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-identifying-the-minimal-and-maximal-phonetic/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Identity Leakage Through Accent Cues in Voice Anonymisation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-identity-leakage-through-accent-cues-in-voice/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-identity-leakage-through-accent-cues-in-voice/</guid>
      <description>语音匿名化 | 7.0/10</description>
    </item>
    <item>
      <title>Impact of Phonetics on Speaker Identity in Adversarial Voice Attack</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-impact-of-phonetics-on-speaker-identity-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-impact-of-phonetics-on-speaker-identity-in/</guid>
      <description>说话人验证 | 7.0/10</description>
    </item>
    <item>
      <title>Improving Active Learning for Melody Estimation by Disentangling Uncertainties</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-active-learning-for-melody-estimation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-active-learning-for-melody-estimation/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Improving Anomalous Sound Detection with Attribute-Aware Representation from Domain-Adaptive Pre-Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-anomalous-sound-detection-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-anomalous-sound-detection-with/</guid>
      <description>音频事件检测 | 8.0/10</description>
    </item>
    <item>
      <title>Improving Audio Event Recognition with Consistency Regularization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-audio-event-recognition-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-audio-event-recognition-with/</guid>
      <description>音频事件检测 | 7.0/10</description>
    </item>
    <item>
      <title>Improving Audio Question Answering with Variational Inference</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-audio-question-answering-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-audio-question-answering-with/</guid>
      <description>音频问答 | 7.5/10</description>
    </item>
    <item>
      <title>Improving Automatic Speech Recognition by Mitigating Distortions Introduced by Speech Enhancement Under Drone Noise</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-automatic-speech-recognition-by/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-automatic-speech-recognition-by/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Improving Binaural Distance Estimation in Reverberant Rooms Through Contrastive And Multi-Task Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-binaural-distance-estimation-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-binaural-distance-estimation-in/</guid>
      <description>声源定位 | 7.0/10</description>
    </item>
    <item>
      <title>Improving Contextual Asr Via Multi-Grained Fusion With Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-contextual-asr-via-multi-grained-fusion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-contextual-asr-via-multi-grained-fusion/</guid>
      <description>语音识别 | 8.5/10</description>
    </item>
    <item>
      <title>Improving Interpretability in Generative Multitimbral DDSP Frameworks via Semantically-Disentangled Musical Attributes</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-interpretability-in-generative/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-interpretability-in-generative/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Improving Multimodal Brain Encoding Model with Dynamic Subject-Awareness Routing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-multimodal-brain-encoding-model-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-multimodal-brain-encoding-model-with/</guid>
      <description>脑信号编码 | 8.0/10</description>
    </item>
    <item>
      <title>Improving the Speaker Anonymization Evaluation’s Robustness to Target Speakers with Adversarial Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-the-speaker-anonymization-evaluations/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-improving-the-speaker-anonymization-evaluations/</guid>
      <description>语音匿名化 | 7.5/10</description>
    </item>
    <item>
      <title>In-Sync: Adaptation of Speech Aware Large Language Models for ASR with Word level timestamp predictions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-in-sync-adaptation-of-speech-aware-large-language/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-in-sync-adaptation-of-speech-aware-large-language/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>InconVAD: A Two-Stage Dual-Tower Framework for Multimodal Emotion Inconsistency Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-inconvad-a-two-stage-dual-tower-framework-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-inconvad-a-two-stage-dual-tower-framework-for/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Incremental Learning for Audio Classification with Hebbian Deep Neural Networks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-incremental-learning-for-audio-classification/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-incremental-learning-for-audio-classification/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Individualize the HRTF Neural Field Using Anthropometric Parameters Weighted by Direction-Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-individualize-the-hrtf-neural-field-using/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-individualize-the-hrtf-neural-field-using/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>Influence of Clean Speech Characteristics on Speech Enhancement Performance</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-influence-of-clean-speech-characteristics-on/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-influence-of-clean-speech-characteristics-on/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Influence-Aware Curation and Active Selection for Industrial and Surveillance Sound Events</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-influence-aware-curation-and-active-selection-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-influence-aware-curation-and-active-selection-for/</guid>
      <description>音频事件检测 | 7.0/10</description>
    </item>
    <item>
      <title>Input-Adaptive Differentiable Filterbanks via Hypernetworks for Robust Speech Processing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-input-adaptive-differentiable-filterbanks-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-input-adaptive-differentiable-filterbanks-via/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>InstructAudio: Unified Speech and Music Generation with Natural Language Instruction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-instructaudio-unified-speech-and-music-generation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-instructaudio-unified-speech-and-music-generation/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Instrument Generation Through Distributional Flow Matching and Test-Time Search</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-instrument-generation-through-distributional-flow/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-instrument-generation-through-distributional-flow/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>Int-MeanFlow: Few-Step Speech Generation with Integral Velocity Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-int-meanflow-few-step-speech-generation-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-int-meanflow-few-step-speech-generation-with/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Integrating Speaker Embeddings and LLM-Derived Semantic Representations for Streaming Speaker Diarization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-integrating-speaker-embeddings-and-llm-derived/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-integrating-speaker-embeddings-and-llm-derived/</guid>
      <description>说话人分离 | 6.5/10</description>
    </item>
    <item>
      <title>Inter-Dialog Contrastive Learning for Multimodal Emotion Recognition in Conversations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-inter-dialog-contrastive-learning-for-multimodal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-inter-dialog-contrastive-learning-for-multimodal/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Interpretable Music Harmonic Analysis Through Multilinear Mixture of Experts</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-interpretable-music-harmonic-analysis-through/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-interpretable-music-harmonic-analysis-through/</guid>
      <description>音乐理解 | 7.5/10</description>
    </item>
    <item>
      <title>Interval-Aware Retrieval Framework For Speech-Based Automatic Alzheimer’s Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-interval-aware-retrieval-framework-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-interval-aware-retrieval-framework-for-speech/</guid>
      <description>语音生物标志物 | 8.5/10</description>
    </item>
    <item>
      <title>Inverse-Hessian Regularization for Continual Learning in ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-inverse-hessian-regularization-for-continual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-inverse-hessian-regularization-for-continual/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Investigating Modality Contribution in Audio LLMs for Music</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-investigating-modality-contribution-in-audio-llms/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-investigating-modality-contribution-in-audio-llms/</guid>
      <description>模型评估 | 6.5/10</description>
    </item>
    <item>
      <title>Investigating The Effect Of Sentence-Level Syntactic Structure On Information Loss In The Human Auditory System</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-investigating-the-effect-of-sentence-level/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-investigating-the-effect-of-sentence-level/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Is Phase Really Needed for Weakly-Supervised Dereverberation?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-is-phase-really-needed-for-weakly-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-is-phase-really-needed-for-weakly-supervised/</guid>
      <description>语音增强 | 6.0/10</description>
    </item>
    <item>
      <title>It Is Personal: The Importance of Personalization for Recognizing Self-Reported Emotion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-it-is-personal-the-importance-of-personalization/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-it-is-personal-the-importance-of-personalization/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Joint Autoregressive Modeling of Multi-Talker Overlapped Speech Recognition and Translation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-autoregressive-modeling-of-multi-talker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-autoregressive-modeling-of-multi-talker/</guid>
      <description>语音识别 语音翻译 | 7.0/10</description>
    </item>
    <item>
      <title>Joint Deep Secondary Path Estimation and Adaptive Control for Active Noise Cancellation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-deep-secondary-path-estimation-and-adaptive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-deep-secondary-path-estimation-and-adaptive/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Joint Estimation of Piano Dynamics and Metrical Structure with a Multi-Task Multi-Scale Network</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-estimation-of-piano-dynamics-and-metrical/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-estimation-of-piano-dynamics-and-metrical/</guid>
      <description>音乐理解 | 7.5/10</description>
    </item>
    <item>
      <title>Joint Estimation of Primary and Secondary Paths for Personalized Hearable Applications</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-estimation-of-primary-and-secondary-paths/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-estimation-of-primary-and-secondary-paths/</guid>
      <description>主动降噪 | 7.5/10</description>
    </item>
    <item>
      <title>Joint Multichannel Acoustic Feedback Cancellation and Speaker Extraction via Kalman Filter and Deep Non-Linear Spatial Filter</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-multichannel-acoustic-feedback-cancellation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-joint-multichannel-acoustic-feedback-cancellation/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>K-Function: Joint Pronunciation Transcription and Feedback for Evaluating Kids Language Function</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-k-function-joint-pronunciation-transcription-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-k-function-joint-pronunciation-transcription-and/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>KAN We Make Models Simpler for Audio Deepfake Detection with Kolmogorov–Arnold Networks?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-kan-we-make-models-simpler-for-audio-deepfake/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-kan-we-make-models-simpler-for-audio-deepfake/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>Keeping Models Listening: Segment- and time-aware attention rescaling at decoding time</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-keeping-models-listening-segment-and-time-aware/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-keeping-models-listening-segment-and-time-aware/</guid>
      <description>音频问答 | 7.5/10</description>
    </item>
    <item>
      <title>KSDIFF: Keyframe-Augmented Speech-Aware Dual-Path Diffusion for Facial Animation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ksdiff-keyframe-augmented-speech-aware-dual-path/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ksdiff-keyframe-augmented-speech-aware-dual-path/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>LAFUFU: Latent Acoustic Features For Ultra-Fast Utterance Restoration</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lafufu-latent-acoustic-features-for-ultra-fast/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lafufu-latent-acoustic-features-for-ultra-fast/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>LAMB: LLM-Based Audio Captioning with Modality Gap Bridging Via Cauchy-Schwarz Divergence</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lamb-llm-based-audio-captioning-with-modality-gap/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lamb-llm-based-audio-captioning-with-modality-gap/</guid>
      <description>音频描述 | 7.0/10</description>
    </item>
    <item>
      <title>Language-Infused Retrieval-Augmented CTC with Adaptive Soft-Hard Gating for Robust Code-Switching ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-language-infused-retrieval-augmented-ctc-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-language-infused-retrieval-augmented-ctc-with/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Lattice-Guided Consistency Regularization of Dual-Mode Transducers for Automatic Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lattice-guided-consistency-regularization-of-dual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lattice-guided-consistency-regularization-of-dual/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Learnable Mel-Frontend for Robust Underwater Acoustic Target Detection under Non-Target Interference</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learnable-mel-frontend-for-robust-underwater/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learnable-mel-frontend-for-robust-underwater/</guid>
      <description>音频分类 | 6.5/10</description>
    </item>
    <item>
      <title>Learning Domain-Robust Bioacoustic Representations for Mosquito Species Classification with Contrastive Learning and Distribution Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-domain-robust-bioacoustic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-domain-robust-bioacoustic/</guid>
      <description>生物声学 | 7.5/10</description>
    </item>
    <item>
      <title>Learning Linearity in Audio Consistency Autoencoders via Implicit Regularization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-linearity-in-audio-consistency/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-linearity-in-audio-consistency/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Learning Piezoelectric Hysteresis in In-Ear MEMS Loudspeakers from Acoustic Measurements</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-piezoelectric-hysteresis-in-in-ear-mems/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-piezoelectric-hysteresis-in-in-ear-mems/</guid>
      <description>音频信号处理 | 7.0/10</description>
    </item>
    <item>
      <title>Learning to Align with Unbalanced Optimal Transport in Linguistic Knowledge Transfer for ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-to-align-with-unbalanced-optimal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-to-align-with-unbalanced-optimal/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Learning Vocal-Tract Area And Radiation With A Physics-Informed Webster Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-vocal-tract-area-and-radiation-with-a/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-vocal-tract-area-and-radiation-with-a/</guid>
      <description>歌唱语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Learning What to Hear: Boosting Sound-Source Association for Robust Audiovisual Instance Segmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-what-to-hear-boosting-sound-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-learning-what-to-hear-boosting-sound-source/</guid>
      <description>音视频实例分割 | 7.5/10</description>
    </item>
    <item>
      <title>LenslessMic: Audio Encryption and Authentication via Lensless Computational Imaging</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lenslessmic-audio-encryption-and-authentication/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lenslessmic-audio-encryption-and-authentication/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>LESS: Large Language Model Enhanced Semi-Supervised Learning for Speech Foundational Models Using in-the-wild Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-less-large-language-model-enhanced-semi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-less-large-language-model-enhanced-semi/</guid>
      <description>语音识别 语音翻译 | 7.5/10</description>
    </item>
    <item>
      <title>LETPAV: Lexicon-Enhanced Text with Progressive Audio-Visual Fusion for Multimodal Sentiment Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-letpav-lexicon-enhanced-text-with-progressive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-letpav-lexicon-enhanced-text-with-progressive/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Leveraging Audio-Visual Data to Reduce the Multilingual Gap in Self-Supervised Speech Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-audio-visual-data-to-reduce-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-audio-visual-data-to-reduce-the/</guid>
      <description>语音识别 | 6.0/10</description>
    </item>
    <item>
      <title>Leveraging Diffusion U-Net Features for Predominant Instrument Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-diffusion-u-net-features-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-diffusion-u-net-features-for/</guid>
      <description>音乐信息检索 | 8.0/10</description>
    </item>
    <item>
      <title>Leveraging Large Multimodal Models for Audio-Video Deepfake Detection: A Pilot Study</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-large-multimodal-models-for-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-large-multimodal-models-for-audio/</guid>
      <description>音频深度伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Leveraging Large Speech Language Models as Evaluators for Expressive Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-large-speech-language-models-as/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-large-speech-language-models-as/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>Leveraging Multiple Speech Enhancers for Non-Intrusive Intelligibility Prediction for Hearing-Impaired Listeners</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-multiple-speech-enhancers-for-non/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-multiple-speech-enhancers-for-non/</guid>
      <description>模型评估 | 7.5/10</description>
    </item>
    <item>
      <title>Leveraging prediction entropy for Automatic prompt weighting in Zero-Shot Audio-Language Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-prediction-entropy-for-automatic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-prediction-entropy-for-automatic/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Leveraging Segment-Level Speech Representations for LLM-Based Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-segment-level-speech-representations/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-segment-level-speech-representations/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Leveraging Text-to-Speech and Voice Conversion as Data Augmentation for Alzheimer&#39;s Disease Detection from Spontaneous Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-text-to-speech-and-voice-conversion-as/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-text-to-speech-and-voice-conversion-as/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>Leveraging Whisper Embeddings For Audio-Based Lyrics Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-whisper-embeddings-for-audio-based/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-leveraging-whisper-embeddings-for-audio-based/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Lightweight and Generalizable Acoustic Scene Representations Via Contrastive Fine-Tuning and Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-and-generalizable-acoustic-scene/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-and-generalizable-acoustic-scene/</guid>
      <description>音频场景理解 | 8.0/10</description>
    </item>
    <item>
      <title>Lightweight and Perceptually-Guided Voice Conversion for Electro-Laryngeal Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-and-perceptually-guided-voice/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-and-perceptually-guided-voice/</guid>
      <description>语音转换 | 7.5/10</description>
    </item>
    <item>
      <title>Lightweight Implicit Neural Network for Binaural Audio Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-implicit-neural-network-for-binaural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-implicit-neural-network-for-binaural/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>Lightweight Phoneme-Conditioned Bandwidth Extension for Body-Conducted Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-phoneme-conditioned-bandwidth/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lightweight-phoneme-conditioned-bandwidth/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Lingometer: On-Device Personal Speech Word Counting System</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lingometer-on-device-personal-speech-word/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lingometer-on-device-personal-speech-word/</guid>
      <description>语音活动检测 | 8.0/10</description>
    </item>
    <item>
      <title>Linguard: Authenticating Speech Recordings Using Speech Recognition and Watermark</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-linguard-authenticating-speech-recordings-using/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-linguard-authenticating-speech-recordings-using/</guid>
      <description>音频安全 | 6.5/10</description>
    </item>
    <item>
      <title>LipsAM: Lipschitz-Continuous Amplitude Modifier for Audio Signal Processing and its Application to Plug-And-Play Dereverberation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lipsam-lipschitz-continuous-amplitude-modifier/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lipsam-lipschitz-continuous-amplitude-modifier/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Lisa: Lightweight Yet Superb Neural Speech Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lisa-lightweight-yet-superb-neural-speech-coding/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lisa-lightweight-yet-superb-neural-speech-coding/</guid>
      <description>语音编码 | 8.5/10</description>
    </item>
    <item>
      <title>Listen, But Don&#39;t Leak: Sensitive Data Protection for Privacy Aware Automatic Speech Recognition with Acoustic Triggers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-listen-but-dont-leak-sensitive-data-protection/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-listen-but-dont-leak-sensitive-data-protection/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>LLAC: Learned Lossless Audio Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-llac-learned-lossless-audio-codec/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-llac-learned-lossless-audio-codec/</guid>
      <description>音频无损编码 | 7.5/10</description>
    </item>
    <item>
      <title>LLM-Based Post-ASR Error Correction for Disordered Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-llm-based-post-asr-error-correction-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-llm-based-post-asr-error-correction-for/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Localizing Speech Deepfakes Beyond Transitions via Segment-Aware Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-localizing-speech-deepfakes-beyond-transitions/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-localizing-speech-deepfakes-beyond-transitions/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>LongSpeech: A Scalable Benchmark for Transcription, Translation and Understanding in Long Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-longspeech-a-scalable-benchmark-for-transcription/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-longspeech-a-scalable-benchmark-for-transcription/</guid>
      <description>基准测试 | 7.8/10</description>
    </item>
    <item>
      <title>Look, Listen and Segment: Towards Weakly Supervised Audio-Visual Semantic Segmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-look-listen-and-segment-towards-weakly-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-look-listen-and-segment-towards-weakly-supervised/</guid>
      <description>音视频 | 7.0/10</description>
    </item>
    <item>
      <title>Loose Coupling of Spectral and Spatial Models for Multi-Channel Diarization and Enhancement of Meetings in Dynamic Environments</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-loose-coupling-of-spectral-and-spatial-models-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-loose-coupling-of-spectral-and-spatial-models-for/</guid>
      <description>说话人日志 语音分离 | 7.2/10</description>
    </item>
    <item>
      <title>LOTUSDIS: A Thai Far-Field Meeting Corpus for Robust Conversational ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lotusdis-a-thai-far-field-meeting-corpus-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lotusdis-a-thai-far-field-meeting-corpus-for/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Low-Bandwidth High-Fidelity Speech Transmission with Generative Latent Joint Source-Channel Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-bandwidth-high-fidelity-speech-transmission/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-bandwidth-high-fidelity-speech-transmission/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Low-Frequency Harmonic Control for Speech Intelligibility in Open-Ear Headphones</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-frequency-harmonic-control-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-frequency-harmonic-control-for-speech/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>Low-Latency Audio Front-End Region-of-Interest Beamforming for Smart Glasses</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-latency-audio-front-end-region-of-interest/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-latency-audio-front-end-region-of-interest/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Low-Resource Guidance for Controllable Latent Audio Diffusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-resource-guidance-for-controllable-latent/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-resource-guidance-for-controllable-latent/</guid>
      <description>音乐生成 | 8.5/10</description>
    </item>
    <item>
      <title>Low-Resource Speech-Based Early Alzheimer&#39;s Detection via Cross-Lingual and Few-Shot Transfer Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-resource-speech-based-early-alzheimers/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-low-resource-speech-based-early-alzheimers/</guid>
      <description>语音生物标志物 | 7.5/10</description>
    </item>
    <item>
      <title>LP-CFM: Perceptual Invariance-Aware Conditional Flow Matching for Speech Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lp-cfm-perceptual-invariance-aware-conditional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-lp-cfm-perceptual-invariance-aware-conditional/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>MAG: Multi-Modal Aligned Autoregressive Co-Speech Gesture Generation Without Vector Quantization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mag-multi-modal-aligned-autoregressive-co-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mag-multi-modal-aligned-autoregressive-co-speech/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>MAGE: A Coarse-to-Fine Speech Enhancer with Masked Generative Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mage-a-coarse-to-fine-speech-enhancer-with-masked/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mage-a-coarse-to-fine-speech-enhancer-with-masked/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Malefa: Multi-Granularity Learning and Effective False Alarm Suppression for Zero-Shot Keyword Spotting</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-malefa-multi-granularity-learning-and-effective/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-malefa-multi-granularity-learning-and-effective/</guid>
      <description>零样本关键词检测 | 7.5/10</description>
    </item>
    <item>
      <title>Mambaformer: State-Space Augmented Self-Attention with Downup Sampling for Monaural Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mambaformer-state-space-augmented-self-attention/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mambaformer-state-space-augmented-self-attention/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Marco-Voice: A Unified Framework for Expressive Speech Synthesis with Voice Cloning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-marco-voice-a-unified-framework-for-expressive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-marco-voice-a-unified-framework-for-expressive/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>MaskVCT: Masked Voice Codec Transformer for Zero-Shot Voice Conversion with Increased Controllability via Multiple Guidances</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-maskvct-masked-voice-codec-transformer-for-zero/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-maskvct-masked-voice-codec-transformer-for-zero/</guid>
      <description>语音转换 | 6.5/10</description>
    </item>
    <item>
      <title>Matching Reverberant Speech Through Learned Acoustic Embeddings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-matching-reverberant-speech-through-learned/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-matching-reverberant-speech-through-learned/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>Matrix-Structured Hierarchical Convolutional Modeling for Pronunciation Assessment and Mispronunciation Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-matrix-structured-hierarchical-convolutional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-matrix-structured-hierarchical-convolutional/</guid>
      <description>语音评估 | 8.0/10</description>
    </item>
    <item>
      <title>Maximum Likelihood Measurement Noise Estimation for Block-Time Domain Kalman Filters</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-maximum-likelihood-measurement-noise-estimation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-maximum-likelihood-measurement-noise-estimation/</guid>
      <description>回声消除 | 7.0/10</description>
    </item>
    <item>
      <title>MC-MRX: Reference- and MIDI-Guided Music Source Extraction with Contrastive Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mc-mrx-reference-and-midi-guided-music-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mc-mrx-reference-and-midi-guided-music-source/</guid>
      <description>音乐源提取 | 7.0/10</description>
    </item>
    <item>
      <title>MCF: Text LLMs for Multimodal Emotional Causality</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mcf-text-llms-for-multimodal-emotional-causality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mcf-text-llms-for-multimodal-emotional-causality/</guid>
      <description>情感分析 | 8.0/10</description>
    </item>
    <item>
      <title>MCI-OTFusion: A Multimodal Model for MCI Detection and Cognitive Score Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mci-otfusion-a-multimodal-model-for-mci-detection/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mci-otfusion-a-multimodal-model-for-mci-detection/</guid>
      <description>轻度认知障碍检测 | 6.5/10</description>
    </item>
    <item>
      <title>Meanflow-Accelerated Multimodal Video-to-Audio Synthesis Via One-Step Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanflow-accelerated-multimodal-video-to-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanflow-accelerated-multimodal-video-to-audio/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>MeanFlowSE: One-Step Generative Speech Enhancement via Conditional Mean Flow</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanflowse-one-step-generative-speech-enhancement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanflowse-one-step-generative-speech-enhancement/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>MeanSE: Efficient Generative Speech Enhancement with Mean Flows</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanse-efficient-generative-speech-enhancement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanse-efficient-generative-speech-enhancement/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>MeanVC: Lightweight and Streaming Zero-Shot Voice Conversion via Mean Flows</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanvc-lightweight-and-streaming-zero-shot-voice/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanvc-lightweight-and-streaming-zero-shot-voice/</guid>
      <description>语音转换 | 7.5/10</description>
    </item>
    <item>
      <title>MeanVoiceFlow: One-Step Nonparallel Voice Conversion with Mean Flows</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanvoiceflow-one-step-nonparallel-voice/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-meanvoiceflow-one-step-nonparallel-voice/</guid>
      <description>语音转换 | 7.0/10</description>
    </item>
    <item>
      <title>Measuring Prosody Diversity in Zero-Shot TTS: A New Metric, Benchmark, and Exploration</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-measuring-prosody-diversity-in-zero-shot-tts-a/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-measuring-prosody-diversity-in-zero-shot-tts-a/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>MECap-R1: Emotion-Aware Policy with Reinforcement Learning for Multimodal Emotion Captioning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mecap-r1-emotion-aware-policy-with-reinforcement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mecap-r1-emotion-aware-policy-with-reinforcement/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Medical ASR Enhancement by Domain-Specific Reinforcement Fine-Tuning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-medical-asr-enhancement-by-domain-specific/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-medical-asr-enhancement-by-domain-specific/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>MELA-TTS: Joint Transformer-Diffusion Model with Representation Alignment for Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mela-tts-joint-transformer-diffusion-model-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mela-tts-joint-transformer-diffusion-model-with/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Melos: Sentence-To-Section Training with Multi-Task Learning for LLM-Driven Song Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-melos-sentence-to-section-training-with-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-melos-sentence-to-section-training-with-multi/</guid>
      <description>音乐生成 | 6.5/10</description>
    </item>
    <item>
      <title>Membership Inference Attack against Music Diffusion Models via Generative Manifold Perturbation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-membership-inference-attack-against-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-membership-inference-attack-against-music/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>MFF-RVRDI: Multimodal Fusion Framework for Robust Video Recording Device Identification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mff-rvrdi-multimodal-fusion-framework-for-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mff-rvrdi-multimodal-fusion-framework-for-robust/</guid>
      <description>视频设备识别 | 7.5/10</description>
    </item>
    <item>
      <title>MI-Fuse: Label Fusion for Unsupervised Domain Adaptation with Closed-Source Large Audio-Language Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mi-fuse-label-fusion-for-unsupervised-domain/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mi-fuse-label-fusion-for-unsupervised-domain/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Microphone-Less Measurement of Three-Dimensional Radiating Impulse Response of Sound Source using Spherical Harmonic-Domain Acousto-Optic Tomography</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-microphone-less-measurement-of-three-dimensional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-microphone-less-measurement-of-three-dimensional/</guid>
      <description>声源定位 | 7.0/10</description>
    </item>
    <item>
      <title>MIDI-LLaMA: An Instruction-Following Multimodal LLM for Symbolic Music Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-midi-llama-an-instruction-following-multimodal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-midi-llama-an-instruction-following-multimodal/</guid>
      <description>音乐理解 | 7.5/10</description>
    </item>
    <item>
      <title>Mind the Shift: Using Delta SSL Embeddings to Enhance Child ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mind-the-shift-using-delta-ssl-embeddings-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mind-the-shift-using-delta-ssl-embeddings-to/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Mind Your [m]S, Cross Your [t]S: a Large-Scale Phonetic Analysis of Speech Reproduction in Modern Speech Generators</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mind-your-ms-cross-your-ts-a-large-scale-phonetic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mind-your-ms-cross-your-ts-a-large-scale-phonetic/</guid>
      <description>语音伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>MirrorTalk: Forging Personalized Avatars Via Disentangled Style and Hierarchical Motion Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mirrortalk-forging-personalized-avatars-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mirrortalk-forging-personalized-avatars-via/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Mispronunciation Detection and Diagnosis Without Model Training: A Retrieval-Based Approach</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mispronunciation-detection-and-diagnosis-without/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mispronunciation-detection-and-diagnosis-without/</guid>
      <description>语音评估 | 8.0/10</description>
    </item>
    <item>
      <title>Mitigating Attention Sinks and Massive Activations in Audio-Visual Speech Recognition with LLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-attention-sinks-and-massive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-attention-sinks-and-massive/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Mitigating Data Replication in Text-to-Audio Generative Diffusion Models Through Anti-Memorization Guidance</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-data-replication-in-text-to-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-data-replication-in-text-to-audio/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Mitigating Intra-Speaker Variability in Diarization with Style-Controllable Speech Augmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-intra-speaker-variability-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-intra-speaker-variability-in/</guid>
      <description>说话人日志 | 7.0/10</description>
    </item>
    <item>
      <title>Mitigating Language Prior-Induced Hallucinations via Bi-Level Contrastive Decoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-language-prior-induced-hallucinations/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mitigating-language-prior-induced-hallucinations/</guid>
      <description>多模态模型 | 7.5/10</description>
    </item>
    <item>
      <title>Mix2Morph: Learning Sound Morphing from Noisy Mixes</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mix2morph-learning-sound-morphing-from-noisy-mixes/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mix2morph-learning-sound-morphing-from-noisy-mixes/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>MixGAN-based Non-blind Bandwidth Extension for Audio Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixgan-based-non-blind-bandwidth-extension-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixgan-based-non-blind-bandwidth-extension-for/</guid>
      <description>音频增强 | 8.0/10</description>
    </item>
    <item>
      <title>Mixture of Experts for Recognizing Depression from Interview and Reading Tasks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-of-experts-for-recognizing-depression/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-of-experts-for-recognizing-depression/</guid>
      <description>语音生物标志物 | 6.0/10</description>
    </item>
    <item>
      <title>Mixture To Beamformed Mixture: Leveraging Beamformed Mixture As Weak-Supervision for Speech Enhancement and Noise-Robust ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-to-beamformed-mixture-leveraging/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-to-beamformed-mixture-leveraging/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Mixture-of-Experts Based Soft-Label Learning for Multi-Label Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-of-experts-based-soft-label-learning-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-of-experts-based-soft-label-learning-for/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Mixture-of-Experts Framework for Field-of-View Enhanced Signal-Dependent Binauralization of Moving Talkers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-of-experts-framework-for-field-of-view/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixture-of-experts-framework-for-field-of-view/</guid>
      <description>空间音频 | 6.5/10</description>
    </item>
    <item>
      <title>Mixtures of Lightweight Articulatory Experts for Multilingual ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixtures-of-lightweight-articulatory-experts-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mixtures-of-lightweight-articulatory-experts-for/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>MMAudioSep: Taming Video-to-Audio Generative Model Towards Video/Text-Queried Sound Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mmaudiosep-taming-video-to-audio-generative-model/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mmaudiosep-taming-video-to-audio-generative-model/</guid>
      <description>语音分离 | 8.0/10</description>
    </item>
    <item>
      <title>MNV-17: A High-Quality Performative Mandarin Dataset for Nonverbal Vocalization Recognition in Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mnv-17-a-high-quality-performative-mandarin/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mnv-17-a-high-quality-performative-mandarin/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Modeling Both Intra- And Inter-Utterance Variability for Conversational Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-modeling-both-intra-and-inter-utterance/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-modeling-both-intra-and-inter-utterance/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>Modeling Inter-Segment Relationships in Speech for Dementia Detection with Audio Spectrogram Transformers and Graph Attention Networks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-modeling-inter-segment-relationships-in-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-modeling-inter-segment-relationships-in-speech/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>Modeling Strategies For Speech Enhancement in The Latent Space of a Neural Audio Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-modeling-strategies-for-speech-enhancement-in-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-modeling-strategies-for-speech-enhancement-in-the/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>More Than a Shortcut: A Hyperbolic Approach to Early-Exit Networks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-more-than-a-shortcut-a-hyperbolic-approach-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-more-than-a-shortcut-a-hyperbolic-approach-to/</guid>
      <description>音频事件检测 | 8.0/10</description>
    </item>
    <item>
      <title>Motionbeat: Motion-Aligned Music Representation via Embodied Contrastive Learning and Bar-Equivariant Contact-Aware Encoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-motionbeat-motion-aligned-music-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-motionbeat-motion-aligned-music-representation/</guid>
      <description>舞蹈生成 | 7.5/10</description>
    </item>
    <item>
      <title>MR-FlowDPO: Multi-Reward Direct Preference Optimization for Flow-Matching Text-to-Music Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mr-flowdpo-multi-reward-direct-preference/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mr-flowdpo-multi-reward-direct-preference/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>MSANET: Multi-Scale Semantic Aggregation Network for Brain-Assisted Speech Enhancement in Multi-Speaker Conditions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msanet-multi-scale-semantic-aggregation-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msanet-multi-scale-semantic-aggregation-network/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>MSCT: Differential Cross-Modal Attention for Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msct-differential-cross-modal-attention-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msct-differential-cross-modal-attention-for/</guid>
      <description>音频深度伪造检测 | 6.5/10</description>
    </item>
    <item>
      <title>MSF-SER: Enriching Acoustic Modeling with Multi-Granularity Semantics for Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msf-ser-enriching-acoustic-modeling-with-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-msf-ser-enriching-acoustic-modeling-with-multi/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>MT-HuBERT: Self-Supervised Mix-Training for Few-Shot Keyword Spotting in Mixed Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mt-hubert-self-supervised-mix-training-for-few/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mt-hubert-self-supervised-mix-training-for-few/</guid>
      <description>关键词检测 | 7.0/10</description>
    </item>
    <item>
      <title>MTP-S2UT: Enhancing Speech-to-Speech Translation Quality with Multi-Token Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mtp-s2ut-enhancing-speech-to-speech-translation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-mtp-s2ut-enhancing-speech-to-speech-translation/</guid>
      <description>语音翻译 | 8.5/10</description>
    </item>
    <item>
      <title>Multi-Channel Speech Enhancement for Cocktail Party Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-channel-speech-enhancement-for-cocktail/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-channel-speech-enhancement-for-cocktail/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Multi-Layer Attentive Probing Improves Transfer of Audio Representations for Bioacoustics</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-layer-attentive-probing-improves-transfer/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-layer-attentive-probing-improves-transfer/</guid>
      <description>生物声学 | 7.5/10</description>
    </item>
    <item>
      <title>Multi-Scale Physiologically-Motivated Alignment for Auditory Attention Decoding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-scale-physiologically-motivated-alignment/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-scale-physiologically-motivated-alignment/</guid>
      <description>听觉注意力解码 | 7.5/10</description>
    </item>
    <item>
      <title>Multi-Task Learning For Speech Quality Assessment Using ASR-Derived Entropy Features</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-task-learning-for-speech-quality-assessment/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-task-learning-for-speech-quality-assessment/</guid>
      <description>语音质量评估 | 7.5/10</description>
    </item>
    <item>
      <title>Multi-Task Transformer for Explainable Speech Deepfake Detection via Formant Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-task-transformer-for-explainable-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-task-transformer-for-explainable-speech/</guid>
      <description>语音伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>Multi-View Hierarchical Hypergraph Neural Network for Automatic Stuttering Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-view-hierarchical-hypergraph-neural-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multi-view-hierarchical-hypergraph-neural-network/</guid>
      <description>语音生物标志物 | 7.5/10</description>
    </item>
    <item>
      <title>Multilingual Supervised Pretraining with Lm-Assisted Decoding for Visual Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multilingual-supervised-pretraining-with-lm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multilingual-supervised-pretraining-with-lm/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Multimodal Co-Training with Subtractive Unlabeled-Benefit Bounds</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-co-training-with-subtractive-unlabeled/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-co-training-with-subtractive-unlabeled/</guid>
      <description>多模态学习 | 6.0/10</description>
    </item>
    <item>
      <title>Multimodal Fusion-Based IPCLIP Network for Mixed Reality Surgical Assistance</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-fusion-based-ipclip-network-for-mixed/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-fusion-based-ipclip-network-for-mixed/</guid>
      <description>多模态模型 | 6.5/10</description>
    </item>
    <item>
      <title>Multimodal LLMs as Expert Speech Annotators: Acoustic Macro-Descriptors for Parkinson&#39;s Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-llms-as-expert-speech-annotators/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-llms-as-expert-speech-annotators/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>Multimodal Room Impulse Response Generation Through Latent Rectified Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-room-impulse-response-generation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-room-impulse-response-generation/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Multimodal Self-Attention Network with Temporal Alignment for Audio-Visual Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-self-attention-network-with-temporal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-self-attention-network-with-temporal/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Multimodal Transformer with Multiperspective Training for Predicting Self-Expression Skills from Video Interview</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-transformer-with-multiperspective/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-transformer-with-multiperspective/</guid>
      <description>多模态模型 | 7.0/10</description>
    </item>
    <item>
      <title>Multimodal Variational Graph Network for Multimodal Sentiment Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-variational-graph-network-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-multimodal-variational-graph-network-for/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>MuseTok: Symbolic Music Tokenization for Generation and Semantic Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musetok-symbolic-music-tokenization-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musetok-symbolic-music-tokenization-for/</guid>
      <description>音乐生成 | 8.5/10</description>
    </item>
    <item>
      <title>Musicdetr: A Position-Aware Spectral Note Detection Model for Singing Transcription</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musicdetr-a-position-aware-spectral-note/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musicdetr-a-position-aware-spectral-note/</guid>
      <description>歌唱语音转录 | 8.5/10</description>
    </item>
    <item>
      <title>MusiCRS: Benchmarking Audio-Centric Conversational Recommendation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musicrs-benchmarking-audio-centric-conversational/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-musicrs-benchmarking-audio-centric-conversational/</guid>
      <description>音乐推荐 | 7.5/10</description>
    </item>
    <item>
      <title>Natural Language to Spatial Audio Parameters: Lightweight Deterministic Rendering for Creative Authoring</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-natural-language-to-spatial-audio-parameters/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-natural-language-to-spatial-audio-parameters/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>NCF-TTS: Enhancing Flow Matching Based Text-To-Speech with Neighborhood Consistency Flow</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ncf-tts-enhancing-flow-matching-based-text-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ncf-tts-enhancing-flow-matching-based-text-to/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Neural Network-Based Time-Frequency-Bin-Wise Linear Combination of Beamformers for Underdetermined Target Source Extraction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-neural-network-based-time-frequency-bin-wise/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-neural-network-based-time-frequency-bin-wise/</guid>
      <description>语音分离 | 7.0/10</description>
    </item>
    <item>
      <title>Neuromamba: Adaptive Frequency Filtering with a Pyramid Mamba for sEEG-driven Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-neuromamba-adaptive-frequency-filtering-with-a/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-neuromamba-adaptive-frequency-filtering-with-a/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>NeuroSIFT: A Biologically-Inspired Framework with Explicit Signal-Noise Separation for Robust Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-neurosift-a-biologically-inspired-framework-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-neurosift-a-biologically-inspired-framework-with/</guid>
      <description>多模态情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>nGPT as a Scalable Architecture for Speech Recognition and Translation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ngpt-as-a-scalable-architecture-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ngpt-as-a-scalable-architecture-for-speech/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>No Verifiable Reward for Prosody: Toward Preference-Guided Prosody Learning in TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-no-verifiable-reward-for-prosody-toward/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-no-verifiable-reward-for-prosody-toward/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>Noise-Robust AV-ASR Using Visual Features both in the Whisper Encoder and Decoder</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-noise-robust-av-asr-using-visual-features-both-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-noise-robust-av-asr-using-visual-features-both-in/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Noise-Robust Contrastive Learning with an MFCC-Conformer for Coronary Artery Disease Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-noise-robust-contrastive-learning-with-an-mfcc/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-noise-robust-contrastive-learning-with-an-mfcc/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Noise-to-Notes: Diffusion-Based Generation and Refinement for Automatic Drum Transcription</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-noise-to-notes-diffusion-based-generation-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-noise-to-notes-diffusion-based-generation-and/</guid>
      <description>音乐信息检索 | 8.0/10</description>
    </item>
    <item>
      <title>Non-Line-of-Sight Vehicle Detection via Audio-Visual Fusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-non-line-of-sight-vehicle-detection-via-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-non-line-of-sight-vehicle-detection-via-audio/</guid>
      <description>音频分类 | 8.0/10</description>
    </item>
    <item>
      <title>Obstructive Sleep Apnea Endotype Prediction During Wakefulness Using Voice Biomarkers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-obstructive-sleep-apnea-endotype-prediction/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-obstructive-sleep-apnea-endotype-prediction/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>Off-The-Grid Multi-Pitch Estimation Using Optimal Transport</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-off-the-grid-multi-pitch-estimation-using-optimal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-off-the-grid-multi-pitch-estimation-using-optimal/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>OMNI-AVSR: Towards Unified Multimodal Speech Recognition With Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-omni-avsr-towards-unified-multimodal-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-omni-avsr-towards-unified-multimodal-speech/</guid>
      <description>语音识别 | 8.5/10</description>
    </item>
    <item>
      <title>On deepfake voice detection - It’s all in the presentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-on-deepfake-voice-detection-its-all-in-the/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-on-deepfake-voice-detection-its-all-in-the/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>On The Design of Efficient Neural Methods for Geometry-Agnostic Multichannel Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-on-the-design-of-efficient-neural-methods-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-on-the-design-of-efficient-neural-methods-for/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>On the Design of Higher-Order Time-Intensity Microphone Arrays for Panoramic Audio Recording and Reproduction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-on-the-design-of-higher-order-time-intensity/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-on-the-design-of-higher-order-time-intensity/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>One Model–Three Tasks: Discovering a Shared Winning Ticket for Low-Complexity Audio Intelligence</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-one-modelthree-tasks-discovering-a-shared-winning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-one-modelthree-tasks-discovering-a-shared-winning/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Online Register For Dual-Mode Self-Supervised Speech Models: Mitigating the Lack of Future Context</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-online-register-for-dual-mode-self-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-online-register-for-dual-mode-self-supervised/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Optimizing Domain-Adaptive Self-Supervised Learning for Clinical Voice-Based Disease Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-optimizing-domain-adaptive-self-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-optimizing-domain-adaptive-self-supervised/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>Optimizing Speech Language Models for Acoustic Consistency</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-optimizing-speech-language-models-for-acoustic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-optimizing-speech-language-models-for-acoustic/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>OV-INSTRUCTTTS: Towards Open-Vocabulary Instruct Text-to-Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ov-instructtts-towards-open-vocabulary-instruct/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ov-instructtts-towards-open-vocabulary-instruct/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>PAC: Pronunciation-Aware Contextualized Large Language Model-Based Automatic Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pac-pronunciation-aware-contextualized-large/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pac-pronunciation-aware-contextualized-large/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>PADAM: Perceptual Audio Defect Assessment Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-padam-perceptual-audio-defect-assessment-model/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-padam-perceptual-audio-defect-assessment-model/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>ParaGSE: Parallel Generative Speech Enhancement with Group-Vector-Quantization-Based Neural Speech Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-paragse-parallel-generative-speech-enhancement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-paragse-parallel-generative-speech-enhancement/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Parametric Neural Amp Modeling with Active Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-parametric-neural-amp-modeling-with-active/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-parametric-neural-amp-modeling-with-active/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>PC-MCL: Patient-Consistent Multi-Cycle Learning with Multi-Label Bias Correction for Respiratory Sound Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pc-mcl-patient-consistent-multi-cycle-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pc-mcl-patient-consistent-multi-cycle-learning/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Peeking Into the Future for Contextual Biasing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-peeking-into-the-future-for-contextual-biasing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-peeking-into-the-future-for-contextual-biasing/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Perceptual Loss Optimized HRTF Personalization in Spherical Harmonic Domain</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-perceptual-loss-optimized-hrtf-personalization-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-perceptual-loss-optimized-hrtf-personalization-in/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>Perceptual Quality Assessment for Stylized Talking Heads</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-perceptual-quality-assessment-for-stylized/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-perceptual-quality-assessment-for-stylized/</guid>
      <description>模型评估 | 7.5/10</description>
    </item>
    <item>
      <title>PerformSinger: Multimodal Singing Voice Synthesis Leveraging Synchronized Lip Cues from Singing Performance Videos</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-performsinger-multimodal-singing-voice-synthesis/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-performsinger-multimodal-singing-voice-synthesis/</guid>
      <description>歌唱语音合成 | 4.5/10</description>
    </item>
    <item>
      <title>Personal Sound Zones with Flexible Bright Zone Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-personal-sound-zones-with-flexible-bright-zone/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-personal-sound-zones-with-flexible-bright-zone/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>PersonaPlex: Voice and Role Control for Full Duplex Conversational Speech Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-personaplex-voice-and-role-control-for-full/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-personaplex-voice-and-role-control-for-full/</guid>
      <description>语音对话系统 | 8.5/10</description>
    </item>
    <item>
      <title>PFluxTTS: Hybrid Flow-Matching TTS with Robust Cross-Lingual Voice Cloning and Inference-Time Model Fusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pfluxtts-hybrid-flow-matching-tts-with-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pfluxtts-hybrid-flow-matching-tts-with-robust/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>PG-SE: Predictive Acceleration and Correction for Generative Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pg-se-predictive-acceleration-and-correction-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pg-se-predictive-acceleration-and-correction-for/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Phase-Retrieval-Based Physics-Informed Neural Networks For Acoustic Magnitude Field Reconstruction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phase-retrieval-based-physics-informed-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phase-retrieval-based-physics-informed-neural/</guid>
      <description>声源定位 | 7.0/10</description>
    </item>
    <item>
      <title>Phase-Space Signal Processing of Acoustic Data for Advanced Manufacturing In-Situ Monitoring</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phase-space-signal-processing-of-acoustic-data/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phase-space-signal-processing-of-acoustic-data/</guid>
      <description>音频事件检测 | 7.0/10</description>
    </item>
    <item>
      <title>PhoenixDSR: Phoneme-Guided and LLM-Enhanced Dysarthric Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phoenixdsr-phoneme-guided-and-llm-enhanced/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phoenixdsr-phoneme-guided-and-llm-enhanced/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Phoneme-Level Visual Speech Recognition via Point-Visual Fusion and Language Model Reconstruction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phoneme-level-visual-speech-recognition-via-point/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phoneme-level-visual-speech-recognition-via-point/</guid>
      <description>视觉语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Phonological Tokenizer: Prosody-Aware Phonetic Token Via Multi-Objective Fine-Tuning with Differentiable K-Means</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phonological-tokenizer-prosody-aware-phonetic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phonological-tokenizer-prosody-aware-phonetic/</guid>
      <description>语音表示学习 | 8.0/10</description>
    </item>
    <item>
      <title>Phrased: Phrase Dictionary Biasing for Speech Translation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phrased-phrase-dictionary-biasing-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-phrased-phrase-dictionary-biasing-for-speech/</guid>
      <description>语音翻译 | 7.5/10</description>
    </item>
    <item>
      <title>Physics-Informed Neural Networks for Ocean Acoustic Field Reconstruction and Source Localization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-physics-informed-neural-networks-for-ocean/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-physics-informed-neural-networks-for-ocean/</guid>
      <description>声源定位 | 7.5/10</description>
    </item>
    <item>
      <title>Pianoroll-Event: A Novel Score Representation for Symbolic Music</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pianoroll-event-a-novel-score-representation-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pianoroll-event-a-novel-score-representation-for/</guid>
      <description>音乐生成 | 6.5/10</description>
    </item>
    <item>
      <title>PICOAUDIO2: Temporal Controllable Text-to-Audio Generation with Natural Language Description</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-picoaudio2-temporal-controllable-text-to-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-picoaudio2-temporal-controllable-text-to-audio/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Plug-and-Play Emotion Graphs for Compositional Prompting in Zero-Shot Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-plug-and-play-emotion-graphs-for-compositional/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-plug-and-play-emotion-graphs-for-compositional/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>Poly-SVC: Polyphony-Aware Singing Voice Conversion with Harmonic Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-poly-svc-polyphony-aware-singing-voice-conversion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-poly-svc-polyphony-aware-singing-voice-conversion/</guid>
      <description>歌唱语音转换 | 6.5/10</description>
    </item>
    <item>
      <title>Polynomial Mixing for Efficient Self-Supervised Speech Encoders</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-polynomial-mixing-for-efficient-self-supervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-polynomial-mixing-for-efficient-self-supervised/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Position-Invariant Fine-Tuning Of Speech Enhancement Models With Self-Supervised Speech Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-position-invariant-fine-tuning-of-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-position-invariant-fine-tuning-of-speech/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>Principled Coarse-Grained Acceptance For Speculative Decoding In Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-principled-coarse-grained-acceptance-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-principled-coarse-grained-acceptance-for/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>PRoADS: Provably Secure And Robust Audio Diffusion Steganography With Latent Optimization And Backward Euler Inversion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-proads-provably-secure-and-robust-audio-diffusion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-proads-provably-secure-and-robust-audio-diffusion/</guid>
      <description>音频安全 | 6.5/10</description>
    </item>
    <item>
      <title>Probing the Hidden Talent of ASR Foundation Models for L2 English Oral Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-probing-the-hidden-talent-of-asr-foundation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-probing-the-hidden-talent-of-asr-foundation/</guid>
      <description>预训练 | 7.5/10</description>
    </item>
    <item>
      <title>Probing Whisper for Dysarthric Speech in Detection and Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-probing-whisper-for-dysarthric-speech-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-probing-whisper-for-dysarthric-speech-in/</guid>
      <description>语音生物标志物 | 6.5/10</description>
    </item>
    <item>
      <title>Production-Scale Dynamic Vocabulary ASR Biasing with Word-Level FST and Robust Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-production-scale-dynamic-vocabulary-asr-biasing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-production-scale-dynamic-vocabulary-asr-biasing/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Proficiency-Aware Adaptation and Data Augmentation for Robust L2 ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-proficiency-aware-adaptation-and-data/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-proficiency-aware-adaptation-and-data/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Prompt-Guided Mixture-of-Experts for Robust Multimodal Sentiment Analysis with Missing Modalities</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prompt-guided-mixture-of-experts-for-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prompt-guided-mixture-of-experts-for-robust/</guid>
      <description>语音情感识别 | 8.5/10</description>
    </item>
    <item>
      <title>PromptSep: Generative Audio Separation Via Multimodal Prompting</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-promptsep-generative-audio-separation-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-promptsep-generative-audio-separation-via/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>Prosody-Guided Harmonic Attention for Phase-Coherent Neural Vocoding in the Complex Spectrum</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prosody-guided-harmonic-attention-for-phase/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prosody-guided-harmonic-attention-for-phase/</guid>
      <description>语音合成 | 8.0/10</description>
    </item>
    <item>
      <title>PROST-LLM: Progressively Enhancing the Speech-to-Speech Translation Capability in LLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prost-llm-progressively-enhancing-the-speech-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prost-llm-progressively-enhancing-the-speech-to/</guid>
      <description>语音翻译 | 7.5/10</description>
    </item>
    <item>
      <title>Prototype-Guided Cross-Modal Contrastive Learning for Continual Audio-Visual Sound Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prototype-guided-cross-modal-contrastive-learning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prototype-guided-cross-modal-contrastive-learning/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>PRSA: Preventing Malicious Speaker Recognition and Speech Synthesis Simultaneously with Adversarial Examples</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prsa-preventing-malicious-speaker-recognition-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-prsa-preventing-malicious-speaker-recognition-and/</guid>
      <description>语音匿名化 | 7.0/10</description>
    </item>
    <item>
      <title>PSTalker: Realistic 3D Talking Head Synthesis via a Semantic-Aware Audio-Driven Point-Based Shape</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pstalker-realistic-3d-talking-head-synthesis-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-pstalker-realistic-3d-talking-head-synthesis-via/</guid>
      <description>说话人合成 | 7.5/10</description>
    </item>
    <item>
      <title>Purification Before Fusion: Toward Mask-Free Speech Enhancement for Robust Audio-Visual Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-purification-before-fusion-toward-mask-free/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-purification-before-fusion-toward-mask-free/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>QASTAnet: A DNN-Based Quality Metric for Spatial Audio</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qastanet-a-dnn-based-quality-metric-for-spatial/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qastanet-a-dnn-based-quality-metric-for-spatial/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>QE-XVC: Zero-Shot Cross-Lingual Voice Conversion via Query-Enhancement and Conditional Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qe-xvc-zero-shot-cross-lingual-voice-conversion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qe-xvc-zero-shot-cross-lingual-voice-conversion/</guid>
      <description>语音转换 | 7.5/10</description>
    </item>
    <item>
      <title>QFOCUS: Controllable Synthesis for Automated Speech Stress Editing to Deliver Human-Like Emphatic Intent</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qfocus-controllable-synthesis-for-automated/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-qfocus-controllable-synthesis-for-automated/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Quality Assessment of Noisy and Enhanced Speech with Limited Data: UWB-NTIS System for VoiceMOS 2024</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-quality-assessment-of-noisy-and-enhanced-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-quality-assessment-of-noisy-and-enhanced-speech/</guid>
      <description>语音质量评估 | 7.0/10</description>
    </item>
    <item>
      <title>Quantifying Speaker Embedding Phonological Rule Interactions in Accented Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-quantifying-speaker-embedding-phonological-rule/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-quantifying-speaker-embedding-phonological-rule/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Random Matrix-Driven Graph Representation Learning For Bioacoustic Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-random-matrix-driven-graph-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-random-matrix-driven-graph-representation/</guid>
      <description>生物声学 | 7.5/10</description>
    </item>
    <item>
      <title>Ranking The Impact of Contextual Specialization in Neural Speech Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ranking-the-impact-of-contextual-specialization/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ranking-the-impact-of-contextual-specialization/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>RAP: Real-Time Audio-Driven Portrait Animation with Video Diffusion Transformer</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rap-real-time-audio-driven-portrait-animation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rap-real-time-audio-driven-portrait-animation/</guid>
      <description>音视频 | 7.0/10</description>
    </item>
    <item>
      <title>RASD-SR: A Robust Anomalous Sound Detection Framework with Score Recalibration</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rasd-sr-a-robust-anomalous-sound-detection/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rasd-sr-a-robust-anomalous-sound-detection/</guid>
      <description>异常声音检测 | 8.5/10</description>
    </item>
    <item>
      <title>Rationale-Guided Learning for Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rationale-guided-learning-for-multimodal-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rationale-guided-learning-for-multimodal-emotion/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>RCAL: Reinforced Cross-Modal Alignment for Multimodal Sentiment Analysis with Sparse Visual Frames</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rcal-reinforced-cross-modal-alignment-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rcal-reinforced-cross-modal-alignment-for/</guid>
      <description>多模态模型 | 8.5/10</description>
    </item>
    <item>
      <title>Reading Between the Waves: Robust Topic Segmentation Using Inter-Sentence Audio Features</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reading-between-the-waves-robust-topic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reading-between-the-waves-robust-topic/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Real-Time Streaming Mel Vocoding with Generative Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-real-time-streaming-mel-vocoding-with-generative/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-real-time-streaming-mel-vocoding-with-generative/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Reasoning Driven Captions to Assist Noise Robust Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reasoning-driven-captions-to-assist-noise-robust/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reasoning-driven-captions-to-assist-noise-robust/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>ReCoM: Realistic Co-Speech Motion Generation with Recurrent Embedded Transformer</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-recom-realistic-co-speech-motion-generation-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-recom-realistic-co-speech-motion-generation-with/</guid>
      <description>音频生成 | 7.0/10</description>
    </item>
    <item>
      <title>Reconstruction of Spherical Sound Source Radiation Characteristics with Graph Signal Processing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reconstruction-of-spherical-sound-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reconstruction-of-spherical-sound-source/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>Recovering Performance in Speech Emotion Recognition from Discrete Tokens Via Multi-Layer Fusion and Paralinguistic Feature Integration</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-recovering-performance-in-speech-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-recovering-performance-in-speech-emotion/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>Reducing Prompt Sensitivity in LLM-Based Speech Recognition Through Learnable Projection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reducing-prompt-sensitivity-in-llm-based-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reducing-prompt-sensitivity-in-llm-based-speech/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Reference Microphone Selection for Guided Source Separation Based on The Normalized L-P Norm</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reference-microphone-selection-for-guided-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reference-microphone-selection-for-guided-source/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Reference-Aware SFM Layers for Intrusive Intelligibility Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reference-aware-sfm-layers-for-intrusive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reference-aware-sfm-layers-for-intrusive/</guid>
      <description>语音评估 | 7.5/10</description>
    </item>
    <item>
      <title>Refgen: Reference-Guided Synthetic Data Generation for Anomalous Sound Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-refgen-reference-guided-synthetic-data-generation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-refgen-reference-guided-synthetic-data-generation/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>Regularized Inverse Filter Design for Rigid Spherical Microphone Array Processing: Laplace- And Time-Domain Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-regularized-inverse-filter-design-for-rigid/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-regularized-inverse-filter-design-for-rigid/</guid>
      <description>空间音频 | 8.0/10</description>
    </item>
    <item>
      <title>Relative Time Intervals Representation For Word-Level Timestamping With Masked Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-relative-time-intervals-representation-for-word/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-relative-time-intervals-representation-for-word/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Reliable AI via Age-Balanced Validation: Fair Model Selection for Parkinson’s Detection from Voice</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reliable-ai-via-age-balanced-validation-fair/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-reliable-ai-via-age-balanced-validation-fair/</guid>
      <description>语音生物标志物 | 7.5/10</description>
    </item>
    <item>
      <title>Representation-Based Data Quality Audits for Audio</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-representation-based-data-quality-audits-for-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-representation-based-data-quality-audits-for-audio/</guid>
      <description>数据集 | 7.5/10</description>
    </item>
    <item>
      <title>Representation-Diverse Self-Supervision for Cross-Domain Bioacoustic Learning in Low-Resource Settings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-representation-diverse-self-supervision-for-cross/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-representation-diverse-self-supervision-for-cross/</guid>
      <description>生物声学 | 7.0/10</description>
    </item>
    <item>
      <title>Residual Tokens Enhance Masked Autoencoders for Speech Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-residual-tokens-enhance-masked-autoencoders-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-residual-tokens-enhance-masked-autoencoders-for/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Respire-Mamba C-UNet: Consistency-Trained Autoencoder for High-Fidelity Respiratory Sound Compression</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-respire-mamba-c-unet-consistency-trained/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-respire-mamba-c-unet-consistency-trained/</guid>
      <description>音频压缩 | 7.0/10</description>
    </item>
    <item>
      <title>Rethinking Entity Disambiguation in Complex Modalities</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rethinking-entity-disambiguation-in-complex/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rethinking-entity-disambiguation-in-complex/</guid>
      <description>实体消歧 | 8.0/10</description>
    </item>
    <item>
      <title>Rethinking Music Captioning with Music Metadata LLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rethinking-music-captioning-with-music-metadata/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rethinking-music-captioning-with-music-metadata/</guid>
      <description>音乐理解 | 7.0/10</description>
    </item>
    <item>
      <title>Retrieval-Based Speculative Decoding For Autoregressive Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-retrieval-based-speculative-decoding-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-retrieval-based-speculative-decoding-for/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Revisiting Direct Speech-to-Text Translation with Speech LLMs: Better Scaling than CoT Prompting?</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-revisiting-direct-speech-to-text-translation-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-revisiting-direct-speech-to-text-translation-with/</guid>
      <description>语音翻译 | 7.5/10</description>
    </item>
    <item>
      <title>RFM-Editing: Rectified Flow Matching for Text-Guided Audio Editing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rfm-editing-rectified-flow-matching-for-text/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rfm-editing-rectified-flow-matching-for-text/</guid>
      <description>音频编辑 | 7.5/10</description>
    </item>
    <item>
      <title>RHO-PERFECT: Correlation Ceiling for Subjective Evaluation Datasets</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rho-perfect-correlation-ceiling-for-subjective/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rho-perfect-correlation-ceiling-for-subjective/</guid>
      <description>模型评估 | 7.5/10</description>
    </item>
    <item>
      <title>RIR-Former: Coordinate-Guided Transformer for Continuous Reconstruction of Room Impulse Responses</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rir-former-coordinate-guided-transformer-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rir-former-coordinate-guided-transformer-for/</guid>
      <description>房间脉冲响应 | 7.0/10</description>
    </item>
    <item>
      <title>RLBR: Reinforcement Learning with Biasing Rewards for Contextual Speech Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rlbr-reinforcement-learning-with-biasing-rewards/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rlbr-reinforcement-learning-with-biasing-rewards/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>RMODGDF: A Robust STFT-Derived Feature for Musical Instrument Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rmodgdf-a-robust-stft-derived-feature-for-musical/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rmodgdf-a-robust-stft-derived-feature-for-musical/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Robust and Lightweight F0 Estimation Through Mid-Level Fusion of DSP-Informed Features</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-robust-and-lightweight-f0-estimation-through-mid/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-robust-and-lightweight-f0-estimation-through-mid/</guid>
      <description>基频估计 | 8.0/10</description>
    </item>
    <item>
      <title>Robust Deepfake Audio Detection via Multi-Level Intermediate Feature Fusion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-robust-deepfake-audio-detection-via-multi-level/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-robust-deepfake-audio-detection-via-multi-level/</guid>
      <description>音频深度伪造检测 | 7.5/10</description>
    </item>
    <item>
      <title>Robust Online Overdetermined Independent Vector Analysis Based on Bilinear Decomposition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-robust-online-overdetermined-independent-vector/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-robust-online-overdetermined-independent-vector/</guid>
      <description>语音分离 | 7.0/10</description>
    </item>
    <item>
      <title>RoCo: Robust Code for Fast and Effective Proactive Defense against Voice Cloning Attack</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-roco-robust-code-for-fast-and-effective-proactive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-roco-robust-code-for-fast-and-effective-proactive/</guid>
      <description>音频安全 | 7.5/10</description>
    </item>
    <item>
      <title>RRPO: Robust Reward Policy Optimization for LLM-Based Emotional TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rrpo-robust-reward-policy-optimization-for-llm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-rrpo-robust-reward-policy-optimization-for-llm/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>S-PRESSO: Ultra Low Bitrate Sound Effect Compression with Diffusion Autoencoders and Offline Quantization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-s-presso-ultra-low-bitrate-sound-effect/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-s-presso-ultra-low-bitrate-sound-effect/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>S-SONDO: Self-Supervised Knowledge Distillation for General Audio Foundation Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-s-sondo-self-supervised-knowledge-distillation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-s-sondo-self-supervised-knowledge-distillation/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>S2Voice: Style-Aware Autoregressive Modeling with Enhanced Conditioning for Singing Style Conversion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-s2voice-style-aware-autoregressive-modeling-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-s2voice-style-aware-autoregressive-modeling-with/</guid>
      <description>歌唱语音转换 | 7.0/10</description>
    </item>
    <item>
      <title>SA-SSL-MOS: Self-Supervised Learning MOS Prediction with Spectral Augmentation for Generalized Multi-Rate Speech Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sa-ssl-mos-self-supervised-learning-mos/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sa-ssl-mos-self-supervised-learning-mos/</guid>
      <description>语音质量评估 | 7.0/10</description>
    </item>
    <item>
      <title>SAASDNet: An EEG-Based Streaming Auditory Attention Switch Decoding Network for Self-Initiated Attention Switching in Mixed Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-saasdnet-an-eeg-based-streaming-auditory/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-saasdnet-an-eeg-based-streaming-auditory/</guid>
      <description>脑机接口 | 8.0/10</description>
    </item>
    <item>
      <title>SAGA-SR: Semantically and Acoustically Guided Audio Super-Resolution</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-saga-sr-semantically-and-acoustically-guided/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-saga-sr-semantically-and-acoustically-guided/</guid>
      <description>音频增强 | 7.5/10</description>
    </item>
    <item>
      <title>Salad-VAE: Semantic Audio Compression with Language-Audio Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-salad-vae-semantic-audio-compression-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-salad-vae-semantic-audio-compression-with/</guid>
      <description>音频压缩 | 7.5/10</description>
    </item>
    <item>
      <title>Sampling-Rate-Agnostic Speech Super-Resolution Based on Gaussian Process Dynamical Systems with Deep Kernel Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sampling-rate-agnostic-speech-super-resolution/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sampling-rate-agnostic-speech-super-resolution/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>SAUNA: Song-Level Audio &amp; User-Listening Data Neural Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sauna-song-level-audio-user-listening-data-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sauna-song-level-audio-user-listening-data-neural/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Savgbench: Benchmarking Spatially Aligned Audio-Video Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-savgbench-benchmarking-spatially-aligned-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-savgbench-benchmarking-spatially-aligned-audio/</guid>
      <description>基准测试 | 7.5/10</description>
    </item>
    <item>
      <title>Scalable Evaluation for Audio Identification Via Synthetic Latent Fingerprint Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scalable-evaluation-for-audio-identification-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scalable-evaluation-for-audio-identification-via/</guid>
      <description>音频检索 | 7.0/10</description>
    </item>
    <item>
      <title>Scaling Ambiguity: Augmenting Human Annotation in Speech Emotion Recognition with Audio-Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scaling-ambiguity-augmenting-human-annotation-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scaling-ambiguity-augmenting-human-annotation-in/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>Scaling Multi-Talker ASR with Speaker-Agnostic Activity Streams</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scaling-multi-talker-asr-with-speaker-agnostic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scaling-multi-talker-asr-with-speaker-agnostic/</guid>
      <description>语音识别 | 8.5/10</description>
    </item>
    <item>
      <title>Scaling Spoken Language Models with Syllabic Speech Tokenization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scaling-spoken-language-models-with-syllabic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scaling-spoken-language-models-with-syllabic/</guid>
      <description>语音理解 | 7.0/10</description>
    </item>
    <item>
      <title>SceneRAG: Scene-Level Retrieval-Augmented Generation for Video Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scenerag-scene-level-retrieval-augmented/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-scenerag-scene-level-retrieval-augmented/</guid>
      <description>视频理解 | 7.5/10</description>
    </item>
    <item>
      <title>SE-DiCoW: Self-Enrolled Diarization-Conditioned Whisper</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-se-dicow-self-enrolled-diarization-conditioned/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-se-dicow-self-enrolled-diarization-conditioned/</guid>
      <description>语音识别 | 8.5/10</description>
    </item>
    <item>
      <title>Secondary Source Placement for Sound Field Control Based on Ising Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-secondary-source-placement-for-sound-field/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-secondary-source-placement-for-sound-field/</guid>
      <description>空间音频 | 6.0/10</description>
    </item>
    <item>
      <title>SED: Structural Entropy Based Speech Discretization for Discrete Token-Based ASR</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sed-structural-entropy-based-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sed-structural-entropy-based-speech/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Segmentwise Pruning in Audio-Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-segmentwise-pruning-in-audio-language-models/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-segmentwise-pruning-in-audio-language-models/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>SELD-MOHA: A Fine-Tuning Method with the Mixture of Heterogeneous Adapters for Sound Event Localization and Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-seld-moha-a-fine-tuning-method-with-the-mixture/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-seld-moha-a-fine-tuning-method-with-the-mixture/</guid>
      <description>音频事件检测 | 7.0/10</description>
    </item>
    <item>
      <title>Selective Hub Fusion with Modality-Heterogeneous Experts for Multimodal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-selective-hub-fusion-with-modality-heterogeneous/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-selective-hub-fusion-with-modality-heterogeneous/</guid>
      <description>多模态模型 | 6.5/10</description>
    </item>
    <item>
      <title>Self-Supervised Note Tracking and Multi-Pitch Estimation Via Reconstruction-Based Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-self-supervised-note-tracking-and-multi-pitch/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-self-supervised-note-tracking-and-multi-pitch/</guid>
      <description>多音高估计 音符跟踪 | 8.5/10</description>
    </item>
    <item>
      <title>Semantic Anchor Transfer from Short to Long Speech in a Distillation-Based Summarization Framework</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-semantic-anchor-transfer-from-short-to-long/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-semantic-anchor-transfer-from-short-to-long/</guid>
      <description>语音摘要 | 7.5/10</description>
    </item>
    <item>
      <title>Semantic-Guided Pseudo-Feature Attention Network for Audio-Visual Zero-Shot Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-semantic-guided-pseudo-feature-attention-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-semantic-guided-pseudo-feature-attention-network/</guid>
      <description>音频分类 零样本学习 | 7.0/10</description>
    </item>
    <item>
      <title>SEP-ST: Incorporating Speech Entity Prompt Into Large Language Models for Speech Translation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sep-st-incorporating-speech-entity-prompt-into/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sep-st-incorporating-speech-entity-prompt-into/</guid>
      <description>语音翻译 | 7.5/10</description>
    </item>
    <item>
      <title>Separate this, and all of these Things Around It: Music Source Separation Via Hyperellipsoidal Queries</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-separate-this-and-all-of-these-things-around-it/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-separate-this-and-all-of-these-things-around-it/</guid>
      <description>音乐分离 | 7.0/10</description>
    </item>
    <item>
      <title>Sequence-Level Unsupervised Training in Speech Recognition: A Theoretical Study</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sequence-level-unsupervised-training-in-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sequence-level-unsupervised-training-in-speech/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Sequential and Simultaneous Optimization of Microphone Array Geometry and Region-of-Interest Beamforming</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sequential-and-simultaneous-optimization-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sequential-and-simultaneous-optimization-of/</guid>
      <description>声源定位 | 7.5/10</description>
    </item>
    <item>
      <title>Session-Level Spoken Language Assessment with A Multimodal Foundation Model Via Multi-Target Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-session-level-spoken-language-assessment-with-a/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-session-level-spoken-language-assessment-with-a/</guid>
      <description>语音评估 | 7.5/10</description>
    </item>
    <item>
      <title>SFM-TTS: Lightweight and Rapid Speech Synthesis with Flexible Shortcut Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sfm-tts-lightweight-and-rapid-speech-synthesis/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sfm-tts-lightweight-and-rapid-speech-synthesis/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Shared Representation Learning for Reference-Guided Targeted Sound Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-shared-representation-learning-for-reference/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-shared-representation-learning-for-reference/</guid>
      <description>音频事件检测 | 8.5/10</description>
    </item>
    <item>
      <title>Shortcut Flow Matching for Speech Enhancement: Step-Invariant Flows via Single Stage Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-shortcut-flow-matching-for-speech-enhancement/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-shortcut-flow-matching-for-speech-enhancement/</guid>
      <description>语音增强 | 7.0/10</description>
    </item>
    <item>
      <title>Sidon: Fast and Robust Open-Source Multilingual Speech Restoration for Large-Scale Dataset Cleansing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sidon-fast-and-robust-open-source-multilingual/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sidon-fast-and-robust-open-source-multilingual/</guid>
      <description>语音增强 | 8.5/10</description>
    </item>
    <item>
      <title>SightSound-R1: Cross-Modal Reasoning Distillation from Vision to Audio Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sightsound-r1-cross-modal-reasoning-distillation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sightsound-r1-cross-modal-reasoning-distillation/</guid>
      <description>音频问答 | 7.5/10</description>
    </item>
    <item>
      <title>Sing What You Fit: A Perception-Based Dataset and Benchmark for Vocal-Song Suitability Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sing-what-you-fit-a-perception-based-dataset-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sing-what-you-fit-a-perception-based-dataset-and/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Sing2Song: An Accompaniment Generation System Based on Solo Singing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sing2song-an-accompaniment-generation-system/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sing2song-an-accompaniment-generation-system/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Single-Microphone Audio Point Source Discriminative Localization from Reverberation Late Tail Estimation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-single-microphone-audio-point-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-single-microphone-audio-point-source/</guid>
      <description>说话人分离 | 7.0/10</description>
    </item>
    <item>
      <title>Single-Step Controllable Music Bandwidth extension with Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-single-step-controllable-music-bandwidth/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-single-step-controllable-music-bandwidth/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>SingMOS-Pro: An Comprehensive Benchmark For Singing Quality Assessment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-singmos-pro-an-comprehensive-benchmark-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-singmos-pro-an-comprehensive-benchmark-for/</guid>
      <description>歌唱语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>SIREN: Spatially-Informed Reconstruction of Binaural Audio with Vision</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-siren-spatially-informed-reconstruction-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-siren-spatially-informed-reconstruction-of/</guid>
      <description>空间音频 | 7.0/10</description>
    </item>
    <item>
      <title>SIRUP: A Diffusion-Based Virtual Upmixer of Steering Vectors for Highly-Directive Spatialization with First-Order Ambisonics</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sirup-a-diffusion-based-virtual-upmixer-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sirup-a-diffusion-based-virtual-upmixer-of/</guid>
      <description>声源定位 | 7.0/10</description>
    </item>
    <item>
      <title>SLAP: Scalable Language-Audio Pretraining with Variable-Duration Audio and Multi-Objective Training</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slap-scalable-language-audio-pretraining-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slap-scalable-language-audio-pretraining-with/</guid>
      <description>音频检索 | 8.0/10</description>
    </item>
    <item>
      <title>SLM-SS: Speech Language Model for Generative Speech Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slm-ss-speech-language-model-for-generative/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slm-ss-speech-language-model-for-generative/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>SLM-TTA: A Framework for Test-Time Adaptation of Generative Spoken Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slm-tta-a-framework-for-test-time-adaptation-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slm-tta-a-framework-for-test-time-adaptation-of/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Slot Filling as a Reasoning Task for Speechllms</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slot-filling-as-a-reasoning-task-for-speechllms/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-slot-filling-as-a-reasoning-task-for-speechllms/</guid>
      <description>槽填充 | 6.5/10</description>
    </item>
    <item>
      <title>SmoothCLAP: Soft-Target Enhanced Contrastive Language-Audio Pretraining for Affective Computing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-smoothclap-soft-target-enhanced-contrastive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-smoothclap-soft-target-enhanced-contrastive/</guid>
      <description>语音情感识别 | 6.5/10</description>
    </item>
    <item>
      <title>Snore Sound Classification Based on Physiological Features and Adaptive Loss Function</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-snore-sound-classification-based-on-physiological/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-snore-sound-classification-based-on-physiological/</guid>
      <description>音频分类 | 6.5/10</description>
    </item>
    <item>
      <title>Solving the Helmholtz Equation Via Physics-Informed Neural Networks with an Adaptive Weighting Strategy</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-solving-the-helmholtz-equation-via-physics/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-solving-the-helmholtz-equation-via-physics/</guid>
      <description>声学建模 | 6.5/10</description>
    </item>
    <item>
      <title>SONAR: Self-Distilled Continual Pre-Training for Domain Adaptive Audio Representation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sonar-self-distilled-continual-pre-training-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sonar-self-distilled-continual-pre-training-for/</guid>
      <description>音频事件检测 | 7.0/10</description>
    </item>
    <item>
      <title>SoundCompass: Navigating Target Sound Extraction with Effective Directional Clue Integration in Complex Acoustic Scenes</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-soundcompass-navigating-target-sound-extraction/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-soundcompass-navigating-target-sound-extraction/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>Sounding Highlights: Dual-Pathway Audio Encoders for Audio-Visual Video Highlight Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sounding-highlights-dual-pathway-audio-encoders/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sounding-highlights-dual-pathway-audio-encoders/</guid>
      <description>视频高光检测 | 8.5/10</description>
    </item>
    <item>
      <title>Sounds that Shape: Audio-Driven 3D Mesh Generation with Attribute-Decoupled Score Distillation Sampling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sounds-that-shape-audio-driven-3d-mesh-generation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sounds-that-shape-audio-driven-3d-mesh-generation/</guid>
      <description>音频生成 | 7.0/10</description>
    </item>
    <item>
      <title>Source Separation For A Cappella Music</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-source-separation-for-a-cappella-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-source-separation-for-a-cappella-music/</guid>
      <description>语音分离 | 6.5/10</description>
    </item>
    <item>
      <title>SP-MCQA: Evaluating Intelligibility of TTS Beyond the Word Level</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sp-mcqa-evaluating-intelligibility-of-tts-beyond/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sp-mcqa-evaluating-intelligibility-of-tts-beyond/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>SPADE: Structured Pruning and Adaptive Distillation for Efficient LLM-TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spade-structured-pruning-and-adaptive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spade-structured-pruning-and-adaptive/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>SPAM: Style Prompt Adherence Metric for Prompt-Based TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spam-style-prompt-adherence-metric-for-prompt/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spam-style-prompt-adherence-metric-for-prompt/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Sparse Autoencoders Make Audio Foundation Models More Explainable</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sparse-autoencoders-make-audio-foundation-models/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sparse-autoencoders-make-audio-foundation-models/</guid>
      <description>模型评估 | 6.5/10</description>
    </item>
    <item>
      <title>Sparse-View Visual-Acoustic Latent Learning for Novel-View Audio Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sparse-view-visual-acoustic-latent-learning-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sparse-view-visual-acoustic-latent-learning-for/</guid>
      <description>空间音频 | 7.5/10</description>
    </item>
    <item>
      <title>Spatial Covariance Matrix Reconstruction for Speech Enhancement in Reverberant Multi-Source Environments</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatial-covariance-matrix-reconstruction-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatial-covariance-matrix-reconstruction-for/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Spatial-CLAP: Learning Spatially-Aware Audio–Text Embeddings for Multi-Source Conditions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatial-clap-learning-spatially-aware-audiotext/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatial-clap-learning-spatially-aware-audiotext/</guid>
      <description>空间音频 | 8.5/10</description>
    </item>
    <item>
      <title>Spatially Aware Self-Supervised Models for Multi-Channel Neural Speaker Diarization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatially-aware-self-supervised-models-for-multi/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatially-aware-self-supervised-models-for-multi/</guid>
      <description>说话人分离 | 8.0/10</description>
    </item>
    <item>
      <title>SpatialNet-Echo: Real-Time Acoustic Echo Cancellation via Integrated Narrow-Band and Cross-Band Processing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatialnet-echo-real-time-acoustic-echo/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spatialnet-echo-real-time-acoustic-echo/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Speaker Anonymisation for Speech-Based Suicide Risk Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speaker-anonymisation-for-speech-based-suicide/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speaker-anonymisation-for-speech-based-suicide/</guid>
      <description>语音匿名化 | 7.5/10</description>
    </item>
    <item>
      <title>Speaking Clearly: A Simplified Whisper-Based Codec for Low-Bitrate Speech Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speaking-clearly-a-simplified-whisper-based-codec/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speaking-clearly-a-simplified-whisper-based-codec/</guid>
      <description>语音编码 | 7.5/10</description>
    </item>
    <item>
      <title>Spectral or Spatial? Leveraging Both for Speaker Extraction in Challenging Data Conditions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spectral-or-spatial-leveraging-both-for-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spectral-or-spatial-leveraging-both-for-speaker/</guid>
      <description>语音分离 | 7.0/10</description>
    </item>
    <item>
      <title>Spectrogram Event Based Feature Representation for Generalizable Automatic Music Transcription</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spectrogram-event-based-feature-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spectrogram-event-based-feature-representation/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Speech Emotion Recognition based on Hierarchical Transformer with Shifted Windows</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speech-emotion-recognition-based-on-hierarchical/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speech-emotion-recognition-based-on-hierarchical/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Speech Quality-Based Localization of Low-Quality Speech and Text-to-Speech Synthesis Artefacts</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speech-quality-based-localization-of-low-quality/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speech-quality-based-localization-of-low-quality/</guid>
      <description>语音质量评估 | 7.0/10</description>
    </item>
    <item>
      <title>SpeechCT-CLIP: Distilling Text-Image Knowledge to Speech for Voice-Native Multimodal CT Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speechct-clip-distilling-text-image-knowledge-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speechct-clip-distilling-text-image-knowledge-to/</guid>
      <description>医疗AI | 7.5/10</description>
    </item>
    <item>
      <title>SpeechMapper: Speech-To-Text Embedding Projector for LLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speechmapper-speech-to-text-embedding-projector/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-speechmapper-speech-to-text-embedding-projector/</guid>
      <description>语音大模型 | 7.0/10</description>
    </item>
    <item>
      <title>Spike-Driven Low-Power Speech Bandwidth Extension</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spike-driven-low-power-speech-bandwidth-extension/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spike-driven-low-power-speech-bandwidth-extension/</guid>
      <description>语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Spiking Attention Network: A Hybrid Neuromorphic Approach to Underwater Acoustic Localization and Zero-Shot Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spiking-attention-network-a-hybrid-neuromorphic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spiking-attention-network-a-hybrid-neuromorphic/</guid>
      <description>声源定位 | 7.0/10</description>
    </item>
    <item>
      <title>Spiking Temporal-Enhanced Network for Zero-Shot Audio-Visual Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spiking-temporal-enhanced-network-for-zero-shot/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spiking-temporal-enhanced-network-for-zero-shot/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Spring Reverb Emulation with Hybrid Gated Convolutional Networks and State Space Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spring-reverb-emulation-with-hybrid-gated/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-spring-reverb-emulation-with-hybrid-gated/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>SSVD-O: Parameter-Efficient Fine-Tuning with Structured SVD for Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ssvd-o-parameter-efficient-fine-tuning-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ssvd-o-parameter-efficient-fine-tuning-with/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>ST-HNTM: Joint Speech-Text Neural Topic Modeling on the Hypersphere</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-st-hntm-joint-speech-text-neural-topic-modeling/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-st-hntm-joint-speech-text-neural-topic-modeling/</guid>
      <description>主题建模 | 7.0/10</description>
    </item>
    <item>
      <title>STACodec: Semantic Token Assignment for Balancing Acoustic Fidelity and Semantic Information in Audio Codecs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stacodec-semantic-token-assignment-for-balancing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stacodec-semantic-token-assignment-for-balancing/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Staged Diffusion with Hybrid Mixture-of-Experts (MOE) for Multimodal Sentiment Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-staged-diffusion-with-hybrid-mixture-of-experts/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-staged-diffusion-with-hybrid-mixture-of-experts/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>Stemphonic: All-At-Once Flexible Multi-Stem Music Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stemphonic-all-at-once-flexible-multi-stem-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stemphonic-all-at-once-flexible-multi-stem-music/</guid>
      <description>音乐生成 | 7.7/10</description>
    </item>
    <item>
      <title>StereoFoley: Object-Aware Stereo Audio Generation from Video</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stereofoley-object-aware-stereo-audio-generation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stereofoley-object-aware-stereo-audio-generation/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Stereophonic Acoustic Echo Cancellation Using an Improved Affine Projection Algorithm with Adaptive Multiple Sub-Filters</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stereophonic-acoustic-echo-cancellation-using-an/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stereophonic-acoustic-echo-cancellation-using-an/</guid>
      <description>语音增强 | 6.0/10</description>
    </item>
    <item>
      <title>Still Thinking or Stopped Talking? Dialogue Silence Intention Classification Using Multimodal Large Language Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-still-thinking-or-stopped-talking-dialogue/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-still-thinking-or-stopped-talking-dialogue/</guid>
      <description>语音对话系统 | 6.5/10</description>
    </item>
    <item>
      <title>Str-DiffSep: Streamable Diffusion Model for Speech Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-str-diffsep-streamable-diffusion-model-for-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-str-diffsep-streamable-diffusion-model-for-speech/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>Stream-Voice-Anon: Enhancing Utility of Real-Time Speaker Anonymization Via Neural Audio Codec and Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stream-voice-anon-enhancing-utility-of-real-time/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stream-voice-anon-enhancing-utility-of-real-time/</guid>
      <description>语音匿名化 | 7.0/10</description>
    </item>
    <item>
      <title>Streaming Speech Recognition with Decoder-Only Large Language Models and Latency Optimization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-streaming-speech-recognition-with-decoder-only/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-streaming-speech-recognition-with-decoder-only/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Streamingbench: Assessing the Gap for MLLMs to Achieve Streaming Video Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-streamingbench-assessing-the-gap-for-mllms-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-streamingbench-assessing-the-gap-for-mllms-to/</guid>
      <description>基准测试 | 7.5/10</description>
    </item>
    <item>
      <title>StreamMark: A Deep Learning-Based Semi-Fragile Audio Watermarking for Proactive Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-streammark-a-deep-learning-based-semi-fragile/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-streammark-a-deep-learning-based-semi-fragile/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>Stress Prediction from Temporal Emotion Trajectories in Clinical Patient-Physician Conversations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stress-prediction-from-temporal-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stress-prediction-from-temporal-emotion/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>Structure-Aware Diffusion Schrödinger Bridge</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-structure-aware-diffusion-schrdinger-bridge/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-structure-aware-diffusion-schrdinger-bridge/</guid>
      <description>数据集对齐 | 7.7/10</description>
    </item>
    <item>
      <title>StyHarmo: Efficient Style-Specific Video Generation with Music Synchronization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-styharmo-efficient-style-specific-video/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-styharmo-efficient-style-specific-video/</guid>
      <description>视频生成 | 6.5/10</description>
    </item>
    <item>
      <title>Style Attack Disguise: When Fonts Become a Camouflage for Adversarial Intent</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-style-attack-disguise-when-fonts-become-a/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-style-attack-disguise-when-fonts-become-a/</guid>
      <description>对抗样本 | 7.0/10</description>
    </item>
    <item>
      <title>Style-Disentangled Diffusion for Controllable and Identity-Generalized Speech-Driven Body Motion Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-style-disentangled-diffusion-for-controllable-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-style-disentangled-diffusion-for-controllable-and/</guid>
      <description>语音驱动动作生成 | 7.0/10</description>
    </item>
    <item>
      <title>StyleBench: Evaluating Speech Language Models on Conversational Speaking Style Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stylebench-evaluating-speech-language-models-on/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stylebench-evaluating-speech-language-models-on/</guid>
      <description>基准测试 | 8.5/10</description>
    </item>
    <item>
      <title>StylePitcher: Generating Style-Following and Expressive Pitch Curves for Versatile Singing Tasks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stylepitcher-generating-style-following-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-stylepitcher-generating-style-following-and/</guid>
      <description>歌唱语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Subgraph Localization in the Subbands for Partially Spoofed Speech Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-subgraph-localization-in-the-subbands-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-subgraph-localization-in-the-subbands-for/</guid>
      <description>音频深度伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>Subsequence SDTW: Differentiable Alignment with Flexible Boundary Conditions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-subsequence-sdtw-differentiable-alignment-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-subsequence-sdtw-differentiable-alignment-with/</guid>
      <description>音乐信息检索 | 8.0/10</description>
    </item>
    <item>
      <title>Subspace Hybrid Adaptive Filtering for Phonocardiogram Signal Denoising</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-subspace-hybrid-adaptive-filtering-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-subspace-hybrid-adaptive-filtering-for/</guid>
      <description>音频增强 | 7.0/10</description>
    </item>
    <item>
      <title>Sunac: Source-Aware Unified Neural Audio Codec</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sunac-source-aware-unified-neural-audio-codec/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sunac-source-aware-unified-neural-audio-codec/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>SURE: Synergistic Uncertainty-Aware Reasoning for Multimodal Emotion Recognition in Conversations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sure-synergistic-uncertainty-aware-reasoning-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-sure-synergistic-uncertainty-aware-reasoning-for/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>SwitchCodec: Adaptive Residual-Expert Sparse Quantization for High-Fidelity Neural Audio Coding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-switchcodec-adaptive-residual-expert-sparse/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-switchcodec-adaptive-residual-expert-sparse/</guid>
      <description>音频生成 | 8.5/10</description>
    </item>
    <item>
      <title>Symphony Rendering: Midi and Composer-Conditioned Auto Orchestration with Flow-Matching Transformers</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-symphony-rendering-midi-and-composer-conditioned/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-symphony-rendering-midi-and-composer-conditioned/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>SynaSpot: A Lightweight, Streaming Multi-modal Framework for Keyword Spotting with Audio-Text Synergy</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synaspot-a-lightweight-streaming-multi-modal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synaspot-a-lightweight-streaming-multi-modal/</guid>
      <description>关键词检测 | 7.5/10</description>
    </item>
    <item>
      <title>Synchronous Secondary Path Modeling and Kronecker-Factorized Adaptive Algorithm for Multichannel Active Noise Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synchronous-secondary-path-modeling-and-kronecker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synchronous-secondary-path-modeling-and-kronecker/</guid>
      <description>主动噪声控制 | 7.0/10</description>
    </item>
    <item>
      <title>Syncspeech: Efficient and Low-Latency Text-to-Speech Based on Temporal Masked Transformer</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-syncspeech-efficient-and-low-latency-text-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-syncspeech-efficient-and-low-latency-text-to/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>SynParaSpeech: Automated Synthesis of Paralinguistic Datasets for Speech Generation and Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synparaspeech-automated-synthesis-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synparaspeech-automated-synthesis-of/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Synthcloner: Synthesizer-Style Audio Transfer via Factorized Codec with ADSR Envelope Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthcloner-synthesizer-style-audio-transfer-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthcloner-synthesizer-style-audio-transfer-via/</guid>
      <description>音频生成 | 8.5/10</description>
    </item>
    <item>
      <title>Synthesized Data Selection via Score Distribution Matching for Te Reo Māori Automatic Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthesized-data-selection-via-score-distribution/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthesized-data-selection-via-score-distribution/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Synthetic Data Domain Adaptation for ASR via LLM-Based Text and Phonetic Respelling Augmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthetic-data-domain-adaptation-for-asr-via-llm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthetic-data-domain-adaptation-for-asr-via-llm/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>Synthetic yet Striking? Assessing Vocal Charisma in TTS via Perceptual and Algorithmic Measures</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthetic-yet-striking-assessing-vocal-charisma/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-synthetic-yet-striking-assessing-vocal-charisma/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>T-Cache: Fast Inference For Masked Generative Transformer-Based TTS Via Prompt-Aware Feature Caching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-t-cache-fast-inference-for-masked-generative/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-t-cache-fast-inference-for-masked-generative/</guid>
      <description>语音合成 | 9.0/10</description>
    </item>
    <item>
      <title>T-Mimi: A Transformer-Based Mimi Decoder for Real-Time On-Phone TTS</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-t-mimi-a-transformer-based-mimi-decoder-for-real/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-t-mimi-a-transformer-based-mimi-decoder-for-real/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>TAG: Structured Temporal Audio Generation via LLM-Guided Manual Scription and Control</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tag-structured-temporal-audio-generation-via-llm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tag-structured-temporal-audio-generation-via-llm/</guid>
      <description>音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>TAGARELA - A Portuguese Speech Dataset from Podcasts</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tagarela-a-portuguese-speech-dataset-from-podcasts/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tagarela-a-portuguese-speech-dataset-from-podcasts/</guid>
      <description>语音识别 语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Taming Audio VAEs via Target-KL Regularization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-taming-audio-vaes-via-target-kl-regularization/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-taming-audio-vaes-via-target-kl-regularization/</guid>
      <description>音频生成 | 6.5/10</description>
    </item>
    <item>
      <title>Target Speaker Anonymization in Multi-Speaker Recordings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-target-speaker-anonymization-in-multi-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-target-speaker-anonymization-in-multi-speaker/</guid>
      <description>语音匿名化 | 7.6/10</description>
    </item>
    <item>
      <title>Target-Speaker LLM-ASR with Speaker-Aware Speech Encoder</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-target-speaker-llm-asr-with-speaker-aware-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-target-speaker-llm-asr-with-speaker-aware-speech/</guid>
      <description>语音识别 | 8.8/10</description>
    </item>
    <item>
      <title>Task Vector in TTS: Toward Emotionally Expressive Dialectal Speech Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-task-vector-in-tts-toward-emotionally-expressive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-task-vector-in-tts-toward-emotionally-expressive/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Task-Oriented Sound Privacy Preservation for Sound Event Detection Via End-to-End Adversarial Multi-Task Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-task-oriented-sound-privacy-preservation-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-task-oriented-sound-privacy-preservation-for/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>TASU: Text-only Alignment for Speech Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tasu-text-only-alignment-for-speech-understanding/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tasu-text-only-alignment-for-speech-understanding/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>TAU: A Benchmark for Cultural Sound Understanding Beyond Semantics</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tau-a-benchmark-for-cultural-sound-understanding/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tau-a-benchmark-for-cultural-sound-understanding/</guid>
      <description>音频问答 | 7.5/10</description>
    </item>
    <item>
      <title>Teacher-Guided Pseudo Supervision and Cross-Modal Alignment for Audio-Visual Video Parsing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teacher-guided-pseudo-supervision-and-cross-modal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teacher-guided-pseudo-supervision-and-cross-modal/</guid>
      <description>音视频 | 7.0/10</description>
    </item>
    <item>
      <title>Teaching Audio Models to Reason: A Unified Framework for Source- and Layer-Wise Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teaching-audio-models-to-reason-a-unified/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teaching-audio-models-to-reason-a-unified/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Teaching the Teachers: Boosting Unsupervised Domain Adaptation In Speech Recognition By Ensemble Update</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teaching-the-teachers-boosting-unsupervised/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-teaching-the-teachers-boosting-unsupervised/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Temporal Distillation for Music Representation Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporal-distillation-for-music-representation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporal-distillation-for-music-representation/</guid>
      <description>音乐信息检索 | 7.5/10</description>
    </item>
    <item>
      <title>Temporal Graph Modeling for Speech Emotion Recognition Using LSTM-Aggregated Multigraph Networks</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporal-graph-modeling-for-speech-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporal-graph-modeling-for-speech-emotion/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Temporal-Spatial Decouple Before Act: Disentangled Representation Learning for Multimodal Sentiment Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporal-spatial-decouple-before-act-disentangled/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporal-spatial-decouple-before-act-disentangled/</guid>
      <description>情感分析 | 7.5/10</description>
    </item>
    <item>
      <title>Temporally Heterogeneous Graph Contrastive Learning for Multimodal Acoustic Event Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporally-heterogeneous-graph-contrastive/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-temporally-heterogeneous-graph-contrastive/</guid>
      <description>音频事件检测 | 8.5/10</description>
    </item>
    <item>
      <title>Test Time Adaptation for Speech Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-test-time-adaptation-for-speech-emotion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-test-time-adaptation-for-speech-emotion/</guid>
      <description>语音情感识别 | 7.0/10</description>
    </item>
    <item>
      <title>Test-Time Scaling for Auditory Cognition in Audio Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-test-time-scaling-for-auditory-cognition-in-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-test-time-scaling-for-auditory-cognition-in-audio/</guid>
      <description>音频问答 | 7.0/10</description>
    </item>
    <item>
      <title>Testing The Efficient Coding Hypothesis Beyond Humans: The Auditory Kernels of Bat Vocalizations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-testing-the-efficient-coding-hypothesis-beyond/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-testing-the-efficient-coding-hypothesis-beyond/</guid>
      <description>生物声学 | 7.5/10</description>
    </item>
    <item>
      <title>Text2midi-InferAlign: Improving Symbolic Music Generation with Inference-Time Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-text2midi-inferalign-improving-symbolic-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-text2midi-inferalign-improving-symbolic-music/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Text2Move: Text-To-Moving Sound Generation via Trajectory Prediction and Temporal Alignment</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-text2move-text-to-moving-sound-generation-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-text2move-text-to-moving-sound-generation-via/</guid>
      <description>空间音频 | 8.0/10</description>
    </item>
    <item>
      <title>TextlessRAG: End-to-End Visual Document RAG by Speech without Text</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-textlessrag-end-to-end-visual-document-rag-by/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-textlessrag-end-to-end-visual-document-rag-by/</guid>
      <description>语音问答 | 8.5/10</description>
    </item>
    <item>
      <title>The 3rd Clarity Prediction Challenge: A Machine Learning Challenge for Hearing aid Speech Intelligibility Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-3rd-clarity-prediction-challenge-a-machine/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-3rd-clarity-prediction-challenge-a-machine/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>The Curious Case of Visual Grounding: Different Effects for Speech-and Text-Based Language Encoders</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-curious-case-of-visual-grounding-different/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-curious-case-of-visual-grounding-different/</guid>
      <description>模型评估 | 8.0/10</description>
    </item>
    <item>
      <title>The Impact of Audio Watermarking on Audio Anti-Spoofing Countermeasures</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-impact-of-audio-watermarking-on-audio-anti/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-impact-of-audio-watermarking-on-audio-anti/</guid>
      <description>音频深度伪造检测 | 8.5/10</description>
    </item>
    <item>
      <title>The Muse Benchmark: Probing Music Perception and Auditory Relational Reasoning in Audio LLMs</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-muse-benchmark-probing-music-perception-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-muse-benchmark-probing-music-perception-and/</guid>
      <description>音乐理解 | 8.5/10</description>
    </item>
    <item>
      <title>The Role of Prosodic and Lexical Cues in Turn-Taking with Self-Supervised Speech Representations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-role-of-prosodic-and-lexical-cues-in-turn/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-role-of-prosodic-and-lexical-cues-in-turn/</guid>
      <description>语音对话系统 | 7.5/10</description>
    </item>
    <item>
      <title>The Singing Voice Conversion Challenge 2025: From Singer Identity Conversion to Singing Style Conversion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-singing-voice-conversion-challenge-2025-from/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-singing-voice-conversion-challenge-2025-from/</guid>
      <description>歌唱语音转换 | 7.0/10</description>
    </item>
    <item>
      <title>The Synergistic Role of Audio and Large Video-Language Model in Source-Free Video Domain Adaptation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-synergistic-role-of-audio-and-large-video/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-the-synergistic-role-of-audio-and-large-video/</guid>
      <description>领域适应 | 7.0/10</description>
    </item>
    <item>
      <title>Theory and Application of Circular Relative Harmonic Coefficients</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-theory-and-application-of-circular-relative/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-theory-and-application-of-circular-relative/</guid>
      <description>声源定位 | 7.5/10</description>
    </item>
    <item>
      <title>Thinking While Listening: Simple Test Time Scaling for Audio Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-thinking-while-listening-simple-test-time-scaling/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-thinking-while-listening-simple-test-time-scaling/</guid>
      <description>音频分类 | 6.5/10</description>
    </item>
    <item>
      <title>Three Seconds is Sufficient: A Multi-Pronged Framework for Model-Based Speaker Adaptation in ASR Under Data-Scarce Conditions</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-three-seconds-is-sufficient-a-multi-pronged/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-three-seconds-is-sufficient-a-multi-pronged/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>TICL: Text-Embedding KNN for Speech in-Context Learning Unlocks Speech Recognition Abilities of Large Multimodal Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ticl-text-embedding-knn-for-speech-in-context/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ticl-text-embedding-knn-for-speech-in-context/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Timbre-Aware Audio Difference Captioning for Anomalous Machine Sounds without Paired Training Data via Synthetic Perturbations</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-timbre-aware-audio-difference-captioning-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-timbre-aware-audio-difference-captioning-for/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Timbre-Based Pretraining with Pseudo-Labels for Multi-Instrument Automatic Music Transcription</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-timbre-based-pretraining-with-pseudo-labels-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-timbre-based-pretraining-with-pseudo-labels-for/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Time vs. Layer: Locating Predictive Cues for Dysarthric Speech Descriptors in Wav2vec 2.0</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-vs-layer-locating-predictive-cues-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-vs-layer-locating-predictive-cues-for/</guid>
      <description>语音质量评估 | 7.5/10</description>
    </item>
    <item>
      <title>Time-Domain Synthesis of Virtual Sound Source Within Personalized Sound Zone using a Linear Loudspeaker Array</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-domain-synthesis-of-virtual-sound-source/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-domain-synthesis-of-virtual-sound-source/</guid>
      <description>空间音频 | 8.0/10</description>
    </item>
    <item>
      <title>Time-Shifted Token Scheduling for Symbolic Music Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-shifted-token-scheduling-for-symbolic-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-time-shifted-token-scheduling-for-symbolic-music/</guid>
      <description>音乐生成 | 8.5/10</description>
    </item>
    <item>
      <title>TinyMU: A Compact Audio-Language Model for Music Understanding</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tinymu-a-compact-audio-language-model-for-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tinymu-a-compact-audio-language-model-for-music/</guid>
      <description>音乐理解 | 7.5/10</description>
    </item>
    <item>
      <title>Tldiffgan: A Latent Diffusion-Gan Framework with Temporal Information Fusion for Anomalous Sound Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tldiffgan-a-latent-diffusion-gan-framework-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tldiffgan-a-latent-diffusion-gan-framework-with/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>TMD-TTS: A Unified Tibetan Multi-Dialect Text-to-Speech Framework for Ü-Tsang, Amdo and Kham Speech Dataset Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tmd-tts-a-unified-tibetan-multi-dialect-text-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tmd-tts-a-unified-tibetan-multi-dialect-text-to/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Tokenchain: A Discrete Speech Chain via Semantic Token Modeling</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tokenchain-a-discrete-speech-chain-via-semantic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tokenchain-a-discrete-speech-chain-via-semantic/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Toward Faithful Explanations in Acoustic Anomaly Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-toward-faithful-explanations-in-acoustic-anomaly/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-toward-faithful-explanations-in-acoustic-anomaly/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>Toward Robust And Efficient Beat Tracking Via Beat-Aware Attention</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-toward-robust-and-efficient-beat-tracking-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-toward-robust-and-efficient-beat-tracking-via/</guid>
      <description>音乐理解 | 8.5/10</description>
    </item>
    <item>
      <title>Towards Blind Data Cleaning: A Case Study in Music Source Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-blind-data-cleaning-a-case-study-in-music/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-blind-data-cleaning-a-case-study-in-music/</guid>
      <description>音乐信息检索 | 7.0/10</description>
    </item>
    <item>
      <title>Towards Building Speech Large Language Models for Multitask Understanding in Low-Resource Languages</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-building-speech-large-language-models-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-building-speech-large-language-models-for/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Towards Data Drift Monitoring for Speech Deepfake Detection in the Context of MLOps</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-data-drift-monitoring-for-speech-deepfake/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-data-drift-monitoring-for-speech-deepfake/</guid>
      <description>音频深度伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Towards Distance-Aware Synthetic Audio Mixtures for Universal Sound Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-distance-aware-synthetic-audio-mixtures/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-distance-aware-synthetic-audio-mixtures/</guid>
      <description>语音分离 | 6.5/10</description>
    </item>
    <item>
      <title>Towards Effective Negation Modeling in Joint Audio-Text Models for Music</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-effective-negation-modeling-in-joint/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-effective-negation-modeling-in-joint/</guid>
      <description>音乐理解 | 7.5/10</description>
    </item>
    <item>
      <title>Towards Evaluating Generative Audio: Insights from Neural Audio Codec Embedding Distances</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-evaluating-generative-audio-insights-from/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-evaluating-generative-audio-insights-from/</guid>
      <description>模型评估 | 6.5/10</description>
    </item>
    <item>
      <title>Towards Fair ASR for Second Language Speakers using Fairness Prompted Finetuning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-fair-asr-for-second-language-speakers/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-fair-asr-for-second-language-speakers/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Towards Lightweight Adaptation of Speech Enhancement Models in Real-World Environments</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-lightweight-adaptation-of-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-lightweight-adaptation-of-speech/</guid>
      <description>语音增强 | 8.5/10</description>
    </item>
    <item>
      <title>Towards Multi-View Hierarchical Video-to-Piano Generation with MIDI Guidance</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-multi-view-hierarchical-video-to-piano/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-multi-view-hierarchical-video-to-piano/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>Towards Orthographically-Informed Evaluation of Speech Recognition Systems for Indian Languages</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-orthographically-informed-evaluation-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-orthographically-informed-evaluation-of/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Towards Real-Time Generative Speech Restoration with Flow-Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-real-time-generative-speech-restoration/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-real-time-generative-speech-restoration/</guid>
      <description>语音增强 | 6.0/10</description>
    </item>
    <item>
      <title>Towards Robust Dysarthric Speech Recognition: LLM-Agent Post-ASR Correction Beyond WER</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-robust-dysarthric-speech-recognition-llm/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-towards-robust-dysarthric-speech-recognition-llm/</guid>
      <description>语音识别 | 9.0/10</description>
    </item>
    <item>
      <title>Tpeformer: Temporal Patch Embedding Transformer</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tpeformer-temporal-patch-embedding-transformer/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tpeformer-temporal-patch-embedding-transformer/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Train Short, Infer Long: Speech-LLM Enables Zero-Shot Streamable Joint ASR and Diarization on Long Audio</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-train-short-infer-long-speech-llm-enables-zero/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-train-short-infer-long-speech-llm-enables-zero/</guid>
      <description>说话人分离 | 9.0/10</description>
    </item>
    <item>
      <title>Training Dynamics-Aware Multi-Factor Curriculum Learning for Target Speaker Extraction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-dynamics-aware-multi-factor-curriculum/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-dynamics-aware-multi-factor-curriculum/</guid>
      <description>语音分离 | 7.0/10</description>
    </item>
    <item>
      <title>Training Flow Matching Models with Reliable Labels via Self-Purification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-flow-matching-models-with-reliable/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-flow-matching-models-with-reliable/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>Training-Free Inference-Time Scaling for Audio Source Separation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-free-inference-time-scaling-for-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-free-inference-time-scaling-for-audio/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>Training-Free Multimodal Guidance for Video to Audio Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-free-multimodal-guidance-for-video-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-training-free-multimodal-guidance-for-video-to/</guid>
      <description>音频生成 | 8.0/10</description>
    </item>
    <item>
      <title>Transfer Learning for Paediatric Sleep Apnoea Detection using Physiology-Guided Acoustic Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-transfer-learning-for-paediatric-sleep-apnoea/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-transfer-learning-for-paediatric-sleep-apnoea/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Transferable Audio Lottery Tickets: Gradient Accumulation for Extreme Sparsity</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-transferable-audio-lottery-tickets-gradient/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-transferable-audio-lottery-tickets-gradient/</guid>
      <description>音频分类 | 7.0/10</description>
    </item>
    <item>
      <title>Tri-Attention Fusion: Joint Temporal-Spectral and Bidirectional Modeling for Speech Spoofing Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tri-attention-fusion-joint-temporal-spectral-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tri-attention-fusion-joint-temporal-spectral-and/</guid>
      <description>语音伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Triad: Tri-Head with Auxiliary Duplicating Permutation Invariant Training for Multi-Task Sound Event Localization and Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-triad-tri-head-with-auxiliary-duplicating/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-triad-tri-head-with-auxiliary-duplicating/</guid>
      <description>音频事件检测 | 7.5/10</description>
    </item>
    <item>
      <title>Triage Knowledge Distillation for Speaker Verification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-triage-knowledge-distillation-for-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-triage-knowledge-distillation-for-speaker/</guid>
      <description>说话人验证 | 7.5/10</description>
    </item>
    <item>
      <title>TTA: Transcribe, Translate and Alignment for Cross-Lingual Speech Representation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tta-transcribe-translate-and-alignment-for-cross/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tta-transcribe-translate-and-alignment-for-cross/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>TVP-UNet: Threshold Variance Penalty U-Net for Voice Activity Detection in Dysarthric Speech</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tvp-unet-threshold-variance-penalty-u-net-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-tvp-unet-threshold-variance-penalty-u-net-for/</guid>
      <description>语音活动检测 | 7.0/10</description>
    </item>
    <item>
      <title>Two-Stage Language Model Framework for Acoustic Echo Cancellation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-two-stage-language-model-framework-for-acoustic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-two-stage-language-model-framework-for-acoustic/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>UJCodec: An End-to-end Unet-Style Codec for Joint Speech Compression and Enhancement</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ujcodec-an-end-to-end-unet-style-codec-for-joint/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-ujcodec-an-end-to-end-unet-style-codec-for-joint/</guid>
      <description>语音增强 | 7.5/10</description>
    </item>
    <item>
      <title>UMA-SPLIT: Unimodal Aggregation for Both English and Mandarin Non-Autoregressive Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uma-split-unimodal-aggregation-for-both-english/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uma-split-unimodal-aggregation-for-both-english/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>UMV: A Mixture-Of-Experts Vision Transformer with Multi-Spectrogram Fusion for Underwater Ship Noise Classification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-umv-a-mixture-of-experts-vision-transformer-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-umv-a-mixture-of-experts-vision-transformer-with/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>Uncertainty-Aware 3D Emotional Talking Face Synthesis with Emotion Prior Distillation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uncertainty-aware-3d-emotional-talking-face/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uncertainty-aware-3d-emotional-talking-face/</guid>
      <description>音视频 | 8.0/10</description>
    </item>
    <item>
      <title>Understanding Textual Capability Degradation in Speech LLMS via Parameter Importance Analysis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-understanding-textual-capability-degradation-in/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-understanding-textual-capability-degradation-in/</guid>
      <description>语音问答 | 7.5/10</description>
    </item>
    <item>
      <title>Understanding the Strengths and Weaknesses of SSL Models for Audio Deepfake Model Attribution</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-understanding-the-strengths-and-weaknesses-of-ssl/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-understanding-the-strengths-and-weaknesses-of-ssl/</guid>
      <description>音频深度伪造检测 | 7.0/10</description>
    </item>
    <item>
      <title>Universr: Unified and Versatile Audio Super-Resolution Via Vocoder-Free Flow Matching</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-universr-unified-and-versatile-audio-super/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-universr-unified-and-versatile-audio-super/</guid>
      <description>音频超分辨率 | 8.0/10</description>
    </item>
    <item>
      <title>UNMIXX: Untangling Highly Correlated Singing Voices Mixtures</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unmixx-untangling-highly-correlated-singing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unmixx-untangling-highly-correlated-singing/</guid>
      <description>语音分离 | 8.5/10</description>
    </item>
    <item>
      <title>Unseen but Not Unknown: Using Dataset Concealment to Robustly Evaluate Speech Quality Estimation Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unseen-but-not-unknown-using-dataset-concealment/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unseen-but-not-unknown-using-dataset-concealment/</guid>
      <description>语音质量评估 | 8.3/10</description>
    </item>
    <item>
      <title>Unsupervised Discovery and Analysis of the Vocal Repertoires and Patterns of Select Corvid Species</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unsupervised-discovery-and-analysis-of-the-vocal/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unsupervised-discovery-and-analysis-of-the-vocal/</guid>
      <description>生物声学 | 7.5/10</description>
    </item>
    <item>
      <title>Unsupervised Lexicon Learning from Speech is Limited by Representations Rather than Clustering</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unsupervised-lexicon-learning-from-speech-is/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-unsupervised-lexicon-learning-from-speech-is/</guid>
      <description>语音发现 | 8.0/10</description>
    </item>
    <item>
      <title>USVexplorer: Robust Detection of Ultrasonic Vocalizations with Cross Species Generalization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-usvexplorer-robust-detection-of-ultrasonic/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-usvexplorer-robust-detection-of-ultrasonic/</guid>
      <description>音频事件检测 | 8.0/10</description>
    </item>
    <item>
      <title>UTI-LLM: A Personalized Articulatory-Speech Therapy Assistance System Based on Multimodal Large Language Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uti-llm-a-personalized-articulatory-speech/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uti-llm-a-personalized-articulatory-speech/</guid>
      <description>语音对话系统 | 7.5/10</description>
    </item>
    <item>
      <title>Utilizing Information Theoretic Approach to Study Cochlear Neural Degeneration</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-utilizing-information-theoretic-approach-to-study/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-utilizing-information-theoretic-approach-to-study/</guid>
      <description>生物声学 | 6.5/10</description>
    </item>
    <item>
      <title>UVT-LM: Unifying Visual and Tactile Perception with Language Model</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uvt-lm-unifying-visual-and-tactile-perception/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-uvt-lm-unifying-visual-and-tactile-perception/</guid>
      <description>跨模态 | 7.0/10</description>
    </item>
    <item>
      <title>V2A-DPO: Omni-Preference Optimization for Video-To-Audio Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-v2a-dpo-omni-preference-optimization-for-video-to/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-v2a-dpo-omni-preference-optimization-for-video-to/</guid>
      <description>视频到音频生成 | 7.5/10</description>
    </item>
    <item>
      <title>Variational Low-Rank Adaptation for Personalized Impaired Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-variational-low-rank-adaptation-for-personalized/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-variational-low-rank-adaptation-for-personalized/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>VBx for End-to-End Neural and Clustering-Based Diarization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vbx-for-end-to-end-neural-and-clustering-based/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vbx-for-end-to-end-neural-and-clustering-based/</guid>
      <description>说话人分离 | 8.5/10</description>
    </item>
    <item>
      <title>VChangeCodec: An Ultra Low-Complexity Neural Speech Codec with Built-In Voice Changer for Customized Real-Time Communication</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vchangecodec-an-ultra-low-complexity-neural/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vchangecodec-an-ultra-low-complexity-neural/</guid>
      <description>语音转换 语音增强 | 8.0/10</description>
    </item>
    <item>
      <title>Via Score to Performance: Efficient Human-Controllable Long Song Generation with Bar-Level Symbolic Notation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-via-score-to-performance-efficient-human/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-via-score-to-performance-efficient-human/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>Vib2Sound: Separation Of Multimodal Sound Sources</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vib2sound-separation-of-multimodal-sound-sources/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vib2sound-separation-of-multimodal-sound-sources/</guid>
      <description>语音分离 | 6.5/10</description>
    </item>
    <item>
      <title>Vioptt: Violin Technique-Aware Transcription from Synthetic Data Augmentation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vioptt-violin-technique-aware-transcription-from/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vioptt-violin-technique-aware-transcription-from/</guid>
      <description>音乐信息检索 | 6.5/10</description>
    </item>
    <item>
      <title>Virtual Consistency for Audio Editing</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-virtual-consistency-for-audio-editing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-virtual-consistency-for-audio-editing/</guid>
      <description>音乐生成 | 8.0/10</description>
    </item>
    <item>
      <title>Visual Keys to Symphonies: Latent Diffusion for Multi-Scene Video-to-Music Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-visual-keys-to-symphonies-latent-diffusion-for/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-visual-keys-to-symphonies-latent-diffusion-for/</guid>
      <description>音乐生成 | 7.5/10</description>
    </item>
    <item>
      <title>ViTex: Visual Texture Control for Multi-Track Symbolic Music Generation via Discrete Diffusion Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vitex-visual-texture-control-for-multi-track/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vitex-visual-texture-control-for-multi-track/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>VividTalker: A Modular Framework for Expressive 3D Talking Avatars with Controllable Gaze and Blink</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vividtalker-a-modular-framework-for-expressive-3d/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vividtalker-a-modular-framework-for-expressive-3d/</guid>
      <description>语音合成 | 7.5/10</description>
    </item>
    <item>
      <title>VM-UNSSOR: Unsupervised Neural Speech Separation Enhanced by Higher-SNR Virtual Microphone Arrays</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vm-unssor-unsupervised-neural-speech-separation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vm-unssor-unsupervised-neural-speech-separation/</guid>
      <description>语音分离 | 7.5/10</description>
    </item>
    <item>
      <title>VMSP: Video-to-Music Generation with Two-Stage Alignment and Synthesis</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vmsp-video-to-music-generation-with-two-stage/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vmsp-video-to-music-generation-with-two-stage/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>Vocalnet-M2: Advancing Low-Latency Spoken Language Modeling via Integrated Multi-Codebook Tokenization and Multi-Token Prediction</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vocalnet-m2-advancing-low-latency-spoken-language/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vocalnet-m2-advancing-low-latency-spoken-language/</guid>
      <description>语音对话系统 | 7.5/10</description>
    </item>
    <item>
      <title>Voting-Based Pitch Estimation with Temporal and Frequential Alignment and Correlation Aware Selection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-voting-based-pitch-estimation-with-temporal-and/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-voting-based-pitch-estimation-with-temporal-and/</guid>
      <description>语音识别 | 8.0/10</description>
    </item>
    <item>
      <title>VoxMorph: Scalable Zero-Shot Voice Identity Morphing via Disentangled Embeddings</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-voxmorph-scalable-zero-shot-voice-identity/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-voxmorph-scalable-zero-shot-voice-identity/</guid>
      <description>语音克隆 | 9.0/10</description>
    </item>
    <item>
      <title>VoXtream: Full-Stream Text-To-Speech With Extremely Low Latency</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-voxtream-full-stream-text-to-speech-with/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-voxtream-full-stream-text-to-speech-with/</guid>
      <description>语音合成 | 8.5/10</description>
    </item>
    <item>
      <title>VT-Heads: Voice Cloning and Talking Head Generation from Text Based on V-DiT</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vt-heads-voice-cloning-and-talking-head/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-vt-heads-voice-cloning-and-talking-head/</guid>
      <description>视频生成 | 6.5/10</description>
    </item>
    <item>
      <title>WAV2LEV: Predicting Levenshtein Edit Operation Sequences For Fine-Grained Estimation of Automatic Speech Recognition Error</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wav2lev-predicting-levenshtein-edit-operation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wav2lev-predicting-levenshtein-edit-operation/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Wave-Trainer-Fit: Neural Vocoder With Trainable Prior And Fixed-Point Iteration Towards High-Quality Speech Generation From SSL Features</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wave-trainer-fit-neural-vocoder-with-trainable/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wave-trainer-fit-neural-vocoder-with-trainable/</guid>
      <description>语音合成 | 7.0/10</description>
    </item>
    <item>
      <title>Wavenext 2: Convnext-Based Fast Neural Vocoders with Residual Denoising and Sub-Modeling for Gan And Diffusion Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavenext-2-convnext-based-fast-neural-vocoders/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavenext-2-convnext-based-fast-neural-vocoders/</guid>
      <description>语音合成 | 9.0/10</description>
    </item>
    <item>
      <title>WaveSP-Net: Learnable Wavelet-Domain Sparse Prompt Tuning for Speech Deepfake Detection</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavesp-net-learnable-wavelet-domain-sparse-prompt/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavesp-net-learnable-wavelet-domain-sparse-prompt/</guid>
      <description>语音伪造检测 | 8.0/10</description>
    </item>
    <item>
      <title>WaveSpikeNet: A Wavelet-Spiking Fusion Architecture for Audio Classification on Edge Devices</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavespikenet-a-wavelet-spiking-fusion/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavespikenet-a-wavelet-spiking-fusion/</guid>
      <description>音频分类 | 7.5/10</description>
    </item>
    <item>
      <title>WavLink: Compact Audio–Text Embeddings with a Global Whisper Token</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavlink-compact-audiotext-embeddings-with-a/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-wavlink-compact-audiotext-embeddings-with-a/</guid>
      <description>音频检索 | 8.0/10</description>
    </item>
    <item>
      <title>What the student learns in knowledge distillation: A subspace view and evidence on Convolutional Recurrent Network</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-what-the-student-learns-in-knowledge-distillation/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-what-the-student-learns-in-knowledge-distillation/</guid>
      <description>语音增强 | 6.5/10</description>
    </item>
    <item>
      <title>When Audio Matters: A Lightweight, Hierarchical Fusion Model for Speech and Non-Verbal Emotion Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-audio-matters-a-lightweight-hierarchical/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-audio-matters-a-lightweight-hierarchical/</guid>
      <description>语音情感识别 | 8.0/10</description>
    </item>
    <item>
      <title>When Children Talk and Machines Listen: Toward an Interpretable Speech-Based Screener for Dutch Developmental Language Disorder</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-children-talk-and-machines-listen-toward-an/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-children-talk-and-machines-listen-toward-an/</guid>
      <description>语音生物标志物 | 7.0/10</description>
    </item>
    <item>
      <title>When Noise Lowers the Loss: Rethinking Likelihood-Based Evaluation in Music Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-noise-lowers-the-loss-rethinking-likelihood/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-noise-lowers-the-loss-rethinking-likelihood/</guid>
      <description>音乐生成 | 7.0/10</description>
    </item>
    <item>
      <title>When Silence Matters: The Impact of Irrelevant Audio on Text Reasoning in Large Audio-Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-silence-matters-the-impact-of-irrelevant/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-silence-matters-the-impact-of-irrelevant/</guid>
      <description>模型评估 | 7.0/10</description>
    </item>
    <item>
      <title>When Voice Matters: A Controlled Study of Audio LLM Behavior in Clinical Decision-Making</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-voice-matters-a-controlled-study-of-audio/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-when-voice-matters-a-controlled-study-of-audio/</guid>
      <description>模型评估 | 7.0/10</description>
    </item>
    <item>
      <title>Whisper-FEST: Single-Channel Far-Field Enhanced Speech-to-text without Parallel Data</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-fest-single-channel-far-field-enhanced/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-fest-single-channel-far-field-enhanced/</guid>
      <description>语音识别 | 7.5/10</description>
    </item>
    <item>
      <title>Whisper-MLA: Reducing GPU Memory Consumption of ASR Models Based on MHA2MLA Conversion</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-mla-reducing-gpu-memory-consumption-of/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-mla-reducing-gpu-memory-consumption-of/</guid>
      <description>语音识别 | 7.0/10</description>
    </item>
    <item>
      <title>Whisper-QF: Leveraging Dual Cross-Attention Q-Former for Speech Emotion Recognition With Multi-Task Learning</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-qf-leveraging-dual-cross-attention-q/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-qf-leveraging-dual-cross-attention-q/</guid>
      <description>语音情感识别 | 7.5/10</description>
    </item>
    <item>
      <title>Whisper: Courtside Edition - Enhancing ASR Performance through LLM-Driven Context Generation</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-courtside-edition-enhancing-asr/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-whisper-courtside-edition-enhancing-asr/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Why Do Speech Language Models Fail to Generate Semantically Coherent Outputs? A Modality Evolving Perspective</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-why-do-speech-language-models-fail-to-generate/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-why-do-speech-language-models-fail-to-generate/</guid>
      <description>语音生成 | 7.0/10</description>
    </item>
    <item>
      <title>Windowed SummaryMixing: An Efficient Fine-Tuning of Self-Supervised Learning Models for Low-Resource Speech Recognition</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-windowed-summarymixing-an-efficient-fine-tuning/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-windowed-summarymixing-an-efficient-fine-tuning/</guid>
      <description>语音识别 | 6.5/10</description>
    </item>
    <item>
      <title>Z-Scores: A Metric for Linguistically Assessing Disfluency Removal</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-z-scores-a-metric-for-linguistically-assessing/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-z-scores-a-metric-for-linguistically-assessing/</guid>
      <description>模型评估 | 6.5/10</description>
    </item>
    <item>
      <title>ZK-VSA: Zero-Knowledge Verifiable Speaker Anonymization Leveraging Phase Vocoder with Time-Scale Modification</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-zk-vsa-zero-knowledge-verifiable-speaker/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-zk-vsa-zero-knowledge-verifiable-speaker/</guid>
      <description>语音匿名化 | 7.5/10</description>
    </item>
    <item>
      <title>ZSV2C-MLLM: Zero-Shot Visual Voice Cloning Via Multimodal Large Language Models</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-zsv2c-mllm-zero-shot-visual-voice-cloning-via/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-zsv2c-mllm-zero-shot-visual-voice-cloning-via/</guid>
      <description>语音克隆 | 6.5/10</description>
    </item>
    <item>
      <title>β-AVSDNET: A Novel End-To-End Neural Network Architecture For Audio-Visual Speaker Diarization</title>
      <link>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-avsdnet-a-novel-end-to-end-neural-network/</link>
      <pubDate>Wed, 29 Apr 2026 00:00:00 +0000</pubDate>
      <guid>https://nanless.github.io/audio-paper-digest-blog/posts/2026-04-29-avsdnet-a-novel-end-to-end-neural-network/</guid>
      <description>说话人分离 | 7.5/10</description>
    </item>
  </channel>
</rss>
