@inproceedings{sudarsanam2026unsupervised,
  author    = {Sudarsanam, Nitin and Kader, Sahla and Fernandezlopez, Isaac and Huang, Sophie and Dang, Tuan M. and Wang, Theron S. and Lekhak, Hridayesh and Zhu, Kenny Q.},
  title     = {Unsupervised Discovery and Analysis of the Vocal Repertoires and Patterns of Select Corvid Species},
  booktitle = {ICASSP 2026 - 2026 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2026},
  pages     = {15217--15221},
  publisher = {IEEE},
  doi       = {10.1109/ICASSP55912.2026.11462057},
}
Dogs communicate intelligently, but little is known about the phonetic properties of their vocal communication. For the first time, this paper presents an iterative algorithm inspired by human phonetic discovery, which is based on minimal pairs that determine phonemes by distinguishing different words in human language, and is able to produce a complete alphabet of distinct canine phoneme-like units. In addition, the algorithm produces a number of canine repeated acoustic units, which may correspond to specific environments and activities of a dog, composed exclusively of the canine phoneme-like units in the alphabet. The framework outlined in this paper is expected to function not only on canines but also on other animal species.
@inproceedings{wang-etal-2025-toward,
  title     = {Toward Automatic Discovery of a Canine Phonetic Alphabet},
  author    = {Wang, Theron S. and Li, Xingyuan and Lekhak, Hridayesh and Dang, Tuan Minh and Wu, Mengyue and Zhu, Kenny Q.},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.451/},
  doi       = {10.18653/v1/2025.acl-long.451},
  pages     = {9207--9219},
  isbn      = {979-8-89176-251-0},
  selected  = {true},
}
Progress in understanding real-world canine vocal communication is constrained by datasets lacking scale and 'in-the-wild' diversity. We introduce DogSpeak, a large-scale public dataset of 77,202 Barkseqs (33.162 hours) from 156 dogs (5 breeds), uniquely sourced from online social media with accurate dog ID, sex, and breed labels. DogSpeak, one of the largest of its kind, addresses prior limitations. Benchmark tasks (sex, breed, individual dog recognition) demonstrate its utility and highlight how its inherent real-world challenges necessitate and foster research into more robust bioacoustic models, preprocessing, and feature representation.
@inproceedings{10.1145/3746027.3758298,
  author    = {Lekhak, Hridayesh and Wang, Theron S. and Dang, Tuan M. and Zhu, Kenny Q.},
  title     = {{DogSpeak}: A Canine Vocalization Classification Dataset},
  year      = {2025},
  isbn      = {9798400720352},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3746027.3758298},
  doi       = {10.1145/3746027.3758298},
  booktitle = {Proceedings of the 33rd ACM International Conference on Multimedia},
  pages     = {13369--13375},
  numpages  = {7},
  keywords  = {animal communication, bioacoustics classification, canine vocalization, computational bioacoustics, dog bark analysis, machine learning, vocalization dataset},
  location  = {Dublin, Ireland},
  series    = {MM '25},
}
This study centers on the creation of a novel dog bark emotion dataset, EmotionalCanines, capturing the emotional spectrum of canine vocalizations. In the current literature on animal communication and its intersection with machine learning, there is a limited amount of open-sourced data available to facilitate research, mainly due to constraints in animal subjects and recording conditions. To address this gap, we propose a framework that enables the collection of reliable arousal and valence labels for animal emotional states at scale. Through its application, we built a dataset of 1,400 dog bark sequences with corresponding arousal and valence labels, the largest of its kind, for the Husky and Shiba Inu dog breeds. By constructing this dataset, we provide a foundation for decoding dog bark patterns and advancing animal communication research.
@inproceedings{10.1145/3746027.3758286,
  author    = {Dang, Tuan M. and Wang, Theron S. and Lekhak, Hridayesh and Zhu, Kenny Q.},
  title     = {{EmotionalCanines}: A Dataset for Analysis of Arousal and Valence in Dog Vocalization},
  year      = {2025},
  isbn      = {9798400720352},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3746027.3758286},
  doi       = {10.1145/3746027.3758286},
  booktitle = {Proceedings of the 33rd ACM International Conference on Multimedia},
  pages     = {13281--13288},
  numpages  = {8},
  keywords  = {animal communication, animal emotions, bioacoustics, dogs},
  location  = {Dublin, Ireland},
  series    = {MM '25},
}
Longitudinal studies of animal vocalizations provide crucial insights into developmental patterns and communicative evolution. To aid such investigations in canines, this paper introduces the Canine Age Transition Vocalization Dataset, a large-scale collection of dog vocalizations featuring meticulously verified metadata (including precise birthdate, breed, and individual dog ID) for 125 dogs across 6 common breeds. Our in-depth longitudinal analysis of this dataset then reveals novel findings on how key vocal parameters, encompassing defined bark types and finer-grained acoustic components (Elemental Dog Bark Units, or EDBUs), change as dogs mature. This work, therefore, offers both a significant new resource and foundational data that enable deeper, more nuanced investigations into the lifelong vocal development of dogs and into animal communication more broadly.
@inproceedings{10.1145/3746027.3758175,
  author    = {Lekhak, Hridayesh and Dang, Tuan M. and Wang, Theron S. and Zhu, Kenny Q.},
  title     = {A Data-driven Approach to the Longitudinal Study of Canine Vocal Pattern Development},
  year      = {2025},
  isbn      = {9798400720352},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3746027.3758175},
  doi       = {10.1145/3746027.3758175},
  booktitle = {Proceedings of the 33rd ACM International Conference on Multimedia},
  pages     = {12473--12482},
  numpages  = {10},
  keywords  = {age-related vocal development, animal communication, bioacoustic classification, bioacoustics, canine vocalization dataset, longitudinal study, machine learning},
  location  = {Dublin, Ireland},
  series    = {MM '25},
}
This paper attempts to discover communication patterns automatically within dog vocalizations using a data-driven approach, which breaks the barrier of previous approaches that rely on human prior knowledge on limited data. We present a self-supervised approach with HuBERT, enabling the accurate classification of phones, and an adaptive grammar induction method that identifies phone sequence patterns that suggest a preliminary vocabulary within dog vocalizations. Our results show that a subset of this vocabulary has substantial causal relations with certain canine activities, suggesting signs of stable semantics associated with these “words”.
@inproceedings{wang-etal-2024-phonetic,
  title     = {Phonetic and Lexical Discovery of Canine Vocalization},
  author    = {Wang, Theron S. and Li, Xingyuan and Zhang, Chunhao and Wu, Mengyue and Zhu, Kenny Q.},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.816},
  pages     = {13972--13983},
  selected  = {true},
}
This paper delves into the pioneering exploration of potential communication patterns within dog vocalizations and transcends the barriers of traditional linguistic analysis, which relies heavily on human a priori knowledge and limited datasets to find sound units in dog vocalization. We present a self-supervised approach with HuBERT, enabling the accurate classification of phoneme labels and the identification of vocal patterns that suggest a rudimentary vocabulary within dog vocalizations. Our findings indicate a significant acoustic consistency in this identified canine vocabulary, covering the entirety of observed dog vocalization sequences. We further develop a web-based dog vocalization labeling system. This system can highlight phoneme n-grams, present in the vocabulary, in the dog audio uploaded by users.
@article{li2024phonetic,
  title         = {Phonetic and Lexical Discovery of a Canine Language using {HuBERT}},
  author        = {Li, Xingyuan and Wang, Sinong and Xie, Zeyu and Wu, Mengyue and Zhu, Kenny Q.},
  journal       = {arXiv preprint arXiv:2402.15985},
  year          = {2024},
  eprint        = {2402.15985},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2402.15985},
}