pubs2019.bib

@article{wei2019investigatingapproach,
  author = {Weiß, C and Mauch, M and Dixon, S and Müller, M},
  journal = {Musicae Scientiae},
  month = {Dec},
  number = {4},
  pages = {486--507},
  title = {Investigating style evolution of Western classical music: A computational approach},
  volume = {23},
  year = {2019},
  abstract = {In musicology, there has been a long debate about a meaningful partitioning and description of music history regarding composition styles. Particularly, concepts of historical periods have been criticized since they cannot account for the continuous and interwoven evolution of style. To systematically study this evolution, large corpora are necessary, suggesting the use of computational strategies. This article presents such strategies and experiments relying on a dataset of 2000 audio recordings, which cover more than 300 years of music history. From the recordings, we extract different tonal features. We propose a method to visualize these features over the course of history using evolution curves. With the curves, we re-trace hypotheses concerning the evolution of chord transitions, intervals, and tonal complexity. Furthermore, we perform unsupervised clustering of recordings across composition years, individual pieces, and composers. In these studies, we found independent evidence of historical periods that broadly agrees with traditional views as well as recent data-driven experiments. This shows that computational experiments can provide novel insights into the evolution of styles.},
  doi = {10.1177/1029864918757595},
  issn = {1029-8649},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{frieler2019dontalgorithms,
  author = {Frieler, K and Basaran, D and Höger, F and Crayencour, HC and Peeters, G and Dixon, S},
  booktitle = {ACM International Conference Proceeding Series},
  month = {Nov},
  pages = {25--32},
  title = {Don't hide in the frames: Note- and pattern-based evaluation of automated melody extraction algorithms},
  year = {2019},
  abstract = {In this paper, we address how to evaluate and improve the performance of automatic dominant melody extraction systems from a pattern mining perspective with a focus on jazz improvisations. Traditionally, dominant melody extraction systems estimate the melody on the frame level, but for real-world musicological applications note-level representations are needed. For the evaluation of estimated note tracks, the current frame-wise metrics are not fully suitable and provide at most a first approximation. Furthermore, mining melodic patterns (n-grams) poses another challenge because note-wise errors propagate geometrically with increasing length of the pattern. On the other hand, for certain derived metrics such as pattern commonalities between performers, extraction errors might be less critical if at least qualitative rankings can be reproduced. Finally, while searching for similar patterns in a melody database, the number of irrelevant patterns in the result set increases with lower similarity thresholds. For reasons of usability, it would be interesting to know the behavior using imperfect automated melody extractions. We propose three novel evaluation strategies for estimated note tracks based on three application scenarios: pattern mining, pattern commonalities, and fuzzy pattern search. We apply the proposed metrics to one general state-of-the-art melody estimation method (Melodia) and to two variants of an algorithm that was optimized for the extraction of jazz solo melodies. A subset of the Weimar Jazz Database with 91 solos was used for evaluation. Results show that the optimized algorithm clearly outperforms the reference algorithm, which quickly degrades and eventually breaks down for longer n-grams. Frame-wise metrics provide indeed an estimate for note-wise metrics, but only for sufficiently good extractions, whereas F1 scores for longer n-grams cannot be predicted from frame-wise F1 scores at all. The ranking of pattern commonalities between performers can be reproduced with the optimized algorithms but not with the reference algorithm. Finally, the size of result sets of pattern similarity searches decreases for automated note extraction and for larger similarity thresholds, but the difference levels out for smaller thresholds.},
  doi = {10.1145/3358664.3358672},
  isbn = {9781450372084},
  day = {20},
  publicationstatus = {published}
}
@article{rodrguezalgarra2019characterisinginterventions,
  author = {Rodríguez-Algarra, F and Sturm, BL and Dixon, S},
  journal = {Transactions of the International Society for Music Information Retrieval},
  month = {Aug},
  number = {1},
  pages = {52--66},
  publisher = {Ubiquity Press, Ltd.},
  title = {Characterising Confounding Effects in Music Classification Experiments through Interventions},
  volume = {2},
  year = {2019},
  doi = {10.5334/tismir.24},
  eissn = {2514-3298},
  language = {en},
  day = {21},
  publicationstatus = {published}
}
@article{dai2019intonationsinging,
  author = {Dai, J and Dixon, S},
  journal = {J Acoust Soc Am},
  month = {Aug},
  number = {2},
  pages = {1005--1005},
  title = {Intonation trajectories within tones in unaccompanied soprano, alto, tenor, bass quartet singing.},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/31472532},
  volume = {146},
  year = {2019},
  abstract = {Unlike fixed-pitch instruments, the voice requires careful regulation during each note in order to maintain a steady pitch. Previous studies have investigated aspects of singing performance such as intonation accuracy and pitch drift, treating pitch as fixed within notes, while the pitch trajectory within notes has hardly been investigated. The aim of this paper is to study pitch variation within vocal notes and ascertain what factors influence the various parts of a note. The authors recorded five soprano, alto, tenor, bass quartets singing two pieces of music in three different listening conditions, according to whether the singers can hear the other participants or not. After analysing all of the individual notes and extracting pitch over time, the authors observed the following regularities: (1) There are transient parts of approximately 120 ms duration at both the beginning and end of a note, where the pitch varies rapidly; (2) the shapes of transient parts differ significantly according to the adjacent pitch, although all singers tend to have a descending transient at the end of a note; (3) the trajectory shapes of female singers differ from those of male singers at the beginnings of notes; (4) between vocal parts, there is a tendency to expand harmonic intervals (by about 8 cents between adjacent voices); (5) the listening condition had no significant effect on within-note pitch trajectories.},
  doi = {10.1121/1.5120483},
  eissn = {1520-8524},
  language = {eng},
  day = {8},
  publicationstatus = {published}
}
@inproceedings{agrawal2019aalignment,
  author = {Agrawal, R and Dixon, S},
  booktitle = {},
  month = {May},
  title = {A Hybrid Approach to Audio-to-Score Alignment},
  year = {2019},
  keywords = {Audio-to-score alignment, Music Information Retrieval, Convolutional Neural Networks},
  conference = {Machine Learning for Music Discovery Workshop at International Conference on Machine Learning (ICML)},
  day = {30},
  publicationstatus = {published}
}
@inproceedings{nakamura2019probabilistictranscription,
  author = {Nakamura, E and Nishikimi, R and Dixon, S and Yoshii, K},
  booktitle = {2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference, APSIPA ASC 2018 - Proceedings},
  month = {Mar},
  pages = {1905--1912},
  title = {Probabilistic Sequential Patterns for Singing Transcription},
  year = {2019},
  abstract = {Statistical models of musical scores play an important role in various tasks of music information processing. It has been an open problem to construct a score model incorporating global repetitive structure of note sequences, which is expected to be useful for music transcription and other tasks. Since repetitions can be described by a sparse distribution over note patterns (segments of music), a possible solution is to consider a Bayesian score model in which such a sparse distribution is first generated for each individual piece and then musical notes are generated in units of note patterns according to the distribution. However, straightforward construction is impractical due to the enormous number of possible note patterns. We propose a probabilistic model that represents a cluster of note patterns, instead of explicitly dealing with the set of all possible note patterns, to attain computational tractability. A score model is constructed as a mixture or a Markov model of such clusters, which is compatible with the above framework for describing repetitive structure. As a practical test to evaluate the potential of the model, we consider the problem of singing transcription from vocal f0 trajectories. Evaluation results show that our model achieves better predictive ability and transcription accuracies compared to the conventional Markov model, nearly reaching state-of-the-art performance.},
  doi = {10.23919/APSIPA.2018.8659637},
  isbn = {9789881476852},
  conference = {2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference},
  day = {7},
  publicationstatus = {published}
}
@article{dai2019singingsinging,
  author = {Dai, J and Dixon, S},
  journal = {J Acoust Soc Am},
  month = {Feb},
  number = {2},
  pages = {663--663},
  title = {Singing together: Pitch accuracy and interaction in unaccompanied unison and duet singing.},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/30823797},
  volume = {145},
  year = {2019},
  abstract = {This paper investigates singing interaction by analysis of the factors influencing pitch accuracy of unaccompanied pairs of singers. Eight pairs of singers sang two excerpts either in unison or two-part harmony. The experimental condition varied which singers could hear singing partners. After semi-automatic pitch-tracking and manual checking, this paper calculated the pitch error (PE) and interval error and tested the factors of influence using a one-way analysis of variance and a linear mixed-effects model. The results indicate that: (1) singing with the same vocal part is more accurate than singing with a different vocal part; (2) singing solo has lower PE than singing with a partner; (3) PEs are correlated, as singers adjust pitch to mitigate a partner's error and preserve harmonic intervals at the expense of melodic intervals and absolute pitch; (4) other factors influence the pitch accuracy, including: score pitch, score harmonic interval, score melodic interval, musical background, vocal part, and individual differences.},
  doi = {10.1121/1.5087817},
  eissn = {1520-8524},
  language = {eng},
  day = {4},
  publicationstatus = {published}
}
@inproceedings{dai2019understandingnotes,
  author = {Dai, J and Dixon, S},
  booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  month = {Jan},
  pages = {243--253},
  title = {Understanding intonation trajectories and patterns of vocal notes},
  volume = {11296 LNCS},
  year = {2019},
  abstract = {Unlike fixed-pitch instruments, which hold the same pitch over time, the voice requires careful regulation during each note in order to maintain a steady pitch. Previous studies have investigated singing performance at the level of single notes, such as intonation accuracy and pitch drift, while the pitch trajectory within notes has hardly been investigated. The aim of this paper is to study pitch variation within vocal notes and ascertain what factors influence the various parts of a note. We recorded five SATB groups (four participants per group) singing two pieces of music in three listening conditions, according to whether the singers could hear the other participants or not. After extracting the fundamental frequency and analysing all notes by relative time and real-time duration, we observed a regular pattern across the notes. Specifically: (1) there are transient parts at both the beginning and end of a note, accounting for about 15–20\% of the whole duration; (2) the shapes of the transient parts differ significantly according to the adjacent pitch, although all singers tend to have a descending transient at the end of a note.},
  doi = {10.1007/978-3-030-05716-9_20},
  isbn = {9783030057152},
  issn = {0302-9743},
  eissn = {1611-3349},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{li2019aphrase,
  author = {Li, S and Dixon, S and Black, DAA and Plumbley, MD},
  booktitle = {SMC 2016 - 13th Sound and Music Computing Conference, Proceedings},
  month = {Jan},
  pages = {247--252},
  title = {A model selection test for factors affecting the choice of expressive timing clusters for a phrase},
  year = {2019},
  abstract = {We model expressive timing for a phrase in performed classical music as being dependent on two factors: the expressive timing in the previous phrase and the position of the phrase within the piece. We present a model selection test for evaluating candidate models that assert different dependencies for deciding the Cluster of Expressive Timing (CET) for a phrase. We use cross entropy and Kullback Leibler (KL) divergence to evaluate the resulting models: with these criteria we find that both the expressive timing in the previous phrase and the position of the phrase in the music score affect expressive timing in a phrase. The results show that the expressive timing in the previous phrase has a greater effect on timing choices than the position of the phrase, as the phrase position only impacts the choice of expressive timing in combination with the choice of expressive timing in the previous phrase.},
  isbn = {9783000537004},
  day = {1},
  publicationstatus = {published}
}
@article{thalmann2019representingstructures,
  author = {Thalmann, F and Wiggins, G and Sandler, M},
  journal = {IEEE Transactions on Multimedia},
  month = {Dec},
  publisher = {Institute of Electrical and Electronics Engineers},
  title = {Representing Modifiable and Reusable Musical Content on the Web with Constrained Multi-Hierarchical Structures},
  year = {2019},
  abstract = {The most commonly used formats for exchanging musical information today are limited in that they represent music as flat and rigid streams of events or as raw audio signals without any structural information about the content. Such files can only be listened to in a linear way and reused and manipulated in manners determined by a target application such as a Digital Audio Workstation. The publisher has no means to incorporate their intentions or understanding of the content. This paper introduces an extension of the music formalism CHARM for the representation of modifiable and reusable musical content on the Web. It discusses how various kinds of multi-hierarchical graph structures together with logical constraints can be useful to model different musical situations. In particular, we focus on presenting solutions on how to interpret, navigate and schedule such structures in order for them to be played back. We evaluate the versatility of the representation in a number of practical examples created with a Web-based implementation based on Semantic Web technologies.},
  doi = {10.1109/TMM.2019.2961207},
  issn = {1520-9210},
  day = {20},
  publicationstatus = {published}
}
@inproceedings{thalmann2019moodplaygithubioplayer,
  author = {Thalmann, F and Allik, A and Metzig, C and Sandler, M},
  booktitle = {},
  month = {Dec},
  organization = {Trondheim},
  title = {moodplay.github.io: an online collaborative music player},
  year = {2019},
  conference = {Web Audio Conference},
  day = {4},
  publicationstatus = {published}
}
@inproceedings{allik2019joinstreaming,
  author = {Allik, A and Thalmann, F and Metzig, C and Sandler, M},
  booktitle = {},
  month = {Dec},
  organization = {Trondheim},
  title = {Join my party! How can we enhance social interactions in music streaming?},
  year = {2019},
  conference = {Web Audio Conference},
  day = {4},
  publicationstatus = {published}
}
@inproceedings{pauwels201920audio,
  author = {Pauwels, J and O'Hanlon, K and Gómez, E and Sandler, M},
  booktitle = {},
  month = {Nov},
  organization = {Delft, Netherlands},
  title = {20 Years of Automatic Chord Recognition from Audio},
  year = {2019},
  abstract = {In 1999, Fujishima published "Realtime Chord Recognition of Musical Sound: a System using Common Lisp Music". This paper kickstarted an active research topic that has been popular in and around the ISMIR community. The field of Automatic Chord Recognition (ACR) has evolved considerably from early knowledge-based systems towards data-driven methods, with neural network approaches arguably being central to current ACR research. Nonetheless, many of its core issues were already addressed or referred to in the Fujishima paper. In this paper, we review those twenty years of ACR according to these issues. We furthermore attempt to frame current directions in the field in order to establish some perspective for future research.},
  startyear = {2019},
  startmonth = {Nov},
  startday = {4},
  finishyear = {2019},
  finishmonth = {Nov},
  finishday = {8},
  conference = {Proceedings of the 20th Conference of the International Society for Music Information Retrieval (ISMIR)},
  day = {7},
  publicationstatus = {accepted}
}
@inproceedings{wilmering2019alignmentconcerts,
  author = {Wilmering, T and Thalmann, F and Sandler, MB},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Oct},
  title = {Alignment and Timeline Construction for Incomplete Analogue Audience Recordings of Historical Live Music Concerts},
  year = {2019},
  abstract = {Analogue recordings pose specific problems during automatic alignment, such as distortion due to physical degradation, or differences in tape speed during recording, copying, and digitisation. Oftentimes, recordings are incomplete, exhibiting gaps of different lengths. In this paper we propose a method to align multiple digitised analogue recordings of the same concerts, of varying quality and song segmentation. The process includes the automatic construction of a reference concert timeline. We evaluate alignment methods on a synthetic dataset and apply our algorithm to real-world data.},
  conference = {147th Audio Engineering Society International Convention 2019},
  day = {8},
  publicationstatus = {published}
}
@article{moffat2019approachesproduction,
  author = {Moffat, D and Sandler, M},
  journal = {Arts},
  month = {Sep},
  number = {4},
  publisher = {MDPI AG},
  title = {Approaches in Intelligent Music Production},
  url = {https://www.mdpi.com/2076-0752/8/4/125},
  volume = {8},
  year = {2019},
  abstract = {Music production technology has made few advancements over the past few decades. State-of-the-art approaches are based on traditional studio paradigms with new developments primarily focusing on digital modelling of analog equipment. Intelligent music production (IMP) is the approach of introducing some level of artificial intelligence into the space of music production, which has the ability to change the field considerably. There are a multitude of methods that intelligent systems can employ to analyse, interact with, and modify audio. Some systems interact and collaborate with human mix engineers, while others are purely black box autonomous systems, which are uninterpretable and challenging to work with. This article outlines a number of key decisions that need to be considered while producing an intelligent music production system, and identifies some of the assumptions and constraints of each of the various approaches. One of the key aspects to consider in any IMP system is how an individual will interact with the system, and to what extent they can consistently use any IMP tools. The other key aspects are how the target or goal of the system is created and defined, and the manner in which the system directly interacts with audio. The potential for IMP systems to produce new and interesting approaches for analysing and manipulating audio, both for the intended application and creative misappropriation, is considerable.},
  doi = {10.3390/arts8040125},
  issn = {2076-0752},
  keywords = {intelligent music production, automatic mixing, adaptive audio effects, audio processing, artificial intelligence, machine learning},
  language = {English},
  day = {25},
  publicationstatus = {published}
}
@inproceedings{senvaityte2019guitardeconvolution,
  author = {Senvaityte, D and Pauwels, J and Sandler, M},
  booktitle = {},
  month = {Sep},
  title = {Guitar String Separation Using Non-Negative Matrix Factorization and Factor Deconvolution},
  year = {2019},
  doi = {10.1145/3356590.3356628},
  conference = {Audio Mostly 2019},
  day = {18},
  publicationstatus = {accepted}
}
@inproceedings{delgado2019aanalysis,
  author = {Delgado, A and McDonald, S and Xu, N and Sandler, M},
  booktitle = {ACM International Conference Proceeding Series},
  month = {Sep},
  pages = {17--23},
  title = {A new dataset for amateur vocal percussion analysis},
  year = {2019},
  abstract = {The imitation of percussive instruments via the human voice is a natural way for us to communicate rhythmic ideas and, for this reason, it attracts the interest of music makers. Specifically, the automatic mapping of these vocal imitations to their emulated instruments would allow creators to realistically prototype rhythms in a faster way. The contribution of this study is two-fold. Firstly, a new Amateur Vocal Percussion (AVP) dataset is introduced to investigate how people with little or no experience in beatboxing approach the task of vocal percussion. The end-goal of this analysis is that of helping mapping algorithms to better generalise between subjects and achieve higher performances. The dataset comprises a total of 9780 utterances recorded by 28 participants with fully annotated onsets and labels (kick drum, snare drum, closed hi-hat and opened hi-hat). Lastly, we conducted baseline experiments on audio onset detection with the recorded dataset, comparing the performance of four state-of-the-art algorithms in a vocal percussion context.},
  doi = {10.1145/3356590.3356844},
  isbn = {9781450372978},
  day = {18},
  publicationstatus = {published}
}
@inproceedings{thalmann2019querybasedrecordings,
  author = {Thalmann, F and Wilmering, T and Sandler, M},
  booktitle = {},
  month = {Aug},
  title = {Query-based Mashups of Historical Live Music Recordings},
  year = {2019},
  conference = {9th Conference of Japanese Association for Digital Humanities (JADH2019)},
  day = {29},
  publicationstatus = {published}
}
@article{mehrabi2019erratumdoi101371journalpone0219955,
  author = {Mehrabi, A and Dixon, S and Sandler, M},
  journal = {PLoS One},
  month = {Aug},
  number = {8},
  publisher = {Public Library of Science (PLoS)},
  title = {Erratum: Vocal imitation of percussion sounds: On the perceptual similarity between imitations and imitated sounds (PLOS ONE (2019)14:8 (e0221722) DOI:10.1371/journal.pone.0219955)},
  volume = {14},
  year = {2019},
  abstract = {The affiliations for the second and third authors are incorrect. Simon Dixon and Mark Sandler are not affiliated with #1 but with #2: School of Electronic Engineering and Computer Science, Queen Mary University of London, London, England.},
  doi = {10.1371/journal.pone.0221722},
  issn = {1932-6203},
  eissn = {1932-6203},
  day = {22},
  publicationstatus = {published}
}
@article{mehrabi2019vocalsounds,
  author = {Mehrabi, A and Dixon, S and Sandler, M},
  journal = {PLoS One},
  month = {Jul},
  number = {7},
  pages = {e0219955--e0219955},
  title = {Vocal imitation of percussion sounds: On the perceptual similarity between imitations and imitated sounds.},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/31344080},
  volume = {14},
  year = {2019},
  abstract = {Recent studies have demonstrated the effectiveness of the voice for communicating sonic ideas, and the accuracy with which it can be used to imitate acoustic instruments, synthesised sounds and environmental sounds. However, there has been little research on vocal imitation of percussion sounds, particularly concerning the perceptual similarity between imitations and the sounds being imitated. In the present study we address this by investigating how accurately musicians can vocally imitate percussion sounds, in terms of whether listeners consider the imitations 'more similar' to the imitated sounds than to other same-category sounds. In a vocal production task, 14 musicians imitated 30 drum sounds from five categories (cymbals, hats, kicks, snares, toms). Listeners were then asked to rate the similarity between the imitations and same-category drum sounds via a web-based listening test. We found that imitated sounds received the highest similarity ratings for 16 of the 30 sounds. The similarity between a given drum sound and its imitation was generally rated higher than for imitations of another same-category sound; however, for some drum categories (snares and toms), certain sounds were consistently considered most similar to the imitations, irrespective of the sound being imitated. Finally, we apply an existing auditory image based measure for perceptual similarity between same-category drum sounds, to model the similarity ratings using linear mixed effect regression. The results indicate that this measure is a good predictor of perceptual similarity between imitations and imitated sounds, when compared to acoustic features containing only temporal or spectral features.},
  doi = {10.1371/journal.pone.0219955},
  eissn = {1932-6203},
  language = {eng},
  pii = {PONE-D-17-40726},
  day = {25},
  publicationstatus = {published}
}
@inproceedings{kudumakis2019mpegpersonalization,
  author = {Kudumakis, P and Wilmering, T and Sandler, M and Foss, J},
  booktitle = {},
  month = {Jun},
  organization = {Manchester, UK},
  title = {MPEG IPR ontologies for media trading and personalization},
  url = {http://ceur-ws.org/Vol-2423/DataTV2019_paper_2.pdf},
  volume = {2423},
  year = {2019},
  issn = {1613-0073},
  conference = {1st International Workshop on Data-driven Personalization of Television (DataTV’19) held as part of the ACM International Conference on Interactive Experiences for Television and Online Video (TVX’19)},
  day = {5},
  publicationstatus = {published}
}
@inproceedings{pauwels2019findingcatalogue,
  author = {Pauwels, J and Sandler, M},
  booktitle = {},
  month = {May},
  organization = {Málaga, Spain},
  title = {Finding new practice material through chord-based exploration of a large music catalogue},
  year = {2019},
  abstract = {Our demo is a web app that suggests new practice material to music learners based on automatic chord analysis. It is aimed at music practitioners of any skill set, playing any instrument, as long as they know how to play along with a chord sheet. Users need to select a number of chords in the app, and are then presented with a list of music pieces containing those chords. Each of those pieces can be played back while its chord transcription is displayed in sync to the music. This enables a variety of practice scenarios, ranging from following the chords in a piece to using the suggested music as a backing track to practice soloing over.},
  startyear = {2019},
  startmonth = {May},
  startday = {28},
  finishyear = {2019},
  finishmonth = {May},
  finishday = {31},
  conference = {Proceedings of the 16th Sound and Music Conference},
  day = {31},
  publicationstatus = {published}
}
@inproceedings{ohanlon2019comparingrecognition,
  author = {O'Hanlon, K and Sandler, MB},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  month = {May},
  pages = {860--864},
  title = {Comparing CQT and Reassignment Based Chroma Features for Template-based Automatic Chord Recognition},
  volume = {2019-May},
  year = {2019},
  abstract = {Automatic Chord Recognition (ACR) seeks to extract chords from musical signals. Recently, deep neural network (DNN) approaches have become popular for this task, being employed for feature extraction and sequence modelling. Traditionally, the most important steps in ACR were extraction of chroma features which estimate the energy in each pitch class, and pattern matching using templates or learning-based approaches. In this paper we reconsider chroma features with template matching, employing spectral reassignment chroma with synthetic spectral templates, and find experimental results comparable to those of a recent DNN-based chroma extractor.},
  doi = {10.1109/ICASSP.2019.8682774},
  isbn = {9781479981311},
  issn = {1520-6149},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{pauwels2019acontent,
  author = {Pauwels, J and Sandler, MB},
  booktitle = {Joint Proceedings of the ACM IUI 2019 Workshops},
  month = {Mar},
  title = {A web-based system for suggesting new practice material to music learners based on chord content},
  volume = {2327},
  year = {2019},
  abstract = {In this demo paper, a system that suggests new practice material to music learners is presented. It is aimed at music practitioners of any skill set, playing any instrument, as long as they know how to play along with a chord sheet. Users need to select a number of chords in a web app, and are then presented with a list of music pieces containing those chords. Each of those pieces can then be played back while its chord transcription is displayed in sync to the music. This enables a variety of practice scenarios, ranging from following the chords in a piece to using the suggested music as a backing track to practice soloing over. We set out the various interface elements that make up this web application and the thoughts that went behind them. Furthermore, we touch upon the algorithms that are used in the app. Notably, the automatic generation of chord transcriptions – such that large amounts of music can be processed without human intervention – and the query resolution mechanism – finding appropriate music based on the user input and transcription quality – are discussed.},
  startyear = {2019},
  startmonth = {Mar},
  startday = {20},
  finishyear = {2019},
  finishmonth = {Mar},
  finishday = {20},
  issn = {1613-0073},
  conference = {2nd Workshop on Intelligent Music Interfaces for Listening and Creation},
  day = {20},
  publicationstatus = {published}
}
@inproceedings{shukla2019realtimepanning,
  address = {New York},
  author = {Shukla, R and Radu, IT and Sandler, M and Stewart, R},
  booktitle = {},
  month = {Mar},
  organization = {York, United Kingdom},
  publisher = {Audio Engineering Society},
  title = {Real-time binaural rendering with virtual vector base amplitude panning},
  url = {http://www.aes.org/e-lib/},
  year = {2019},
  abstract = {We present a virtual vector base amplitude panning (VBAP) implementation for 3D head-tracked binaural rendering on an embedded Linux system. Three degrees of freedom head-tracking is implemented within acceptable levels of latency and at 1° angular resolution. The technical performance of virtual VBAP is evaluated alongside a First Order Ambisonics (FOA) approach on the same platform, using analysis of localisation cue error against a human-measured head-related transfer function set. Our findings illustrate that, in scenarios utilising embedded or other portable, low-resource computing platforms, the nature and requirements of the immersive or interactive audio application at hand may determine whether virtual VBAP is a viable (or even preferable) approach compared to virtual FOA.},
  startyear = {2019},
  startmonth = {Mar},
  startday = {27},
  finishyear = {2019},
  finishmonth = {Mar},
  finishday = {29},
  keywords = {vector base amplitude panning, binaural, head-tracking, embedded hardware},
  conference = {Audio Engineering Society Conference on Immersive and Interactive Audio},
  day = {17},
  publicationstatus = {accepted}
}
@inproceedings{sandler2019semanticchain,
  author = {Sandler, M and De Roure, D and Benford, S and Page, K},
  booktitle = {Proceedings - 2019 International Workshop on Multilayer Music Representation and Processing, MMRP 2019},
  month = {Mar},
  pages = {49--55},
  title = {Semantic web technology for new experiences throughout the music production-consumption chain},
  year = {2019},
  abstract = {The FAST project (Fusing Audio and Semantic Technology for Intelligent Music Production and Consumption), with 5 years of UK funding, has sought to create a new musical ecosystem that empowers all manner of people, from professional performers to casual listeners, to engage in new, more creative, immersive and dynamic musical experiences. Realising this requires a step-change in digital music technologies. Going beyond today's digital sound files, future experiences will demand far richer musical information, whereby music content will be packaged in a flexible, structured way that combines audio recordings with rich, layered metadata to support interactive and adaptive musical experiences. This defines the overall ambition of FAST: to lay the foundations for a new generation of 'semantic audio' technologies that underpin diverse future music experiences. This paper therefore aims to describe the overall vision of the project, set out the broad landscape in which it is working, highlight some key results and show how they bring out a central notion of FAST, that of Digital Music Objects, which are flexible constructs consisting of recorded music essence coupled with rich, semantic, linked metadata.},
  doi = {10.1109/MMRP.2019.8665378},
  isbn = {9781728116495},
  day = {11},
  publicationstatus = {published}
}
@inproceedings{moffat2019machinedrums,
  author = {Moffat, D and Sandler, MB},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Jan},
  title = {Machine learning multitrack gain mixing of drums},
  year = {2019},
  abstract = {There is a body of work in the field of intelligent music production, covering a range of specific audio effects. However, there is a distinct lack of purely machine learning approaches to automatic mixing. This could be due to a lack of suitable data. This paper presents an approach that uses human-produced audio mixes, along with their source multitracks, to produce a set of mix parameters. The focus is entirely on the gain mixing of audio drum tracks. Using existing reverse engineering of music production gain parameters, a target mix gain parameter is identified, and these results are fed into a number of machine learning algorithms, along with audio feature vectors of each audio track. This allows for a machine learning prediction approach to audio gain mixing. A random forest approach is taken to perform a multiple output prediction. The prediction results of the random forest approach are then compared to a number of other published automatic gain mixing approaches. The results demonstrate that the random forest gain mixing approach performs similarly to a human engineer and outperforms the existing gain mixing approaches.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{moffat2019automaticidentification,
  author = {Moffat, D and Sandler, MB},
  booktitle = {AES 146th International Convention},
  month = {Jan},
  title = {Automatic mixing level balancing enhanced through source interference identification},
  year = {2019},
  abstract = {It has been well established that equal loudness normalisation can produce a perceptually appropriate level balance in an automated mix. Previous work assumes that each captured track represents an individual sound source. In the context of a live drum recording, this assumption is incorrect. This paper demonstrates an approach to identifying source interference and adjusting the source gains accordingly, to ensure that tracks are all set to equal perceptual loudness. The impact of this interference on the selected gain parameters and resultant mixture is highlighted.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{moffat2019anreverberation,
  author = {Moffat, D and Sandler, MB},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Jan},
  title = {An automated approach to the application of reverberation},
  year = {2019},
  abstract = {The field of intelligent music production has been growing over recent years. There have been several different approaches to automated reverberation. In this paper, we automate the parameters of an algorithmic reverb based on analysis of the input signals. The literature is used to produce a set of rules for the application of reverberation, and these rules are then represented directly as audio features. This audio feature representation is then used to control the reverberation parameters from the audio signal in real time.},
  day = {1},
  publicationstatus = {published}
}
@article{phan2019seqsleepnetstaging,
  author = {Phan, H and Andreotti, F and Cooray, N and Chen, OY and De Vos, M},
  journal = {IEEE Transactions on Neural Systems and Rehabilitation Engineering},
  month = {Jan},
  number = {3},
  pages = {400--410},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  title = {SeqSleepNet: End-to-End Hierarchical Recurrent Neural Network for Sequence-to-Sequence Automatic Sleep Staging},
  volume = {27},
  year = {2019},
  doi = {10.1109/tnsre.2019.2896659},
  issn = {1534-4320},
  eissn = {1558-0210},
  day = {31},
  publicationstatus = {published}
}
@inproceedings{feng2019augmentedrehabilitation,
  author = {Feng, F and Stockman, T},
  booktitle = {Conference on Human Factors in Computing Systems - Proceedings},
  month = {May},
  title = {Augmented visuotactile feedback support sensorimotor synchronization skill for rehabilitation},
  year = {2019},
  abstract = {Augmented visual-audio feedback supports rhythmic motor performance in both sports training and sensorimotor synchronization practice. In home-based rehabilitation for minor stroke patients, training fine motor skills using rhythms not only helps to recover sophisticated motion ability but also supports confidence and mental health recovery. Auditory information has been shown to have advantages for improving rhythmic motion performance, but it can be masked by environmental noise and may be intrusive to non-stakeholders. Under these circumstances, patients may be reluctant to practice actively due to difficulties hearing the auditory stimuli or a concern for disturbing others. To address this issue, we explored an inconspicuous way of providing vibrotactile feedback through a wristband. In order to investigate the general feasibility of a sensorimotor synchronization task, we conducted a preliminary user study with 16 healthy participants, comparing visual-tactile feedback with visual-audio, visual-audio-tactile and visual-only feedback. Results showed that visual-tactile feedback has a facilitatory effect on rhythmic motion accuracy equivalent to that of visual-audio feedback. In addition, visual-tactile feedback supports smoother movements than visual-audio feedback. In the future, after refinement with stroke patients, the system could support customization for different levels of sensorimotor synchronization training.},
  doi = {10.1145/3290607.3312812},
  isbn = {9781450359719},
  day = {2},
  publicationstatus = {published}
}
@inproceedings{stowell2019statecome,
  author = {Stowell, D},
  booktitle = {Biodiversity Information Science and Standards},
  month = {Jun},
  pages = {e37227},
  title = {State of the Art in Computational Bioacoustics and Machine Learning: How far have we come?},
  volume = {3},
  year = {2019},
  doi = {10.3897/biss.3.37227},
  issn = {2535-0897},
  day = {19}
}
@inproceedings{alvarado2019sparsetimedomain,
  author = {Alvarado, PA and Alvarez, MA and Stowell, D},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  month = {Apr},
  pages = {995--999},
  title = {Sparse Gaussian Process Audio Source Separation Using Spectrum Priors in the Time-domain},
  volume = {2019-May},
  year = {2019},
  abstract = {Gaussian process (GP) audio source separation is a time-domain approach that circumvents the inherent phase approximation issue of spectrogram-based methods. Furthermore, through its kernel, GPs elegantly incorporate prior knowledge about the sources into the separation model. Despite these compelling advantages, the computational complexity of GP inference scales cubically with the number of audio samples. As a result, source separation GP models have been restricted to the analysis of short audio frames. We introduce an efficient application of GPs to time-domain audio source separation, without compromising performance. For this purpose, we used GP regression, together with spectral mixture kernels, and variational sparse GPs. We compared our method with LD-PSDTF (positive semi-definite tensor factorization), KL-NMF (Kullback-Leibler non-negative matrix factorization), and IS-NMF (Itakura-Saito NMF). Results show that the proposed method outperforms these techniques.},
  doi = {10.1109/ICASSP.2019.8683287},
  isbn = {9781479981311},
  issn = {1520-6149},
  conference = {2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  day = {17},
  publicationstatus = {published}
}
@article{stowell2019automaticconditions,
  author = {Stowell, D and Petrusková, T and Šálek, M and Linhart, P},
  journal = {Journal of the Royal Society Interface},
  month = {Apr},
  number = {153},
  publisher = {Royal Society, The},
  title = {Automatic acoustic identification of individual animals: Improving generalisation across species and recording conditions},
  url = {http://arxiv.org/abs/1810.09273v1},
  volume = {16},
  year = {2019},
  abstract = {Many animals emit vocal sounds which, independently from the sounds' function, embed some individually-distinctive signature. Thus the automatic recognition of individuals by sound is a potentially powerful tool for zoology and ecology research and practical monitoring. Here we present a general automatic identification method, that can work across multiple animal species with various levels of complexity in their communication systems. We further introduce new analysis techniques based on dataset manipulations that can evaluate the robustness and generality of a classifier. By using these techniques we confirmed the presence of experimental confounds in situations resembling those from past studies. We introduce data manipulations that can reduce the impact of these confounds, compatible with any classifier. We suggest that assessment of confounds should become a standard part of future studies to ensure they do not report over-optimistic results. We provide annotated recordings used for analyses along with this study and we call for dataset sharing to be a common practice to enhance development of methods and comparisons of results.},
  doi = {10.1098/rsif.2018.0940},
  issn = {1742-5689},
  day = {10},
  publicationstatus = {published}
}
@inproceedings{yela2019spectralsignals,
  author = {Yela, DF and Stowell, D and Sandler, M},
  booktitle = {European Signal Processing Conference},
  month = {Sep},
  title = {Spectral visibility graphs: Application to similarity of harmonic signals},
  volume = {2019-September},
  year = {2019},
  abstract = {Graph theory is emerging as a new source of tools for time series analysis. One promising method is to transform a signal into its visibility graph, a representation which captures many interesting aspects of the signal. Here we introduce the visibility graph for audio spectra and propose a novel representation for audio analysis: the spectral visibility graph degree. Such representation inherently captures the harmonic content of the signal whilst being resilient to broadband noise. We present experiments demonstrating its utility to measure robust similarity between harmonic signals in real and synthesised audio data. The source code is available online.},
  doi = {10.23919/EUSIPCO.2019.8903056},
  isbn = {9789082797039},
  issn = {2219-5491},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{matt2019estimatingsignalling,
  author = {Matt, A and Stowell, D},
  booktitle = {European Signal Processing Conference},
  month = {Sep},
  title = {Estimating \& mitigating the impact of acoustic environments on machine-to-machine signalling},
  volume = {2019-September},
  year = {2019},
  abstract = {The advance of technology for transmitting Data-over-Sound in various IoT and telecommunication applications has led to the concept of machine-to-machine over-the-air acoustic signalling. Reverberation can have a detrimental effect on such machine-to-machine signals while decoding. Various methods have been studied to combat the effects of reverberation in speech and audio signals, but it is not clear how well they generalise to other sound types. We look at extending these models to facilitate machine-to-machine acoustic signalling. This research investigates dereverberation techniques to shortlist a single-channel reverberation suppression method through a pilot test. In order to apply the chosen dereverberation method a novel method of estimating acoustic parameters governing reverberation is proposed. The performance of the final algorithm is evaluated on quality metrics as well as the performance of a real machine-to-machine decoder. We demonstrate a dramatic reduction in error rate for both audible and ultrasonic signals.},
  doi = {10.23919/EUSIPCO.2019.8902634},
  isbn = {9789082797039},
  issn = {2219-5491},
  day = {1},
  publicationstatus = {published}
}
@article{stolfi2019playsoundspaceobjects,
  author = {Stolfi, AS and Milo, A and Barthet, M},
  journal = {Journal of New Music Research},
  month = {Sep},
  number = {4},
  pages = {366--384},
  publisher = {Taylor \& Francis (Routledge)},
  title = {Playsound.space: Improvising in the browser with semantic sound objects},
  volume = {48},
  year = {2019},
  abstract = {This paper describes the development and evaluation of the online music making tool Playsound.space, an open platform leveraging Freesound Creative Commons audio content. After discussing the interface which relies on semantic queries and spectrogram representations, we introduce new features aiming to enhance participation and creativity support. This includes a multi-user chat system providing language translation, and a Web Audio player enabling real-time sound editing and time-stretching. We present evaluations for live performance and soundscape composition sketching. Throughout our reflections, we provide directions for future artistic and pedagogical applications that can benefit the design of other ubiquitous music systems.},
  doi = {10.1080/09298215.2019.1649433},
  issn = {0929-8215},
  eissn = {1744-5027},
  day = {18},
  publicationstatus = {published}
}
@inproceedings{bruford2019modellingevaluation,
  author = {Bruford, F and Barthet, M and McDonald, S and Sandler, M},
  booktitle = {ACM International Conference Proceeding Series},
  month = {Sep},
  pages = {131--138},
  title = {Modelling musical similarity for drum patterns: A perceptual evaluation},
  year = {2019},
  abstract = {Computational models of similarity for drum kit patterns are an important enabling factor in many intelligent music production systems. In this paper, we carry out a perceptual study to evaluate the performance of a number of state-of-the-art models for estimating similarity of drum patterns. 24 listeners rated similarity between 80 pairs of drum patterns covering a range of styles. We find that many of the models perform well, especially those using density-based features, and a more simplistic rhythm-pattern distance. However, many of the most perceptually important factors reported by listeners (such as swing, genre and style, instrument distribution) are not adequately accounted for. We also introduce a velocity transform method to better incorporate variable onset intensity into rhythm similarity analysis. Inter-rater agreement analysis shows that models are also limited somewhat by individual perceptual differences. These findings will inform future research into improved approaches to drum pattern similarity modelling that integrate existing features with new features modelling a wider range of characteristics.},
  doi = {10.1145/3356590.3356611},
  isbn = {9781450372978},
  day = {18},
  publicationstatus = {published}
}
@inproceedings{turchet2019haptificationperformance,
  author = {Turchet, L and Barthet, M},
  booktitle = {ACM International Conference Proceeding Series},
  month = {Sep},
  pages = {244--247},
  title = {Haptification of performer's control gestures in live electronic music performance},
  year = {2019},
  abstract = {In this paper, we introduce musical haptic wearables for audiences (MHWAs) which provide sensing and haptic stimulation technologies for networked musical interaction using wireless connectivity. We report on a concert experiment during which audience members could experience vibro-tactile feedback mapped to the control gestures of two electronic music performers. Preliminary results suggest that MHWAs may increase the audience's understanding of the musical expression and the presence of the performers when the tempo is slow while no significant effects were found at fast tempi. Participants' comments also indicate that vibro-tactile feedback related to musical attributes such as beat could enrich some aspects of the live music experience.},
  doi = {10.1145/3356590.3356629},
  isbn = {9781450372978},
  day = {18},
  publicationstatus = {published}
}
@article{turchet2019anpractice,
  author = {Turchet, L and Barthet, M},
  journal = {Journal of New Music Research},
  month = {Jul},
  number = {4},
  pages = {352--365},
  publisher = {Taylor \& Francis (Routledge)},
  title = {An ubiquitous smart guitar system for collaborative musical practice},
  volume = {48},
  year = {2019},
  abstract = {Ubiquitous music (UbiMus) proposes to study how social interaction with mobile and distributed technologies can converge to form novel creativity support tools and music artistic practices. A recent field overlapping with UbiMus is the Internet of Musical Things, which refers to ecosystems of interconnected embedded computers (Musical Things) enabling users to produce, interact with or experience musical content. Musical Things embed electronics, sensors, data forwarding and processing software into physical or virtual objects. Smart musical instruments (SMIs) are an emerging class of Musical Things provided with capabilities of capturing and receiving data supporting instrumental musical practice. Due to their portability and self-containedness, SMIs enable novel ubiquitous interactions between performers of acoustic and digital musical instruments. After a review of current trends in SMI research, we propose an ubiquitous smart guitar system which uses the guitar as a hub for collaborative music making. We then present a survey conducted with 18 performers to assess the usability, creativity support and engagement with the system. Results show a positive emotional engagement with the system which overall was found easy to use and novel. We also discuss several barriers to creative interaction related to the size of the user interface, creative agency and personalisation.},
  doi = {10.1080/09298215.2019.1637439},
  issn = {0929-8215},
  eissn = {1744-5027},
  day = {5},
  publicationstatus = {published}
}
@inproceedings{bruford2019groovenavigation,
  author = {Bruford, F and McDonald, ST and Barthet, M and Sandler, M},
  booktitle = {CEUR Workshop Proceedings},
  month = {Jan},
  title = {Groove explorer: An intelligent visual interface for drum loop library navigation},
  volume = {2327},
  year = {2019},
  abstract = {Music producers nowadays rely on increasingly large libraries of loops, samples and virtual instrument sounds as part of the composition process. Intelligent interfaces are therefore useful in enabling navigation of these databases in a way that supports the production workflow. Within virtual drumming software, producers typically rely on large libraries of symbolic drum loops. Due to their large size, navigating and exploring these libraries can be a difficult process. To address this, preliminary work on the Groove Explorer is presented. Using Self-Organizing Maps, a large library of symbolic drum loops is automatically mapped onto a 2D space according to rhythmic similarity. This space can then be explored via a Max/MSP prototype interface. Early results suggest that while the algorithm works well for smaller datasets, further development is required, particularly in the similarity metric used, to make the tool scalable to large libraries.},
  issn = {1613-0073},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{weaver2019fillingperformance,
  author = {Weaver, J and Barthet, M and Chew, E},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Jan},
  title = {Filling the space: The impact of convolution reverberation time on note duration and velocity in duet performance},
  year = {2019},
  abstract = {The impact of reverberation on musical expressivity is an area of growing interest as technology to simulate, and create, acoustic environments improves. Being able to characterise the impact of acoustic environments on musical performance is a problem of interest to acousticians, designers of virtual environments, and algorithmic composers. We analyse the impact of convolution reverberation time on note duration and note velocity, which serve as markers of musical expressivity. To improve note clarity in situations of long reverberation times, we posit that musicians performing in a duo would lengthen the separation between notes (note duration) and increase loudness (note velocity) contrast. The data for this study comprises MIDI messages extracted from performances by 2 co-located pianists playing the same piece of music 100 times across 5 different reverberation conditions. To our knowledge, this is the largest data set to date looking at piano duo performance in a range of reverberation conditions. In contrast to prior work, the analysis considers both the entire performance and an excerpt from the opening part of the piece featuring a key structural element of the score. The analysis finds that convolution reverberation time is moderately positively correlated with mean note duration (r = 0.34, p < 0.001), but no significant correlation was found between convolution reverberation time and mean note velocity (r = −0.19, p = 0.058).},
  day = {1},
  publicationstatus = {published}
}
@misc{liu2019automaticrepresentation,
  author = {Liu, L and Benetos, E},
  month = {Dec},
  title = {Automatic Music Accompaniment with a Chroma-based Music Data Representation},
  year = {2019},
  conference = {DMRN+14: Digital Music Research Network One-day Workshop},
  day = {17}
}
@inproceedings{wang2019cbfperidbanalysis,
  author = {Wang, C and Benetos, E and Chew, E},
  booktitle = {},
  month = {Nov},
  organization = {Delft, The Netherlands},
  title = {CBF-periDB: A Chinese Bamboo Flute Dataset for Periodic Modulation Analysis},
  year = {2019},
  abstract = {We present CBF-periDB, a dataset of Chinese bamboo flute performances for ecologically valid analysis of periodic modulations in context. The dataset contains monophonic recordings of four types of isolated playing techniques and twenty expert full-length performances of classic Chinese bamboo flute pieces. Four types of playing techniques---vibrato, tremolo, trill, and flutter-tongue---are thoroughly annotated by the players themselves. These techniques are also referred to as periodic modulations due to their periodic patterns in the time-frequency domain. All recordings and annotations can be downloaded from c4dm.eecs.qmul.ac.uk/CBFdataset.html.},
  startyear = {2019},
  startmonth = {Nov},
  startday = {4},
  finishyear = {2019},
  finishmonth = {Nov},
  finishday = {8},
  keyword = {Dataset},
  keyword = {Music playing techniques},
  keyword = {Chinese bamboo flute},
  conference = {International Society for Music Information Retrieval Conference Late-Breaking Demo Session},
  day = {4},
  publicationstatus = {accepted}
}
@inproceedings{ycart2019blendingtranscription,
  author = {Ycart, A and McLeod, A and Benetos, E and Yoshii, K},
  booktitle = {},
  month = {Nov},
  organization = {Delft, The Netherlands},
  pages = {454--461},
  title = {Blending acoustic and language model predictions for automatic music transcription},
  url = {https://ismir2019.ewi.tudelft.nl/},
  year = {2019},
  abstract = {In this paper, we introduce a method for converting an input probabilistic piano roll (the output of a typical multi-pitch detection model) into a binary piano roll. The task is an important step for many automatic music transcription systems with the goal of converting an audio recording into some symbolic format. Our model has two components: an LSTM-based music language model (MLM) which can be trained on any MIDI data, not just that aligned with audio; and a blending model used to combine the probabilities of the MLM with those of the input probabilistic piano roll given by an acoustic multi-pitch detection model, which must be trained on (a comparably small amount of) aligned data. We use scheduled sampling to make the MLM robust to noisy sequences during testing. We analyze the performance of our model on the MAPS dataset using two different timesteps (40ms and 16th-note), comparing it against a strong baseline hidden Markov model with a training method not used before for the task to our knowledge. We report a statistically significant improvement over HMM decoding in terms of notewise F-measure with both timesteps, with 16th note timesteps improving further compared to 40ms timesteps.},
  startyear = {2019},
  startmonth = {Nov},
  startday = {4},
  finishyear = {2019},
  finishmonth = {Nov},
  finishday = {8},
  conference = {20th conference of the International Society for Music Information Retrieval (ISMIR)},
  day = {4},
  publicationstatus = {accepted}
}
@inproceedings{holzapfel2019automaticstudy,
  author = {Holzapfel, A and Benetos, E},
  booktitle = {},
  month = {Nov},
  organization = {Delft, The Netherlands},
  pages = {678--684},
  title = {Automatic music transcription and ethnomusicology: a user study},
  url = {https://ismir2019.ewi.tudelft.nl/},
  year = {2019},
  abstract = {Converting an acoustic music signal into music notation using a computer program has been at the forefront of music information research for several decades, as a task referred to as automatic music transcription (AMT). However, current AMT research is still constrained to system development followed by quantitative evaluations; it is still unclear whether the performance of AMT methods is considered sufficient to be used in the everyday practice of music scholars. In this paper, we propose and carry out a user study on evaluating the usefulness of automatic music transcription in the context of ethnomusicology. As part of the study, we recruited 16 participants who were asked to transcribe short musical excerpts either from scratch or using the output of an AMT system as a basis. We collect and analyze quantitative measures such as transcription time and effort, and a range of qualitative feedback from study participants, which includes user needs, criticisms of AMT technologies, and links between perceptual and quantitative evaluations on AMT outputs. The results show no quantitative advantage of using AMT, but important indications regarding appropriate user groups and evaluation measures are provided.},
  startyear = {2019},
  startmonth = {Nov},
  startday = {4},
  finishyear = {2019},
  finishmonth = {Nov},
  finishday = {8},
  conference = {20th conference of the International Society for Music Information Retrieval (ISMIR)},
  day = {4},
  publicationstatus = {accepted}
}
@inproceedings{wang2019adaptivesignals,
  author = {Wang, C and Benetos, E and Lostanlen, V and Chew, E},
  booktitle = {},
  month = {Nov},
  organization = {Delft, The Netherlands},
  pages = {809--815},
  title = {Adaptive Time–Frequency Scattering for Periodic Modulation Recognition in Music Signals},
  year = {2019},
  abstract = {Vibratos, tremolos, trills, and flutter-tongue are techniques frequently found in vocal and instrumental music. A common feature of these techniques is the periodic modulation in the time--frequency domain. We propose a representation based on time--frequency scattering to model the inter-class variability for fine discrimination of these periodic modulations. Time--frequency scattering is an instance of the scattering transform, an approach for building invariant, stable, and informative signal representations. The proposed representation is calculated around the wavelet subband of maximal acoustic energy, rather than over all the wavelet bands. To demonstrate the feasibility of this approach, we build a system that computes the representation as input to a machine learning classifier. Whereas previously published datasets for playing technique analysis focus primarily on techniques recorded in isolation, for ecological validity, we create a new dataset to evaluate the system. The dataset, named CBF-periDB, contains full-length expert performances on the Chinese bamboo flute that have been thoroughly annotated by the players themselves. We report F-measures of 99\% for flutter-tongue, 82\% for trill, 69\% for vibrato, and 51\% for tremolo detection, and provide explanatory visualisations of scattering coefficients for each of these techniques.},
  startyear = {2019},
  startmonth = {Nov},
  startday = {4},
  finishyear = {2019},
  finishmonth = {Nov},
  finishday = {8},
  conference = {International Society for Music Information Retrieval Conference},
  day = {4},
  publicationstatus = {accepted}
}
@inproceedings{ycart2019atransduction,
  author = {Ycart, A and Stoller, D and Benetos, E},
  booktitle = {},
  month = {Nov},
  organization = {Delft, The Netherlands},
  pages = {470--477},
  title = {A Comparative Study of Neural Models for Polyphonic Music Sequence Transduction},
  url = {https://ismir2019.ewi.tudelft.nl/},
  year = {2019},
  abstract = {Automatic transcription of polyphonic music remains a challenging task in the field of Music Information Retrieval. One under-investigated point is the post-processing of time-pitch posteriograms into binary piano rolls. In this study, we investigate this task using a variety of neural network models and training procedures. We introduce an adversarial framework, which we compare against more traditional training losses. We also propose the use of binary neuron outputs and compare them to the usual real-valued outputs in both training frameworks. This allows us to train networks directly using the F-measure as training objective. We evaluate these methods using two kinds of transduction networks and two different multi-pitch detection systems, and compare the results against baseline note-tracking methods on a dataset of classical piano music. Analysis of results indicates that (1) convolutional models improve results over baseline models, but no improvement is reported for recurrent models; (2) supervised losses are superior to adversarial ones; (3) binary neurons do not improve results; (4) cross-entropy loss results in better or equal performance compared to the F-measure loss.},
  startyear = {2019},
  startmonth = {Nov},
  startday = {4},
  finishyear = {2019},
  finishmonth = {Nov},
  finishday = {8},
  conference = {20th conference of the International Society for Music Information Retrieval (ISMIR)},
  day = {4},
  publicationstatus = {accepted}
}
@inproceedings{subramanian2019robustnessclassification,
  author = {Subramanian, V and Benetos, E and Sandler, M},
  booktitle = {},
  month = {Oct},
  organization = {New York, USA},
  pages = {239--243},
  title = {Robustness of Adversarial Attacks in Sound Event Classification},
  url = {http://dcase.community/workshop2019/},
  year = {2019},
  abstract = {An adversarial attack is a method to generate perturbations to the input of a machine learning model in order to make the output of the model incorrect. The perturbed inputs are known as adversarial examples. In this paper, we investigate the robustness of adversarial examples to simple input transformations such as mp3 compression, resampling, white noise and reverb in the task of sound event classification. By performing this analysis, we aim to provide insights on strengths and weaknesses in current adversarial attack algorithms as well as provide a baseline for defenses against adversarial attacks. Our work shows that adversarial attacks are not robust to simple input transformations. White noise is the most consistent method to defend against adversarial attacks with a success rate of 73.72\% averaged across all models and attack algorithms.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {25},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {26},
  conference = {4th Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE 2019)},
  day = {25},
  publicationstatus = {accepted}
}
@inproceedings{pankajakshan2019onsetsmodelling,
  author = {Pankajakshan, A and Bear, H and Benetos, E},
  booktitle = {},
  month = {Oct},
  organization = {New York, USA},
  pages = {174--178},
  title = {Onsets, activity, and events: a multi-task approach for polyphonic sound event modelling},
  url = {http://dcase.community/workshop2019/},
  year = {2019},
  abstract = {State-of-the-art polyphonic sound event detection (SED) systems function as frame-level multi-label classification models. In the context of dynamic polyphony levels at each frame, sound events interfere with each other, which degrades a classifier's ability to learn the exact frequency profile of individual sound events. Frame-level localized classifiers also fail to explicitly model the long-term temporal structure of sound events. Consequently, the event-wise detection performance is lower than the segment-wise detection performance. We define 'temporally precise polyphonic sound event detection' as the subtask of detecting sound event instances with the correct onset. Here, we investigate the effectiveness of sound activity detection (SAD) and onset detection as auxiliary tasks to improve temporal precision in polyphonic SED using multi-task learning. SAD helps to differentiate event activity frames from noisy and silence frames and helps to avoid missed detections at each frame. Onset predictions mark the start of each event and are in turn used to condition the predictions of both SAD and SED. Our experiments on the URBAN-SED dataset show that by conditioning SED with onset detection and SAD, there is over a three-fold relative improvement in event-based F-score.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {25},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {26},
  conference = {4th Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE 2019)},
  day = {25},
  publicationstatus = {accepted}
}
@inproceedings{singh2019audiolayer,
  author = {Singh, S and Pankajakshan, A and Benetos, E},
  booktitle = {},
  month = {Oct},
  organization = {New York, USA},
  pages = {234--238},
  title = {Audio tagging using a linear noise modelling layer},
  url = {http://dcase.community/workshop2019/},
  year = {2019},
  abstract = {Label noise refers to the presence of inaccurate target labels in a dataset. It is an impediment to the performance of a deep neural network (DNN) as the network tends to overfit to the label noise, hence it becomes imperative to devise a generic methodology to counter the effects of label noise. FSDnoisy18k is an audio dataset collected with the aim of encouraging research on label noise for sound event classification. The dataset contains ~42.5 hours of audio recordings divided across 20 classes, with a small amount of manually verified labels and a large amount of noisy data. Using this dataset, our work intends to explore the potential of modelling the label noise distribution by adding a linear layer on top of a baseline network. The accuracy of the approach is compared to an alternative approach of adopting a noise robust loss function. Results show that modelling the noise distribution improves the accuracy of the baseline network in a similar capacity to the soft bootstrapping loss.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {25},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {26},
  conference = {4th Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE 2019)},
  day = {25},
  publicationstatus = {accepted}
}
@inproceedings{pankajakshan2019polyphonicapproach,
  author = {Pankajakshan, A and Bear, H and Benetos, E},
  booktitle = {http://www.waspaa.com/},
  month = {Oct},
  organization = {New Paltz, NY, USA},
  pages = {318--322},
  publisher = {IEEE},
  title = {Polyphonic sound event and sound activity detection: a multi-task approach},
  url = {http://www.eecs.qmul.ac.uk/profiles/pankajakshanarjun.html},
  year = {2019},
  abstract = {Polyphonic Sound Event Detection (SED) in real-world recordings is a challenging task because of the dynamic polyphony level, intensity, and duration of sound events. Current polyphonic SED systems fail to model the temporal structure of sound events explicitly and instead attempt to look at which sound events are present at each audio frame. Consequently, the event-wise detection performance is much lower than the segment-wise detection performance. In this work, we propose a joint model approach to improve the temporal localization of sound events using a multi-task learning setup. The first task predicts which sound events are present at each time frame; we call this branch 'Sound Event Detection (SED) model', while the second task predicts if a sound event is present or not at each frame; we call this branch 'Sound Activity Detection (SAD) model'. We verify the proposed joint model by comparing it with a separate implementation of both tasks aggregated together from individual task predictions. Our experiments on the URBAN-SED dataset show that the proposed joint model can alleviate False Positive (FP) and False Negative (FN) errors and improve both the segment-wise and the event-wise metrics.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {20},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {23},
  conference = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics},
  day = {20},
  publicationstatus = {accepted}
}
@inproceedings{viannalordelo2019investigatingseparation,
  author = {Vianna Lordelo, C and Benetos, E and Dixon, S and Ahlbäck, S},
  booktitle = {http://www.waspaa.com/},
  month = {Oct},
  organization = {New Paltz, NY, USA},
  pages = {40--44},
  publisher = {IEEE},
  title = {Investigating kernel shapes and skip connections for deep learning-based harmonic-percussive separation},
  url = {http://www.eecs.qmul.ac.uk/profiles/viannalordelocarlospedro.html},
  year = {2019},
  abstract = {In this paper we propose an efficient deep learning encoder-decoder network for performing Harmonic-Percussive Source Separation (HPSS). It is shown that we are able to greatly reduce the number of model trainable parameters by using a dense arrangement of skip connections between the model layers. We also explore the utilisation of different kernel sizes for the 2D filters of the convolutional layers with the objective of allowing the network to learn the different time-frequency patterns associated with percussive and harmonic sources more efficiently. The training and evaluation of the separation has been done using the training and test sets of the MUSDB18 dataset. Results show that the proposed deep network achieves automatic learning of high-level features and maintains HPSS performance at a state-of-the-art level while reducing the number of parameters and training time.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {20},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {23},
  conference = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics},
  day = {20},
  publicationstatus = {published}
}
@inproceedings{bear2019cityscenes,
  author = {Bear, H and Heittola, T and Mesaros, A and Benetos, E and Virtanen, T},
  booktitle = {http://www.waspaa.com/},
  month = {Oct},
  organization = {New Paltz, NY, USA},
  pages = {11--15},
  publisher = {IEEE},
  title = {City classification from multiple real-world sound scenes},
  year = {2019},
  abstract = {The majority of sound scene analysis work focuses on one of two clearly defined tasks: acoustic scene classification or sound event detection. Whilst this separation of tasks is useful for problem definition, it inherently ignores some subtleties of the real world, in particular how humans vary in how they describe a scene. Some will describe the weather and features within it, others will use a holistic descriptor like 'park', and others still will use unique identifiers such as cities or names. In this paper, we undertake the task of automatic city classification to ask whether we can recognize a city from a set of sound scenes. In this problem, each city has recordings from multiple scenes. We test a series of methods for this novel task and show that a simple convolutional neural network (CNN) can achieve an accuracy of 50\%. This is less than the acoustic scene classification task baseline in the DCASE 2018 ASC challenge on the same data. With a simple adaptation to the class labels, pairing city labels with grouped scenes, accuracy increases to 52\%, closer to the simpler scene classification task. Finally, we also formulate the problem in a multi-task learning framework and achieve an accuracy of 56\%, outperforming the aforementioned approaches.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {20},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {23},
  conference = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics},
  day = {20},
  publicationstatus = {published}
}
@inproceedings{bear2019towardsrecognition,
  author = {Bear, H and Nolasco, I and Benetos, E},
  booktitle = {},
  month = {Sep},
  organization = {Graz, Austria},
  pages = {4594--4598},
  publisher = {International Speech Communication Association (ISCA)},
  title = {Towards joint sound scene and polyphonic sound event recognition},
  url = {https://www.interspeech2019.org/},
  year = {2019},
  abstract = {Acoustic Scene Classification (ASC) and Sound Event Detection (SED) are two separate tasks in the field of computational sound scene analysis. In this work, we present a new dataset with both sound scene and sound event labels and use this to demonstrate a novel method for jointly classifying sound scenes and recognizing sound events. We show that by taking a joint approach, learning is more efficient and whilst improvements are still needed for sound event detection, SED results are robust in a dataset where the sample distribution is skewed towards sound scenes.},
  startyear = {2019},
  startmonth = {Sep},
  startday = {15},
  finishyear = {2019},
  finishmonth = {Sep},
  finishday = {19},
  conference = {20th Annual Conference of the International Speech Communication Association (INTERSPEECH 2019)},
  day = {15},
  publicationstatus = {accepted}
}
@inproceedings{chettri2019ensembleverification,
  author = {Chettri, B and Stoller, D and Morfi, V and Martinez Ramirez, M and Benetos, E and Sturm, B},
  booktitle = {},
  month = {Sep},
  organization = {Graz, Austria},
  pages = {1018--1022},
  publisher = {International Speech Communication Association (ISCA)},
  title = {Ensemble Models for Spoofing Detection in Automatic Speaker Verification},
  url = {https://www.interspeech2019.org/},
  year = {2019},
  abstract = {Detecting spoofing attempts of automatic speaker verification (ASV) systems is challenging, especially when using only one modelling approach. For robustness, we use both deep neural networks and traditional machine learning models and combine them as ensemble models through logistic regression. They are trained to detect logical access (LA) and physical access (PA) attacks on the dataset released as part of the ASV Spoofing and Countermeasures Challenge 2019. We propose dataset partitions that ensure different attack types are present during training and validation to improve system robustness. Our ensemble model outperforms all our single models and the baselines from the challenge for both attack types. We investigate why some models on the PA dataset strongly outperform others and find that spoofed recordings in the dataset tend to have longer silences at the end than genuine ones. By removing them, the PA task becomes much more challenging, with the tandem detection cost function (t-DCF) of our best single model rising from 0.1672 to 0.5018 and equal error rate (EER) increasing from 5.98\% to 19.8\% on the development set.},
  startyear = {2019},
  startmonth = {Sep},
  startday = {15},
  finishyear = {2019},
  finishmonth = {Sep},
  finishday = {19},
  conference = {20th Annual Conference of the International Speech Communication Association (INTERSPEECH 2019)},
  day = {15},
  publicationstatus = {accepted}
}
@article{zhou2019adaptivenmf,
  author = {Zhou, Q and Feng, Z and Benetos, E},
  journal = {Sensors},
  month = {Jul},
  number = {14},
  publisher = {MDPI AG},
  title = {Adaptive Noise Reduction for Sound Event Detection Using Subband-Weighted NMF},
  url = {https://www.mdpi.com/},
  volume = {19},
  year = {2019},
  abstract = {Sound event detection in real-world environments suffers from the interference of non-stationary and time-varying noise. This paper presents an adaptive noise reduction method for sound event detection based on non-negative matrix factorization (NMF). First, a scheme for noise dictionary learning from the input noisy signal is employed using the technique of robust NMF, which supports adaptation to noise variations. The estimated noise dictionary is used to develop a supervised source separation framework in combination with a pre-trained event dictionary. Second, to improve the separation quality, we extend the basic NMF model to a weighted form, with the aim of varying the relative importance of the different components when separating a target sound event from noise. With properly designed weights, the separation process is forced to rely more on those dominant event components, whereas the noise gets greatly suppressed. The proposed method is evaluated on a dataset of the rare sound event detection task of the DCASE 2017 challenge, and achieves comparable results to the top-ranking system based on convolutional recurrent neural networks (CRNNs). The proposed weighted NMF method shows an excellent noise reduction ability, and achieves an improvement in F-score of 5\% compared to the unweighted approach.},
  doi = {10.3390/s19143206},
  issn = {1424-8220},
  day = {20},
  publicationstatus = {published}
}
@article{covas2019optimalforecasting,
  author = {Covas, E and Benetos, E},
  journal = {Chaos},
  month = {Jun},
  number = {6},
  publisher = {AIP Publishing},
  title = {Optimal Neural Network Feature Selection for Spatial-Temporal Forecasting},
  url = {https://aip.scitation.org/journal/cha},
  volume = {29},
  year = {2019},
  abstract = {Neural networks, and in general machine learning techniques, have been widely employed in forecasting time series and more recently in predicting spatial-temporal signals. All of these approaches involve some kind of feature selection regarding what past data and what neighbour data to use for forecasting. In this article, we show extensive empirical evidence on how to independently construct the optimal feature selection or input representation used by the input layer of a feed forward neural network for the purpose of forecasting spatial-temporal signals. The approach is based on results from dynamical systems theory, namely non-linear embedding theorems. We demonstrate it for a variety of spatial-temporal signals, and show that the optimal input layer representation consists of a grid, with spatial/temporal lags determined by the minimum of the mutual information of the spatial/temporal signals and the number of points taken in space/time decided by the embedding dimension of the signal. We present evidence of this proposal by running a Monte Carlo simulation of several combinations of input layer feature designs and show that the one predicted by the non-linear embedding theorems seems to be optimal or close to being optimal. In total we show evidence in four unrelated systems: a series of coupled Henon maps; a series of coupled ordinary differential equations (Lorenz-96) phenomenologically modelling atmospheric dynamics; the Kuramoto-Sivashinsky equation, a partial differential equation used in studies of instabilities in laminar flame fronts; and finally real physical data from sunspot areas in the Sun (in latitude and time) from 1874 to 2015. These four examples cover the range from simple toy models to complex non-linear dynamical simulations, and real data. Finally, we also compare our proposal against alternative feature selection methods, and show that it also works for other machine learning forecasting models.},
  doi = {10.1063/1.5095060},
  issn = {1089-7682},
  day = {20},
  publicationstatus = {published}
}
@inproceedings{ragano2019adaptingevaluation,
  author = {Ragano, A and Benetos, E and Hines, A},
  booktitle = {https://www.qomex2019.de/},
  month = {Jun},
  organization = {Berlin, Germany},
  title = {Adapting the Quality of Experience Framework for Audio Archive Evaluation},
  year = {2019},
  abstract = {Perceived quality of historical audio material that is subjected to digitisation and restoration is typically evaluated by individual judgements or with inappropriate objective quality models. This paper presents a Quality of Experience (QoE) framework for predicting perceived audio quality of sound archives. The approach consists in adapting concepts used in QoE evaluation to digital audio archives. Limitations of current objective quality models employed in audio archives are provided and reasons why a QoE-based framework can overcome these limitations are discussed. This paper shows that applying a QoE framework to audio archives is feasible and it helps to identify the stages, stakeholders and models for a QoE centric approach.},
  startyear = {2019},
  startmonth = {Jun},
  startday = {5},
  finishyear = {2019},
  finishmonth = {Jun},
  finishday = {7},
  conference = {11th International Conference on Quality of Multimedia Experience},
  day = {5},
  publicationstatus = {accepted}
}
@inproceedings{wang2019hmmbasedflute,
  author = {Wang, C and Benetos, E and Meng, X and Chew, E},
  booktitle = {Proceedings of Sound and Music Computing Conference},
  month = {May},
  organization = {Malaga, Spain},
  pages = {545--550},
  title = {HMM-based Glissando Detection for Recordings of Chinese Bamboo Flute},
  year = {2019},
  abstract = {Playing techniques such as ornamentations and articulation effects constitute important aspects of music performance. However, their computational analysis is still at an early stage due to a lack of instrument diversity, established methodologies and informative data. Focusing on the Chinese bamboo flute, we introduce a two-stage glissando detection system based on hidden Markov models (HMMs) with Gaussian mixtures. A rule-based segmentation process extracts glissando candidates that are consecutive note changes in the same direction. Glissandi are then identified by two HMMs. The study uses a newly created dataset of Chinese bamboo flute recordings, including both isolated glissandi and real-world pieces. The results, based on both frame- and segment-based evaluation for ascending and descending glissandi respectively, confirm the feasibility of the proposed method for glissando detection. Better detection performance of ascending glissandi over descending ones is obtained due to their more regular patterns. Inaccurate pitch estimation forms a main obstacle for successful fully-automated glissando detection. The dataset and method can be used for performance analysis.},
  startyear = {2019},
  startmonth = {May},
  startday = {28},
  finishyear = {2019},
  finishmonth = {May},
  finishday = {31},
  conference = {Sound and Music Computing Conference},
  day = {28},
  publicationstatus = {accepted}
}
@inproceedings{phaye2019subspectralnetclassification,
  author = {Phaye, SSR and Benetos, E and Wang, Y},
  booktitle = {},
  month = {May},
  organization = {Brighton, UK},
  publisher = {IEEE},
  title = {SubSpectralNet - Using sub-spectrogram based convolutional neural networks for acoustic scene classification},
  url = {https://ssrp.github.io/},
  year = {2019},
  abstract = {Acoustic Scene Classification (ASC) is one of the core research problems in the field of Computational Sound Scene Analysis. In this work, we present SubSpectralNet, a novel model which captures discriminative features by incorporating frequency band-level differences to model soundscapes. Using mel-spectrograms, we propose the idea of using band-wise crops of the input time-frequency representations and train a convolutional neural network~(CNN) on the same. We also propose a modification in the training method for more efficient learning of the CNN models. We first give a motivation for using sub-spectrograms by giving intuitive and statistical analyses and finally we develop a sub-spectrogram based CNN architecture for ASC. The system is evaluated on the public ASC development dataset provided for the "Detection and Classification of Acoustic Scenes and Events" (DCASE) 2018 Challenge. Our best model achieves an improvement of +14\% in terms of classification accuracy with respect to the DCASE 2018 baseline system. Code and figures are available at https://github.com/ssrp/SubSpectralNet},
  doi = {10.1109/ICASSP.2019.8683288},
  startyear = {2019},
  startmonth = {May},
  startday = {12},
  finishyear = {2019},
  finishmonth = {May},
  finishday = {17},
  conference = {IEEE International Conference on Acoustics, Speech, and Signal Processing},
  day = {12},
  publicationstatus = {accepted}
}
@inproceedings{lins2019automaticrecordings,
  author = {Lins, F and Johann, M and Benetos, E and Schramm, R},
  booktitle = {},
  month = {May},
  organization = {Brighton, UK},
  publisher = {IEEE},
  title = {Automatic Transcription of Diatonic Harmonica Recordings},
  url = {https://2019.ieeeicassp.org/},
  year = {2019},
  abstract = {This paper presents a method for automatic transcription of the diatonic Harmonica instrument. It estimates the multi-pitch activations through a spectrogram factorisation framework. This framework is based on Probabilistic Latent Component Analysis (PLCA) and uses a fixed 4-dimensional dictionary with spectral templates extracted from the Harmonica instrument's timbre. Methods based on spectrogram factorisation may suffer from local-optima issues in the presence of harmonic overlap or considerable timbre variability. To alleviate this issue, we propose a set of harmonic constraints that are inherent to the Harmonica instrument's note layout or are caused by specific diatonic Harmonica playing techniques. These constraints help to guide the factorisation process until convergence into meaningful multi-pitch activations is achieved. This work also builds a new audio dataset containing solo recordings of diatonic Harmonica excerpts and the respective multi-pitch annotations. We compare our proposed approach against multiple baseline techniques for automatic music transcription on this dataset and report the results based on frame-based F-measure statistics.},
  doi = {10.1109/ICASSP.2019.8682334},
  startyear = {2019},
  startmonth = {May},
  startday = {12},
  finishyear = {2019},
  finishmonth = {May},
  finishday = {17},
  conference = {IEEE International Conference on Acoustics, Speech, and Signal Processing},
  day = {12},
  publicationstatus = {accepted}
}
@inproceedings{nolasco2019audiobasedstates,
  author = {Nolasco, I and Terenzi, A and Cecchi, S and Orcioni, S and Bear, H and Benetos, E},
  booktitle = {},
  month = {May},
  organization = {Brighton, UK},
  publisher = {IEEE},
  title = {Audio-based identification of beehive states},
  url = {https://2019.ieeeicassp.org/},
  year = {2019},
  abstract = {The absence of the queen in a beehive is a very strong indicator of the need for beekeeper intervention. Manually searching for the queen is an arduous recurrent task for beekeepers that disrupts the normal life cycle of the beehive and can be a source of stress for bees. Sound is an indicator for signalling different states of the beehive, including the absence of the queen bee. In this work, we apply machine learning methods to automatically recognise different states in a beehive using audio as input. We investigate both support vector machines and convolutional neural networks for beehive state recognition, using audio data of beehives collected from the NU-Hive project. Results indicate the potential of machine learning methods as well as the challenges of generalizing the system to new hives.},
  doi = {10.1109/ICASSP.2019.8682981},
  startyear = {2019},
  startmonth = {May},
  startday = {12},
  finishyear = {2019},
  finishmonth = {May},
  finishday = {17},
  conference = {IEEE International Conference on Acoustics, Speech, and Signal Processing},
  day = {12},
  publicationstatus = {published}
}
@inproceedings{mishra2019ganbasednetworks,
  author = {Mishra, S and Stoller, D and Benetos, E and Sturm, B and Dixon, S},
  booktitle = {https://sites.google.com/view/safeml-iclr2019},
  month = {May},
  organization = {New Orleans, USA},
  title = {GAN-based Generation and Automatic Selection of Explanations for Neural Networks},
  url = {https://sites.google.com/site/saumitramishrac4dm/},
  year = {2019},
  abstract = {One way to interpret trained deep neural networks (DNNs) is by inspecting characteristics that neurons in the model respond to, such as by iteratively optimising the model input (e.g., an image) to maximally activate specific neurons. However, this requires a careful selection of hyper-parameters to generate interpretable examples for each neuron of interest, and current methods rely on a manual, qualitative evaluation of each setting, which is prohibitively slow. We introduce a new metric that uses Fréchet Inception Distance (FID) to encourage similarity between model activations for real and generated data. This provides an efficient way to evaluate a set of generated examples for each setting of hyper-parameters. We also propose a novel GAN-based method for generating explanations that enables an efficient search through the input space and imposes a strong prior favouring realistic outputs. We apply our approach to a classification model trained to predict whether a music audio recording contains singing voice. Our results suggest that this proposed metric successfully selects hyper-parameters leading to interpretable examples, avoiding the need for manual evaluation. Moreover, we see that examples synthesised to maximise or minimise the predicted probability of singing voice presence exhibit vocal or non-vocal characteristics, respectively, suggesting that our approach is able to generate suitable explanations for understanding concepts learned by a neural network.},
  startyear = {2019},
  startmonth = {May},
  startday = {6},
  finishyear = {2019},
  finishmonth = {May},
  finishday = {6},
  conference = {SafeML ICLR 2019 Workshop},
  day = {6},
  publicationstatus = {accepted}
}
@article{benetos2019automaticoverview,
  author = {Benetos, E and Dixon, S and Duan, Z and Ewert, S},
  journal = {IEEE Signal Processing Magazine},
  month = {Jan},
  number = {1},
  pages = {20--30},
  publisher = {Institute of Electrical and Electronics Engineers},
  title = {Automatic Music Transcription: An Overview},
  url = {http://www.eecs.qmul.ac.uk/~emmanouilb/},
  url = {https://ieeexplore.ieee.org/document/8588423},
  volume = {36},
  year = {2019},
  abstract = {The capability of transcribing music audio into music notation is a fascinating example of human intelligence. It involves perception (analyzing complex auditory scenes), cognition (recognizing musical objects), knowledge representation (forming musical structures), and inference (testing alternative hypotheses). Automatic music transcription (AMT), i.e., the design of computational algorithms to convert acoustic music signals into some form of music notation, is a challenging task in signal processing and artificial intelligence. It comprises several subtasks, including multipitch estimation (MPE), onset and offset detection, instrument recognition, beat and rhythm tracking, interpretation of expressive timing and dynamics, and score typesetting.},
  doi = {10.1109/MSP.2018.2869928},
  issn = {1053-5888},
  eissn = {1558-0792},
  day = {1},
  publicationstatus = {published}
}
@article{men2019designingenvironments,
  author = {Men, L and Bryan-Kinns, N and Bryce, L},
  journal = {PeerJ Computer Science},
  month = {Nov},
  pages = {1--39},
  publisher = {PeerJ},
  title = {Designing spaces to support collaborative creativity in shared virtual environments},
  volume = {5},
  year = {2019},
  abstract = {Shared virtual environments (SVEs) have been researched extensively within the fields of education, entertainment, work, and training, yet there has been limited research on the creative and collaborative aspects of interactivity in SVEs. The important role that creativity and collaboration play in human society raises the question of the way that virtual working spaces might be designed to support collaborative creativity in SVEs. In this paper, we outline an SVE named LeMo, which allows two people to collaboratively create a short loop of music together. Then we present a study of LeMo, in which 52 users composed music in pairs using four different virtual working space configurations. Key findings indicated by results include: (i) providing personal space is an effective way to support collaborative creativity in SVEs, (ii) personal spaces with a fluid, lightweight boundary could provide enough support, worked better, and were preferable to ones with rigid boundaries, and (iii) a configuration that provides a movable personal space was preferred to one that provided no mobility. Following these findings, five corresponding design implications for shared virtual environments focusing on supporting collaborative creativity are given and conclusions are drawn.},
  doi = {10.7717/peerj-cs.229},
  issn = {2376-5992},
  eissn = {2376-5992},
  day = {4},
  publicationstatus = {published}
}
@inproceedings{liang2019effectproperties,
  author = {Liang, A and Stewart, R and Freire, R and Bryan-Kinns, N},
  booktitle = {UbiComp/ISWC 2019 - Adjunct Proceedings of the 2019 ACM International Joint Conference on Pervasive and Ubiquitous Computing and Proceedings of the 2019 ACM International Symposium on Wearable Computers},
  month = {Sep},
  pages = {121--124},
  title = {Effect of bonding and washing on electronic textile stretch sensor properties},
  year = {2019},
  abstract = {© 2019 Copyright held by the owner/author(s). Knit e-textile sensors can be used to detect stretch or strain, and when integrated directly into wearable garments, they can be used to detect movement of the human body. However, before they can reliably be used in real-world applications, the garment construction technique and the effects of wear due to washing need to be considered. This paper presents a study examining how thermal bonding and washing affect piezo-resistive textile sensors. Three textile strain sensors are considered, all using Technik-tex P130B as the conductive material: i) conductive fabric only, ii) conductive fabric bonded on one side to Eurojersey fabric, and iii) conductive fabric with Eurojersey bonded on top and bottom of the conductive fabric. The sensors’ performance is evaluated using a tensile tester while monitoring their electrical resistance before and after washing. The findings show that a single layer of bonding is the ideal construction and that after three wash cycles the sensor remains reliable.},
  doi = {10.1145/3341162.3343817},
  isbn = {9781450368698},
  day = {9},
  publicationstatus = {published}
}
@article{liang2019analysissensors,
  author = {Liang, A and Stewart, R and Bryan-Kinns, N},
  journal = {Sensors (Switzerland)},
  month = {Aug},
  number = {16},
  title = {Analysis of sensitivity, linearity, hysteresis, responsiveness, and fatigue of textile knit stretch sensors},
  volume = {19},
  year = {2019},
  abstract = {© 2019 by the authors. Licensee MDPI, Basel, Switzerland. Wearable technology is widely used for collecting information about the human body and its movement by placing sensors on the body. This paper presents research into electronic textile strain sensors designed specifically for wearable applications which need to be lightweight, robust, and comfortable. In this paper, sixteen stretch sensors, each with different conductive stretch fabrics, are evaluated: EeonTex (Eeonyx Corporation), knitted silver-plated yarn, and knitted spun stainless steel yarn. The sensors’ performance is tested using a tensile tester while monitoring their resistance with a microcontroller. Each sensor was analyzed for its sensitivity, linearity, hysteresis, responsiveness, and fatigue through a series of dynamic and static tests. The findings show that for wearable applications a subset of the silver-plated yarn sensors had better ranked performance in terms of sensitivity, linearity, and steady state. EeonTex was found to be the most responsive, and the stainless steel yarn performed the worst, which may be due to the characteristics of the knit samples under test.},
  doi = {10.3390/s19163618},
  issn = {1424-8220},
  day = {2},
  publicationstatus = {published}
}
@inproceedings{deacon2019shapingcomposition,
  author = {Deacon, T and Bryan-Kinns, N and Healey, PGT and Barthet, M},
  booktitle = {C and C 2019 - Proceedings of the 2019 Creativity and Cognition},
  month = {Jun},
  pages = {121--132},
  title = {Shaping sounds: The role of gesture in collaborative spatial music composition},
  year = {2019},
  abstract = {© 2019 ACM. This paper presents an observational study of collaborative spatial music composition. We uncover the practical methods two experienced music producers use to coordinate their understanding of multi-modal and spatial representations of music as part of their workflow. We show embodied spatial referencing as a significant feature of the music producers' interactions. Our analysis suggests that gesture is used to understand, communicate and form action through a process of shaping sounds in space. This metaphor highlights how aesthetic assessments are collaboratively produced and developed through coordinated spatial activity. Our implications establish sensitivity to embodied action in the development of collaborative workspaces for creative, spatial-media production of music.},
  doi = {10.1145/3325480.3325493},
  isbn = {9781450359177},
  day = {13},
  publicationstatus = {published}
}
@inproceedings{men2019lemocreativity,
  author = {Men, L and Bryan-Kinns, N},
  booktitle = {C and C 2019 - Proceedings of the 2019 Creativity and Cognition},
  month = {Jun},
  pages = {71--82},
  title = {LeMo: Exploring virtual space for collaborative creativity},
  year = {2019},
  abstract = {© 2019 ACM. Shared Virtual Environments (SVEs) have been extensively researched for education, entertainment, work, and training, yet there has been limited research on the creative aspects of collaboration in SVEs. This raises questions about how to design virtual working spaces to support collaborative creativity in SVEs. In this paper, we outline an SVE named LeMo, which allows two people to create music collaboratively. Then we present a study of LeMo, in which 42 users composed music together using three different virtual working space configurations. Results indicate that (i) two types of territory and working configurations emerged during collaborative composing, (ii) when made available to them, personal working spaces were extensively used, and were considered to be essential to successful collaborative music making, and (iii) a publicly visible personal working space was preferable to a publicly invisible one. Based on these findings, three corresponding design implications for Shared Virtual Environments focusing on supporting collaborative creativity are given.},
  doi = {10.1145/3325480.3325495},
  isbn = {9781450359177},
  day = {13},
  publicationstatus = {published}
}
@inproceedings{nonnis2019maziautism,
  url = {https://dl.acm.org/citation.cfm?id=3325340},
  author = {Nonnis, A and Bryan-Kinns, NN},
  booktitle = {Proceedings of the 18th ACM International Conference on Interaction Design and Children},
  month = {Jun},
  note = {Best Demo Award},
  publisher = {ACM Press},
  title = {Mazi: a Tangible Toy for Collaborative Play between Children with Autism},
  year = {2019},
  abstract = {Playtime is an important activity for child development as it stimulates as well as predicts cognitive, motor, emotional and social skills. Children with autism may find it difficult to socialize, particularly initiating and maintaining human interactions. Consequently, it is thought that playing with peers is often a challenge that many children avoid by simply playing in solitary mode. We present the design of Mazi, an e-textile sonic tangible user interface (TUI) designed with the aim of promoting basic social skills; stimulating spontaneous, independent and collaborative play; and providing sensory regulation opportunities. Mazi was tested in a SEN School based in North-East London, with a group of five children with moderate to high support needs autism aged between 6 and 9. The results show great potential for TUI implementation in educational settings as a way of promoting social skills through carefully designed playful and recreational activities.},
  doi = {10.1145/3311927.3325340},
  startyear = {2019},
  startmonth = {Jun},
  startday = {12},
  finishyear = {2019},
  finishmonth = {Jun},
  finishday = {15},
  isbn = {9781450366908},
  conference = {IDC '19},
  day = {12},
  publicationstatus = {published}
}
@inproceedings{nonnis2019maziplay,
  author = {Nonnis, A and Bryan-Kinns, N},
  booktitle = {Conference on Human Factors in Computing Systems - Proceedings},
  month = {May},
  title = {Mazi: Tangible technologies as a channel for collaborative play},
  year = {2019},
  abstract = {© 2019 Association for Computing Machinery. This paper investigates how haptic and auditory stimulation can be playfully implemented as an accessible and stimulating form of interaction for children. We present the design of Mazi, a sonic Tangible User Interface (TUI) designed to encourage spontaneous and collaborative play between children with high support needs autism. We report on a five week study of Mazi with five children aged between 6 and 9 years old at a Special Education Needs (SEN) school in London, UK. We found that collaborative play emerged from the interaction with the system especially in regards to socialization and engagement. Our study contributes to exploring the potential of user-centered TUI development as a channel to facilitate social interaction while providing sensory regulation for children with SENs.},
  doi = {10.1145/3290605.3300670},
  isbn = {9781450359702},
  day = {2},
  publicationstatus = {published}
}
@inproceedings{thompson2019aapplications,
  author = {Thompson, A and Fazekas, G},
  booktitle = {ACM International Conference Proceeding Series},
  month = {Sep},
  pages = {219--222},
  title = {A model-view-update framework for interactive web audio applications},
  year = {2019},
  abstract = {© 2019 ACM. We present the Flow framework, a front-end framework for interactive Web applications built on the Web Audio API. It encourages a purely declarative approach to application design by providing a number of abstractions for the creation of HTML, audio processing graphs, and event listeners. In doing so we place the burden of tracking and managing state solely on to the framework rather than the developer. We introduce the Model-View-Update architecture and how it applies to audio application design. The MVU architecture is built on the unidirectional flow of data through pure functions, pushing side effects onto the framework's runtime. Flow conceptualises the audio graph as another View into application state, and uses this conceptualisation to enforce strict separation of the audio and visual output of an application. Future plans for the framework include a robust plug-in system to add support for third-party audio nodes, a time travelling debugger to replay sequences of actions to the runtime, and a bespoke programming language that better aligns with Flow's functional influences.},
  doi = {10.1145/3356590.3356623},
  isbn = {9781450372978},
  day = {18},
  publicationstatus = {published}
}
@inproceedings{bromham2019thewarmth,
  author = {Bromham, G and Moffat, D and Barthet, M and Danielsen, A and Fazekas, G},
  booktitle = {},
  month = {Sep},
  organization = {Nottingham, UK},
  title = {The Impact of Audio Effects Processing on the Perception of Brightness and Warmth},
  year = {2019},
  startyear = {2019},
  startmonth = {Sep},
  startday = {17},
  finishyear = {2019},
  finishmonth = {Sep},
  finishday = {20},
  keyword = {Brightness},
  keyword = {Warmth},
  keyword = {Retro},
  keyword = {Nostalgia},
  keyword = {Music Perception},
  keyword = {Digital Audio Effects},
  keyword = {Timbre},
  conference = {Audio Mostly},
  day = {17},
  publicationstatus = {published}
}
@inproceedings{liang2019transferdetection,
  author = {Liang, B and Fazekas, G and Sandler, M},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks},
  month = {Jul},
  title = {Transfer Learning for Piano Sustain-Pedal Detection},
  volume = {2019-July},
  year = {2019},
  abstract = {© 2019 IEEE. Detecting piano pedalling techniques in polyphonic music remains a challenging task in music information retrieval. While other piano-related tasks, such as pitch estimation and onset detection, have seen improvement through applying deep learning methods, little work has been done to develop deep learning models to detect playing techniques. In this paper, we propose a transfer learning approach for the detection of sustain-pedal techniques, which are commonly used by pianists to enrich the sound. In the source task, a convolutional neural network (CNN) is trained for learning spectral and temporal contexts when the sustain pedal is pressed using a large dataset generated by a physical modelling virtual instrument. The CNN is designed and experimented through exploiting the knowledge of piano acoustics and physics. This can achieve an accuracy score of 0.98 in the validation results. In the target task, the knowledge learned from the synthesised data can be transferred to detect the sustain pedal in acoustic piano recordings. A concatenated feature vector using the activations of the trained convolutional layers is extracted from the recordings and classified into frame-wise pedal press or release. We demonstrate the effectiveness of our method in acoustic piano recordings of Chopin's music. From the cross-validation results, the proposed transfer learning method achieves an average F-measure of 0.89 and an overall performance of 0.84 obtained using the micro-averaged F-measure. These results outperform applying the pre-trained CNN model directly or the model with a fine-tuned last layer.},
  doi = {10.1109/IJCNN.2019.8851724},
  isbn = {9781728119854},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{sheng2019acompressor,
  author = {Sheng, D and Fazekas, G},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks},
  month = {Jul},
  title = {A Feature Learning Siamese Model for Intelligent Control of the Dynamic Range Compressor},
  volume = {2019-July},
  year = {2019},
  abstract = {© 2019 IEEE. In this paper, a siamese DNN model is proposed to learn the characteristics of the audio dynamic range compressor (DRC). This facilitates an intelligent control system that uses audio examples to configure the DRC, a widely used nonlinear audio signal conditioning technique in the areas of music production, speech communication and broadcasting. Several alternative siamese DNN architectures are proposed to learn feature embeddings that can characterise subtle effects due to dynamic range compression. These models are compared with each other as well as handcrafted features proposed in previous work. The evaluation of the relations between the hyperparameters of DNN and DRC parameters are also provided. The best model is able to produce a universal feature embedding that is capable of predicting multiple DRC parameters simultaneously, which is a significant improvement from our previous research. The feature embedding shows better performance than handcrafted audio features when predicting DRC parameters for both mono-instrument audio loops and polyphonic music pieces.},
  doi = {10.1109/IJCNN.2019.8851950},
  isbn = {9781728119854},
  day = {1},
  publicationstatus = {published}
}
@incollection{xamb2019leveragingproduction,
  author = {Xambó, A and Font, F and Fazekas, G and Barthet, M},
  booktitle = {Foundations in Sound Design for Linear Media},
  month = {Jun},
  pages = {248--282},
  title = {Leveraging Online Audio Commons Content for Media Production},
  year = {2019},
  doi = {10.4324/9781315106335-10},
  day = {19}
}
@inproceedings{liang2019pianonetworks,
  author = {Liang, B and Fazekas, G and Sandler, M},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  month = {May},
  pages = {241--245},
  title = {Piano Sustain-pedal Detection Using Convolutional Neural Networks},
  volume = {2019-May},
  year = {2019},
  abstract = {© 2019 IEEE. Recent research on piano transcription has focused primarily on note events. Very few studies have investigated pedalling techniques, which form an important aspect of expressive piano music performance. In this paper, we propose a novel method for piano sustain-pedal detection based on Convolutional Neural Networks (CNN). Inspired by different acoustic characteristics at the start (pedal onset) versus during the pedalled segment, two binary classifiers are trained separately to learn both temporal dependencies and timbral features using CNN. Their outputs are fused in order to decide whether a portion in a piano recording is played with the sustain pedal. The proposed architecture and our detection system are assessed using a dataset with frame-wise pedal on/off annotations. An average F1 score of 0.74 is obtained for the test set. The method performs better on pieces of Romantic-era composers, who intended to deliver more colours to the piano sound through pedalling techniques.},
  doi = {10.1109/ICASSP.2019.8683505},
  isbn = {9781479981311},
  issn = {1520-6149},
  conference = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{zhang2019coordinationlisteners,
  author = {Zhang, L and Healey, PGT},
  booktitle = {},
  month = {Sep},
  title = {Co-ordination of Head Nods: Asymmetries between Speakers and Listeners},
  year = {2019},
  abstract = {Previous research suggests that if people unconsciously mimic their interaction partner’s movement, they gain social influence. We compare the effectiveness of speakers that mimic listeners’ head nods with speakers that use natural nods in a special customised virtual environment. The results suggest that listeners agreed more with mimicking speakers than natural speakers. However, there are also asymmetries in speaker-listener nodding in the high and low-frequency domain. Listeners nod significantly more than speakers in the high frequency domain. This asymmetry may be an important factor in coordination. We conclude that speaker and listener nods have both different form and different functions.},
  conference = {Proceedings of the 23rd Workshop on the Semantics and Pragmatics of Dialogue},
  day = {5},
  publicationstatus = {published}
}
@inproceedings{clark2019mappinginteractions,
  author = {Clark, L and Cowan, BR and Edwards, J and Edlund, J and Szekely, E and Munteanu, C and Murad, C and Healey, P and Aylett, M and Harte, N and Torre, I and Moore, RK and Doyle, P},
  booktitle = {Conference on Human Factors in Computing Systems - Proceedings},
  month = {May},
  title = {Mapping theoretical and methodological perspectives for understanding speech interface interactions},
  year = {2019},
  abstract = {© 2019 Copyright held by the owner/author(s). The use of speech as an interaction modality has grown considerably through the integration of Intelligent Personal Assistants (IPAs, e.g. Siri, Google Assistant) into smartphones and voice-based devices (e.g. Amazon Echo). However, there remain significant gaps in using theoretical frameworks to understand user behaviours and choices and how they may be applied to specific speech interface interactions. This part-day multidisciplinary workshop aims to critically map out and evaluate theoretical frameworks and methodological approaches across a number of disciplines and establish directions for new paradigms in understanding speech interface user behaviour. In doing so, we will bring together participants from HCI and other speech-related domains to establish a cohesive, diverse and collaborative community of researchers from academia and industry with interest in exploring theoretical and methodological issues in the field.},
  doi = {10.1145/3290607.3299009},
  isbn = {9781450359719},
  day = {2},
  publicationstatus = {published}
}
@inproceedings{galindoesparza2019embodiedtechnology,
  author = {Galindo Esparza, RP and Healey, PGT and Weaver, L and Delbridge, M},
  booktitle = {Conference on Human Factors in Computing Systems - Proceedings},
  month = {May},
  title = {Embodied imagination: An approach to stroke recovery combining participatory performance and interactive technology},
  year = {2019},
  abstract = {© 2019 Association for Computing Machinery. Participatory performance provides methods for exploring social identities and situations in ways that can help people to imagine new ways of being. Digital technologies provide tools that can help people envision these possibilities. We explore this combination through a performance workshop process designed to help stroke survivors imagine new physical and social possibilities by enacting fantasies of "things they always wanted to do". This process uses performance methods combined with specially designed real-time movement visualisations to progressively build fantasy narratives that are enacted with and for other workshop participants. Qualitative evaluations suggest this process successfully stimulates participant’s embodied imagination and generates a diverse range of fantasies. The interactive and communal aspects of the workshop process appear to be especially important in achieving these effects. This work highlights how the combination of performance methods and interactive tools can bring a rich, prospective and political understanding of people’s lived experience to design.},
  doi = {10.1145/3290605.3300735},
  isbn = {9781450359702},
  day = {2},
  publicationstatus = {published}
}
@article{albert2019drawinganalysis,
  author = {Albert, S and Heath, C and Skach, S and Harris, MT and Miller, M and Healey, PGT},
  journal = {Social Interaction. Video-Based Studies of Human Sociality},
  month = {Mar},
  number = {1},
  title = {Drawing as transcription: how do graphical techniques inform interaction analysis?},
  volume = {2},
  year = {2019},
  doi = {10.7146/si.v2i1.113145},
  issn = {2446-3620},
  day = {28}
}
@article{theodorou2019engagingresponses,
  author = {Theodorou, L and Healey, PGT and Smeraldi, F},
  journal = {Front Psychol},
  month = {Feb},
  pages = {71--71},
  title = {Engaging With Contemporary Dance: What Can Body Movements Tell us About Audience Responses?},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/30774609},
  volume = {10},
  year = {2019},
  abstract = {In live performances, seated audiences have restricted opportunities for response. Some responses are obvious, such as applause and cheering, but there are also many apparently incidental movements including posture shifts, fixing hair, scratching and adjusting glasses. Do these movements provide clues to people's level of engagement with a performance? Our basic hypothesis is that audience responses are part of a bi-directional system of audience-performer communication. This communication is part of what distinguishes live from recorded performance and underpins live performers' moment-to-moment sense of how well a performance is going. Here we investigate the range of visible real-time movements of audiences in four live contemporary dance performances. Video recordings of performers and audiences were analyzed using computer vision techniques for extracting face, hand and body movement data. The meaning of audience movements was analyzed by comparing clips of the audience at moments of maximum and minimum movement to expert and novice judges. The results show that audience clips with the lowest overall movement are judged as displaying the highest engagement. In addition, we found that while there is no systematic relationship between audience and dancers' movement, hands seem to play an especially significant role since they move significantly more compared to the rest of the body. We draw on these findings to argue that collective stillness is an especially salient signal of audience engagement.},
  doi = {10.3389/fpsyg.2019.00071},
  issn = {1664-1078},
  keyword = {audience},
  keyword = {contemporary dance},
  keyword = {engagement},
  keyword = {motion tracking},
  keyword = {movement},
  language = {eng},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{bauer2019designingreality,
  author = {Bauer, V and Nagele, A and Baume, C and Cowlishaw, T and Cooke, H and Pike, C and Healey, PGT},
  booktitle = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  month = {Jan},
  pages = {305--311},
  title = {Designing an Interactive and Collaborative Experience in Audio Augmented Reality},
  volume = {11883 LNCS},
  year = {2019},
  abstract = {© Springer Nature Switzerland AG 2019. Audio Augmented Reality (AAR) consists of adding spatial audio entities into the real environment. Existing mobile applications and technologies open questions around interactive and collaborative AAR. This paper proposes an experiment to examine how spatial audio can prompt and support actions in interactive AAR experiences; how distinct auditory information influences collaborative tasks and group dynamics; and how gamified AAR can enhance participatory storytelling. We are developing an interactive multiplayer experience in AAR using the Bose “Frames” audio sunglasses. Four participants at a time will go through a gamified story that attempts to interfere with group dynamics. In this paper, we present our AAR platform and collaborative game in terms of experience design, and detail the testing methodology and analysis that we will conduct to answer our research questions.},
  doi = {10.1007/978-3-030-31908-3_20},
  isbn = {9783030319076},
  issn = {0302-9743},
  eissn = {1611-3349},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{mice2019embodiedinstruments,
  address = {Marseille, France},
  author = {Mice, L and Mcpherson, AP},
  booktitle = {14th International Symposium on Computer Music Multidisciplinary Research},
  editor = {Mice, L and Mcpherson, AP},
  month = {Oct},
  organization = {Marseille, France},
  title = {Embodied Cognition in Performers of Large Acoustic Instruments as a Method of Designing New Large Digital Musical Instruments},
  url = {http://www.mat.qmul.ac.uk/students/lia-mice/},
  year = {2019},
  abstract = {We present The Large Instrument Performers Study, an interview-based exploration into how large scale acoustic instrument performers navigate the instrument's size-related aesthetic features during the performance. Through the conceptual frameworks of embodied music cognition and affordance theory, we discuss how the themes that emerged in the interview data reveal the ways size-related aesthetic features of large acoustic instruments influence the instrument performer's choices; how large scale acoustic instruments feature microscopic nuanced performance options; and how despite the preconception of large scale acoustic instruments being scaled up versions of the smaller instrument with the addition of a lower fundamental tone, the instruments offer different sonic and performative features to their smaller counterparts and require precise gestural control that is certainly not scaled up. This is followed by a discussion of how the study findings could influence design features in new large scale digital musical instruments to result in more nuanced control and timbrally rich instruments, and better understanding of how interfaces and instruments influence performers' choices and as a result music repertoire and performance.},
  startyear = {2019},
  startmonth = {Oct},
  startday = {14},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {18},
  keyword = {Embodied Cognition},
  keyword = {Digital Musical Instruments},
  conference = {Computer Music Multidisciplinary Research},
  day = {14},
  publicationstatus = {published}
}
@inproceedings{armitage2019bricolagestudy,
  author = {Armitage, J and Mcpherson, A},
  booktitle = {},
  month = {Sep},
  organization = {University of Nottingham},
  publisher = {ACM},
  title = {Bricolage in a hybrid digital lutherie context: a workshop study},
  url = {http://acm.org/},
  year = {2019},
  abstract = {Interaction design research typically differentiates processes involving hardware and software tools as being led by tinkering and play, versus engineering and conceptualisation. Increasingly however, embedded maker tools and platforms require hybridisation of these processes. In the domain of digital musical instrument (DMI) design, we were motivated to explore the tensions of such a hybrid process. We designed a workshop where groups of DMI designers were given the same partly-finished instrument consisting of four microphones exciting four vibrating string models. Their task was to refine this simple instrument to their liking for one hour using Pure Data software. All groups sought to use the microphone signals to control the instrument’s behaviour in rich and complex ways, but found even apparently simple mappings difficult to realise within the time constraint. We describe the difficulties they encountered and discuss emergent issues with tinkering in and with software. We conclude with further questions and suggestions for designers and technologists regarding embedded DMI design processes and tools.},
  startyear = {2019},
  startmonth = {Sep},
  startday = {17},
  finishyear = {2019},
  finishmonth = {Sep},
  finishday = {20},
  conference = {Audio Mostly},
  day = {17},
  publicationstatus = {accepted}
}
@inproceedings{lepri2019fictionalprototypes,
  author = {LEPRI, G and MCPHERSON, A},
  booktitle = {},
  month = {Jun},
  title = {Fictional Instruments, Real Values: Discovering Musical Backgrounds with Non-Functional Prototypes},
  year = {2019},
  abstract = {The emergence of a new technology can be considered as the result of social, cultural and technical process. Instrument designs are particularly influenced by cultural and aesthetic values linked to the specific contexts and communities that produced them. In previous work, we ran a design fiction workshop in which musicians created non-functional instrument mockups. In the current paper, we report on an online survey in which music technologists were asked to speculate on the background of the musicians who designed particular instruments. Our results showed several cues for the interpretation of the artefacts' origins, including physical features, body-instrument interactions, use of language and references to established music practices and tools. Tacit musical and cultural values were also identified based on intuitive and holistic judgments. Our discussion highlights the importance of cultural awareness and context-dependent values on the design and use of interactive musical systems.},
  conference = {New Interfaces for Musical Expression},
  day = {3},
  publicationstatus = {accepted}
}
@inproceedings{lepri2019makingpractice,
  author = {LEPRI, G and MCPHERSON, A},
  booktitle = {},
  month = {Jun},
  organization = {San Diego California USA},
  title = {Making Up Instruments: Design Fiction for Value Discovery in Communities of Musical Practice},
  url = {http://www.giacomolepri.com/},
  year = {2019},
  abstract = {The design of a new technology entails the materialisation
of values emerging from the specific community, culture and
context in which that technology is created. Within the domain
of musical interaction, HCI research often examines new digital
tools and technologies which can carry unstated cultural
assumptions. This paper takes a step back to present a value
discovery exercise exploring the breadth of perspectives different
communities might have in relation to the values inscribed
in fictional technologies for musical interaction. We conducted
a hands-on activity in which musicians active in different contexts
were invited to envision not-yet-existent musical instruments.
The activity revealed several sources of influence on
participants’ artefacts, including cultural background, instrumental
training, and prior experience with music technology.
Our discussion highlights the importance of cultural awareness
and value rationality for the design of interactive systems
within and beyond the musical domain.},
  conference = {Designing Interactive Systems},
  day = {1},
  publicationstatus = {accepted}
}
@inproceedings{harrison2019accessiblewild,
  author = {Harrison, J and Chamberlain, A and McPherson, AP},
  booktitle = {},
  month = {May},
  pages = {1--6},
  title = {Accessible Instruments in the Wild},
  year = {2019},
  doi = {10.1145/3290607.3313037},
  day = {2}
}
@article{pardue2019realtimeintonation,
  author = {Pardue, LS and McPherson, A},
  journal = {Front Psychol},
  month = {Apr},
  pages = {627--627},
  title = {Real-Time Aural and Visual Feedback for Improving Violin Intonation.},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/31001159},
  volume = {10},
  year = {2019},
  abstract = {Playing with correct intonation is one of the major challenges for a string player. A player must learn how to physically reproduce a target pitch, but before that, the player must learn what correct intonation is. This requires audiation (the aural equivalent of visualization) of every note, along with self-assessment of whether the pitch played matches the target, and if not, what action should be taken to correct it. A challenge for successful learning is that much of it occurs during practice, typically without outside supervision. A student who has not yet learned to hear correct intonation may repeatedly practice out of tune, blithely normalizing bad habits and bad intonation. The real-time reflective nature of intonation and its consistent demand on attention make it a ripe target for technological intervention. Using a violin augmented to combine fingerboard sensors with audio analysis for real-time pitch detection, we examine the efficacy of three methods of real-time feedback for improving intonation and pitch learning. The first, aural feedback in the form of an in-tune guide pitch following the student in real-time, is inspired by the tradition of students playing along with teachers. The second is visual feedback on intonation correctness using an algorithm optimized for use throughout normal practice. The third is a combination of the two methods, simultaneously providing aural and visual feedback. Twelve beginning violinists, including children and adults, were given four in-situ 20-30 min lessons. Each lesson used one of the intonation feedback methods, along with a control lesson using no feedback. We collected data on intonation accuracy and conducted interviews on student experience and preference. The results varied by player, with evidence of some players being helped by the feedback methods but also cases where the feedback was distracting and intonation suffered. However, interviews suggested a high level of interest and potential in having such tools to help during practice, and results also suggested that it takes time to learn to use the real-time aural and visual feedback. Both methods of feedback demonstrate potential for assisting self-reflection during individual practice.},
  doi = {10.3389/fpsyg.2019.00627},
  issn = {1664-1078},
  keyword = {aural feedback},
  keyword = {intonation},
  keyword = {motor learning},
  keyword = {pedagogy},
  keyword = {real-time feedback},
  keyword = {violin},
  keyword = {visual feedback},
  language = {eng},
  day = {2},
  publicationstatus = {published}
}
@incollection{holland2019understandingmatters,
  author = {Holland, S and Mudd, T and Wilkie-McKenna, K and McPherson, A and Wanderley, MM},
  booktitle = {New Directions in Music and Human-Computer Interaction},
  month = {Jan},
  pages = {1--20},
  title = {Understanding Music Interaction, and Why It Matters},
  year = {2019},
  doi = {10.1007/978-3-319-92069-6_1},
  issn = {2195-9056},
  day = {1}
}
@incollection{mcpherson2019theverplank,
  author = {McPherson, A and Verplank, B},
  booktitle = {New Directions in Music and Human-Computer Interaction},
  month = {Jan},
  pages = {61--70},
  title = {The Poetry of Strange Connections: An Interview with Bill Verplank},
  year = {2019},
  doi = {10.1007/978-3-319-92069-6_4},
  issn = {2195-9056},
  day = {1}
}
@incollection{mcpherson2019musicalapproaches,
  author = {MCPHERSON, A and MORREALE, F and HARRISON, J},
  booktitle = {New Directions in Music and Human-Computer Interaction},
  editor = {Holland, S and Wilkie-McKenna, K and Mudd, T and MCPHERSON, A and Wanderley, M},
  publisher = {Springer},
  title = {Musical Instruments for Novices: Comparing NIME, HCI and Crowdfunding Approaches},
  year = {2019},
  abstract = {Designing musical instruments to make performance accessible to novice musicians is a goal which long predates digital technology. However, just in the space of the past 6 years, dozens of instrument designs have been introduced in various academic venues and in commercial crowdfunding campaigns. In this paper, we draw comparisons in design, evaluation and marketing across four domains: crowdfunding campaigns on Kickstarter and Indiegogo; the New Interfaces for Musical Expression (NIME) conference; conferences in human-computer interaction (HCI); and researchers  creating accessible instruments for children and adults with disabilities. We observe striking differences in approach between commercial and academic projects, with less pronounced differences between each of the academic communities. The paper concludes with general reflections on the identity and purpose of instruments for novice musicians, with suggestions for future exploration.},
  numberofpieces = {18},
  publicationstatus = {submitted}
}
@incollection{mcpherson2019musicbenford,
  author = {McPherson, A and Benford, S},
  booktitle = {New Directions in Music and Human-Computer Interaction},
  month = {Jan},
  pages = {213--220},
  title = {Music, Design and Ethnography: An Interview with Steve Benford},
  year = {2019},
  doi = {10.1007/978-3-319-92069-6_13},
  issn = {2195-9056},
  day = {1}
}
@inproceedings{hazzard2019failingperformance,
  author = {Hazzard, A and Greenhalgh, C and Kallionpaa, M and Benford, S and Veinberg, A and Kanga, Z and McPherson, A},
  booktitle = {},
  month = {Jan},
  pages = {30},
  title = {Failing with Style: Designing for Aesthetic Failure in Interactive Performance},
  year = {2019},
  doi = {10.1145/3290605.3300260},
  day = {1}
}
@article{quirogamartinez2019musicalnonmusicians,
  author = {Quiroga-Martinez, DR and C Hansen, N and Højlund, A and Pearce, M and Brattico, E and Vuust, P},
  journal = {European Journal of Neuroscience},
  month = {Dec},
  publisher = {Wiley},
  title = {Musical prediction error responses similarly reduced by predictive uncertainty in musicians and non-musicians.},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/31891423},
  year = {2019},
  abstract = {Auditory prediction error responses elicited by surprising sounds can be reliably recorded with musical stimuli that are more complex and realistic than those typically employed in EEG or MEG oddball paradigms. However, these responses are reduced as the predictive uncertainty of the stimuli increases. In this study, we investigate whether this effect is modulated by musical expertise. Magnetic mismatch negativity (MMNm) responses were recorded from 26 musicians and 24 non-musicians while they listened to low- and high-uncertainty melodic sequences in a musical multi-feature paradigm that included pitch, slide, intensity and timbre deviants. When compared to non-musicians, musically trained participants had significantly larger pitch and slide MMNm responses. However, both groups showed comparable reductions in pitch and slide MMNm amplitudes in the high-uncertainty condition compared with the low-uncertainty condition. In a separate, behavioural deviance detection experiment, musicians were more accurate and confident about their responses than non-musicians, but deviance detection in both groups was similarly affected by the uncertainty of the melodies. In both experiments, the interaction between uncertainty and expertise was not significant, suggesting that the effect is comparable in both groups. Consequently, our results replicate the modulatory effect of predictive uncertainty on prediction error; show that it is present across different types of listeners; and suggest that expertise-related and stimulus-driven modulations of predictive precision are dissociable and independent.},
  doi = {10.1111/ejn.14667},
  issn = {0953-816X},
  eissn = {1460-9568},
  keyword = {expertise},
  keyword = {mismatch negativity},
  keyword = {music},
  keyword = {precision},
  language = {eng},
  day = {31},
  publicationstatus = {published}
}
@article{pearce2019informationtheoreticcomplexity,
  author = {Pearce, M and Sauvé, S},
  journal = {Music Perception},
  month = {Dec},
  publisher = {University of California Press},
  title = {Information-theoretic Modeling of Perceived Musical Complexity},
  year = {2019},
  doi = {10.1525/mp.2019.37.2.165},
  issn = {0730-7829},
  day = {10},
  publicationstatus = {published}
}
@article{cheung2019uncertaintyactivity,
  author = {Cheung, V and HARRISON, PMC and Meyer, L and Pearce, M and Haynes, J-D and Koelsch, S},
  journal = {Current Biology},
  month = {Nov},
  publisher = {Elsevier (Cell Press)},
  title = {Uncertainty and Surprise Jointly Predict Musical Pleasure and Amygdala, Hippocampus, and Auditory Cortex Activity},
  year = {2019},
  doi = {10.1016/j.cub.2019.09.067},
  issn = {0960-9822},
  day = {7},
  publicationstatus = {published}
}
@article{zioga2019fromcreativity,
  author = {Zioga, I and Harrison, P and Pearce, M and Bhattacharya, J and Di Bernardi Luft, C},
  journal = {NeuroImage},
  month = {Oct},
  publisher = {Elsevier},
  title = {From learning to creativity: Identifying the behavioural and neural correlates of learning to predict human judgements of musical creativity},
  year = {2019},
  doi = {10.1016/j.neuroimage.2019.116311},
  issn = {1053-8119},
  day = {25},
  publicationstatus = {published}
}
@article{gold2019predictabilitylearning,
  author = {Gold, B and Pearce, M and Mas-Herrero, E and Dagher, A and Zatorre, RJ},
  journal = {The Journal of Neuroscience},
  month = {Oct},
  publisher = {Society for Neuroscience},
  title = {Predictability and uncertainty in the pleasure of music: a reward for learning?},
  year = {2019},
  doi = {10.1523/JNEUROSCI.0428-19.2019},
  issn = {0270-6474},
  day = {21},
  publicationstatus = {published}
}
@article{defleurian2019rewardpleasure,
  author = {de Fleurian, R and Harrison, PMC and Pearce, MT and Quiroga-Martinez, DR},
  journal = {Proc Natl Acad Sci U S A},
  month = {Sep},
  title = {Reward prediction tells us less than expected about musical pleasure},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/31537748},
  year = {2019},
  doi = {10.1073/pnas.1913244116},
  eissn = {1091-6490},
  language = {eng},
  pii = {1913244116},
  day = {19},
  publicationstatus = {online-published}
}
@article{quirogamartinez2019reducedcontexts,
  author = {Quiroga-Martinez, DR and Hansen, NC and Højlund, A and Pearce, MT and Brattico, E and Vuust, P},
  journal = {Cortex},
  month = {Jun},
  pages = {181--200},
  publisher = {Elsevier BV},
  title = {Reduced prediction error responses in high- as compared to low-uncertainty musical contexts},
  volume = {120},
  year = {2019},
  doi = {10.1016/j.cortex.2019.06.010},
  issn = {0010-9452},
  language = {en},
  day = {28},
  publicationstatus = {published}
}
@article{omigie2019intracranialcortices,
  author = {Omigie, D and Pearce, M and Lehongre, K and Hasboun, D and Navarro, V and Adam, C and Samson, S},
  journal = {J Cogn Neurosci},
  month = {Jun},
  number = {6},
  pages = {855--873},
  title = {Intracranial Recordings and Computational Modeling of Music Reveal the Time Course of Prediction Error Signaling in Frontal and Temporal Cortices.},
  url = {https://www.ncbi.nlm.nih.gov/pubmed/30883293},
  volume = {31},
  year = {2019},
  abstract = {Prediction is held to be a fundamental process underpinning perception, action, and cognition. To examine the time course of prediction error signaling, we recorded intracranial EEG activity from nine presurgical epileptic patients while they listened to melodies whose information theoretical predictability had been characterized using a computational model. We examined oscillatory activity in the superior temporal gyrus (STG), the middle temporal gyrus (MTG), and the pars orbitalis of the inferior frontal gyrus, lateral cortical areas previously implicated in auditory predictive processing. We also examined activity in anterior cingulate gyrus (ACG), insula, and amygdala to determine whether signatures of prediction error signaling may also be observable in these subcortical areas. Our results demonstrate that the information content (a measure of unexpectedness) of musical notes modulates the amplitude of low-frequency oscillatory activity (theta to beta power) in bilateral STG and right MTG from within 100 and 200 msec of note onset, respectively. Our results also show this cortical activity to be accompanied by low-frequency oscillatory modulation in ACG and insula-areas previously associated with mediating physiological arousal. Finally, we showed that modulation of low-frequency activity is followed by that of high-frequency (gamma) power from approximately 200 msec in the STG, between 300 and 400 msec in the left insula, and between 400 and 500 msec in the ACG. We discuss these results with respect to models of neural processing that emphasize gamma activity as an index of prediction error signaling and highlight the usefulness of musical stimuli in revealing the wide-reaching neural consequences of predictive processing.},
  doi = {10.1162/jocn_a_01388},
  eissn = {1530-8898},
  language = {eng},
  publicationstatus = {published}
}
@article{cameron2019neuralrhythms,
  author = {Cameron, DJ and Zioga, I and Lindsen, JP and Pearce, MT and Wiggins, GA and Potter, K and Bhattacharya, J},
  journal = {Experimental Brain Research},
  month = {Aug},
  number = {8},
  pages = {1981--1991},
  title = {Neural entrainment is associated with subjective groove and complexity for performed but not mechanical musical rhythms},
  volume = {237},
  year = {2019},
  abstract = {© 2019, The Author(s). Both movement and neural activity in humans can be entrained by the regularities of an external stimulus, such as the beat of musical rhythms. Neural entrainment to auditory rhythms supports temporal perception, and is enhanced by selective attention and by hierarchical temporal structure imposed on rhythms. However, it is not known how neural entrainment to rhythms is related to the subjective experience of groove (the desire to move along with music or rhythm), the perception of a regular beat, the perception of complexity, and the experience of pleasure. In two experiments, we used musical rhythms (from Steve Reich’s Clapping Music) to investigate whether rhythms that are performed by humans (with naturally variable timing) and rhythms that are mechanical (with precise timing), elicit differences in (1) neural entrainment, as measured by inter-trial phase coherence, and (2) subjective ratings of the complexity, preference, groove, and beat strength of rhythms. We also combined results from the two experiments to investigate relationships between neural entrainment and subjective perception of musical rhythms. We found that mechanical rhythms elicited a greater degree of neural entrainment than performed rhythms, likely due to the greater temporal precision in the stimulus, and the two types only elicited different ratings for some individual rhythms. Neural entrainment to performed rhythms, but not to mechanical ones, correlated with subjective desire to move and subjective complexity. These data, therefore, suggest multiple interacting influences on neural entrainment to rhythms, from low-level stimulus properties to high-level cognition and perception.},
  doi = {10.1007/s00221-019-05557-4},
  issn = {0014-4819},
  eissn = {1432-1106},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{supej2019gendercorpora,
  address = {Ljubljana, Slovenia},
  author = {Supej, A and Plahuta, M and Purver, M and Mathioudakis, M and Pollak, S},
  booktitle = {},
  editor = {Ignjatović, M and Kanjuo-Mrčela, A and Kuhar, R},
  month = {Dec},
  organization = {Bled, Slovenia},
  pages = {75--83},
  publisher = {Slovensko sociološko društvo},
  title = {Gender, Language and Society - Word Embeddings as a Reflection of Social Inequalities in Linguistic Corpora},
  url = {http://www.eecs.qmul.ac.uk/~mpurver/papers/supej-et-al19sss.pdf},
  url = {https://www.sociolosko-drustvo.si/wp-content/uploads/2019/10/SSD-ZBORNIK-PRISPEVKOV-19-V4.pdf},
  year = {2019},
  startyear = {2019},
  startmonth = {Oct},
  startday = {18},
  finishyear = {2019},
  finishmonth = {Oct},
  finishday = {19},
  isbn = {978-961-94302-3-1},
  conference = {Znanost in družbe prihodnosti, Slovensko sociološko srečanje [Annual meeting of the Slovenian Sociological Association: Science and future societies]},
  day = {24},
  publicationstatus = {published}
}
@misc{armendariz2019cosimlexcontext,
  author = {Armendariz, CS and Purver, M and Ulčar, M and Pollak, S and Ljubešić, N and Robnik-Šikonja, M and Granroth-Wilding, M and Vaik, K},
  month = {Dec},
  title = {CoSimLex: A Resource for Evaluating Graded Word Similarity in Context},
  url = {http://arxiv.org/abs/1912.05320v2},
  year = {2019},
  abstract = {State of the art natural language processing tools are built on
context-dependent word embeddings, but no direct method for evaluating these
representations currently exists. Standard tasks and datasets for intrinsic
evaluation of embeddings are based on judgements of similarity, but ignore
context; standard tasks for word sense disambiguation take account of context
but do not provide continuous measures of meaning similarity. This paper
describes an effort to build a new dataset, CoSimLex, intended to fill this
gap. Building on the standard pairwise similarity task of SimLex-999, it
provides context-dependent similarity measures; covers not only discrete
differences in word sense but more subtle, graded changes in meaning; and
covers not only a well-resourced language (English) but a number of
less-resourced languages. We define the task and evaluation metrics, outline
the dataset collection methodology, and describe the status of the dataset so
far.},
  confidential = {False},
  keyword = {cs.CL},
  keyword = {cs.CL},
  day = {11}
}
@inproceedings{rohanian2019detectingfusion,
  author = {Rohanian, M and Hough, J and Purver, M},
  booktitle = {Proceedings of the Annual Conference of the International Speech Communication Association, INTERSPEECH},
  month = {Sep},
  organization = {Graz, Austria},
  pages = {1443--1447},
  title = {Detecting depression with word-level multimodal fusion},
  volume = {2019-September},
  year = {2019},
  abstract = {Copyright © 2019 ISCA Semi-structured clinical interviews are frequently used diagnostic tools for identifying depression during an assessment phase. In addition to the lexical content of a patient's responses, multimodal cues concurrent with the responses are indicators of their motor and cognitive state, including those derivable from their voice quality and gestural behaviour. In this paper, we use information from different modalities in order to train a classifier capable of detecting the binary state of a subject (clinically depressed or not), as well as the level of their depression. We propose a model that is able to perform modality fusion incrementally after each word in an utterance using a time-dependent recurrent approach in a deep learning set-up. To mitigate noisy modalities, we utilize fusion gates that control the degree to which the audio or visual modality contributes to the final prediction. Our results show the effectiveness of word-level multimodal fusion, achieving state-of-the-art results in depression detection and outperforming early feature-level and late fusion techniques.},
  doi = {10.21437/Interspeech.2019-2283},
  issn = {2308-457X},
  eissn = {1990-9772},
  conference = {20th Annual Conference of the International Speech Communication Association - INTERSPEECH},
  day = {15},
  publicationstatus = {published}
}
@inproceedings{nasreen2019apatients,
  author = {Nasreen, S and Purver, M and Hough, J},
  booktitle = {Proceedings of the 23rd Workshop on the Semantics and Pragmatics of Dialogue - Full Papers},
  month = {Sep},
  publisher = {SEMDIAL},
  title = {A Corpus Study on Questions, Responses and Misunderstanding Signals in Conversations with Alzheimer’s Patients},
  url = {http://semdial.org/anthology/Z19-Nasreen_semdial_0013.pdf},
  year = {2019}
}
@article{mcgregor2019rerepresentingsemantics,
  author = {MCGREGOR, SE and AGRES, K and Rataj, K and PURVER, MRJ and WIGGINS, GA},
  journal = {Frontiers in Psychology},
  month = {Apr},
  publisher = {Frontiers Media},
  title = {Re-Representing Metaphor: Modelling metaphor perception using dynamically contextual distributional semantics},
  year = {2019},
  issn = {1664-1078},
  day = {15},
  publicationstatus = {accepted}
}
@inproceedings{drooghayes2019detectingfeatures,
  author = {DROOG-HAYES, M and WIGGINS, GA and PURVER, MRJ},
  booktitle = {},
  month = {Feb},
  organization = {Newport Beach, CA},
  title = {Detecting Summary-worthy Sentences: the Effect of Discourse Features},
  year = {2019},
  startyear = {2019},
  startmonth = {Jan},
  startday = {30},
  finishyear = {2019},
  finishmonth = {Feb},
  finishday = {2},
  conference = {13th IEEE International Conference on Semantic Computing},
  day = {1},
  publicationstatus = {published}
}
@article{xiao2019conceptualcreation,
  author = {Xiao, P and Toivonen, H and Gross, O and Cardoso, A and Correia, J and Machado, P and Martins, P and Oliveira, HG and Sharma, R and Pinto, AM and Díaz, A and Francisco, V and Gervás, P and Hervás, R and León, C and Forth, J and Purver, M and Wiggins, GA and Miljkovic, D and Podpecan, V and Pollak, S and Kralj, J and Znidarsic, M and Bohanec, M and Lavrac, N and Urbancic, T and Velde, FVD and Battersby, SA},
  journal = {ACM Computing Surveys},
  month = {Feb},
  number = {1},
  pages = {9:1--9:33},
  publisher = {Association for Computing Machinery},
  title = {Conceptual Representations for Computational Concept Creation},
  url = {http://www.eecs.qmul.ac.uk/~mpurver/papers/xiao-et-al19acm.pdf},
  url = {https://dl.acm.org/citation.cfm?id=3186729},
  volume = {52},
  year = {2019},
  doi = {10.1145/3186729},
  issn = {0360-0300},
  day = {28},
  publicationstatus = {published}
}
@inproceedings{mcginity2019theagents,
  author = {McGinity, MM and Purver, M and Wiggins, G},
  booktitle = {2019 AISB Convention},
  month = {Jan},
  pages = {21--27},
  title = {The influence of cost on the emergence of a common language among cooperating agents},
  year = {2019},
  abstract = {© 2019 AISB Convention. All rights reserved. We investigate convergence to a common language in a population of agents with two possible cooperation strategies: altruism and mutualism. We consider altruism as the willingness of an agent to engage in cooperative interactions with other agents regardless of the cost, whereas a mutualistic agent will cooperate if the expected outcome of the interaction is beneficial to itself. Agents engage in a language game in which they have to align their languages to carry out an action. Coordinating the language is costly, as is carrying out the action itself, a fact which influences their decision to help each other. Our model includes a revision protocol which allows individuals to adopt the cooperation strategy of more successful agents. Our results show that, if the costs are too high, mutualistic agents will enjoy a fitness advantage and that the unwillingness of agents to help each other will make it impossible for the population to reach a common language.},
  day = {1},
  publicationstatus = {published}
}
@book{martnvide2019prefacepreface,
  author = {Martín-Vide, C and Pollak, S and Purver, M},
  month = {Jan},
  pages = {v--vi},
  publisher = {},
  title = {Preface},
  volume = {11816 LNAI},
  year = {2019},
  isbn = {9783030313715},
  issn = {0302-9743},
  eissn = {1611-3349},
  day = {1},
  publicationstatus = {published}
}
@incollection{saitis2019thetimbre,
  author = {Saitis, C and Weinzierl, S},
  booktitle = {Timbre: Acoustics, Perception, and Cognition},
  month = {Jan},
  pages = {119--149},
  title = {The Semantics of Timbre},
  volume = {69},
  year = {2019},
  doi = {10.1007/978-3-030-14832-4_5},
  issn = {0947-2657},
  day = {1}
}
@incollection{siedenburg2019theresearch,
  author = {Siedenburg, K and Saitis, C and McAdams, S},
  booktitle = {Timbre: Acoustics, Perception, and Cognition},
  month = {Jan},
  pages = {1--19},
  title = {The Present, Past, and Future of Timbre Research},
  volume = {69},
  year = {2019},
  doi = {10.1007/978-3-030-14832-4_1},
  issn = {0947-2657},
  day = {1}
}
@incollection{caetano2019audiotimbre,
  author = {Caetano, M and Saitis, C and Siedenburg, K},
  booktitle = {Timbre: Acoustics, Perception, and Cognition},
  month = {Jan},
  pages = {297--333},
  title = {Audio Content Descriptors of Timbre},
  volume = {69},
  year = {2019},
  doi = {10.1007/978-3-030-14832-4_11},
  issn = {0947-2657},
  day = {1}
}
@inproceedings{chourdakis2019modellingmix,
  address = {http://dafx2019.bcu.ac.uk/programme/tue/oral-session-3},
  author = {Chourdakis, E and Ward, L and Paradis, M and Reiss, JD},
  booktitle = {},
  month = {Sep},
  organization = {Birmingham, UK},
  title = {Modelling Experts’ Decisions on Assigning Narrative Importances of Objects in a Radio Drama Mix},
  year = {2019},
  abstract = {There is an increasing number of consumers of broadcast audio who suffer from a degree of hearing impairment. One of the methods developed for tackling this issue consists of creating customizable object-based audio mixes where users can attenuate parts of the mix using a simple complexity parameter. The method relies on the mixing engineer classifying audio objects in the mix according to their narrative importance. This paper focuses on automating this process. Individual tracks are classified based on their music, speech, or sound effect content. Then the decisions for assigning narrative importance to each segment of a radio drama mix are modelled using mixture distributions. Finally, the learned decisions and resultant mixes are evaluated using the Short Term Objective Intelligibility, with reference to the narrative importance selections made by the original producer. This approach has applications for providing customizable mixes for legacy content, or automatically generated media content where the engineer is not able to intervene.},
  startyear = {2019},
  startmonth = {Sep},
  startday = {2},
  finishyear = {2019},
  finishmonth = {Sep},
  finishday = {6},
  conference = {22nd International Conference on Digital Audio Effects},
  day = {2},
  publicationstatus = {published}
}
@incollection{moffat2019soundsynthesis,
  author = {Moffat, D and SELFRIDGE, R and Reiss, J},
  booktitle = {Foundations in Sound Design for Interactive Media: A Multidisciplinary Approach},
  edition = {1},
  editor = {Filimowicz, M},
  month = {Jul},
  number = {13},
  publisher = {Routledge},
  series = {Foundations in Sound Design},
  title = {Sound effect synthesis},
  year = {2019},
  numberofpieces = {15},
  day = {31},
  publicationstatus = {published}
}
@inproceedings{martinezramirez2019modelingnetworks,
  author = {Martinez Ramirez, M and Reiss, J},
  booktitle = {},
  month = {May},
  title = {Modeling nonlinear audio effects with end-to-end deep neural networks},
  url = {http://www.m-marco.com/},
  year = {2019},
  abstract = {In the context of music production, distortion effects are mainly used for aesthetic reasons and are usually applied to electric musical instruments. Most existing methods for nonlinear modeling are often either simplified or optimized to a very specific circuit. In this work, we investigate deep learning architectures for audio processing and we aim to find a general purpose end-to-end deep neural network to perform modeling of nonlinear audio effects. We show the network modeling various nonlinearities and we discuss the generalization capabilities among different instruments.},
  conference = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  day = {16},
  publicationstatus = {published}
}
@inproceedings{wilkinson2019unifyinganalysis,
  author = {Wilkinson, WJ and Riis Andersen, M and Reiss, JD and Stowell, D and Solin, A},
  booktitle = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  month = {Apr},
  pages = {3352--3356},
  title = {Unifying Probabilistic Models for Time-frequency Analysis},
  volume = {2019-May},
  year = {2019},
  abstract = {© 2019 IEEE. In audio signal processing, probabilistic time-frequency models have many benefits over their non-probabilistic counterparts. They adapt to the incoming signal, quantify uncertainty, and measure correlation between the signal's amplitude and phase information, making time domain resynthesis straightforward. However, these models are still not widely used since they come at a high computational cost, and because they are formulated in such a way that it can be difficult to interpret all the modelling assumptions. By showing their equivalence to Spectral Mixture Gaussian processes, we illuminate the underlying model assumptions and provide a general framework for constructing more complex models that better approximate real-world signals. Our interpretation makes it intuitive to inspect, compare, and alter the models since all prior knowledge is encoded in the Gaussian process kernel functions. We utilise a state space representation to perform efficient inference via Kalman smoothing, and we demonstrate how our interpretation allows for efficient parameter learning in the frequency domain.},
  doi = {10.1109/ICASSP.2019.8682306},
  isbn = {9781479981311},
  issn = {1520-6149},
  conference = {2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  day = {17},
  publicationstatus = {published}
}
@phdthesis{ronan2019intelligentaudio,
  author = {RONAN, DM},
  editor = {REISS, J and GUNES, H and CAVALLARO, A},
  month = {Jan},
  school = {},
  title = {Intelligent Subgrouping of Multitrack Audio},
  year = {2019},
  abstract = {Subgrouping facilitates the simultaneous manipulation of a number of audio tracks and is a central aspect of mix engineering. However, the decision process of subgrouping is a poorly documented technique. This research sheds light on this ubiquitous but poorly defined mix practice, provides rules and constraints on how it should be approached, and demonstrates its benefit to an automatic mixing system.
I first explored the relationship that subgrouping has with perceived mix quality by examining a number of mix projects. This was in order to decipher the actual process of creating subgroups and to see if any of the decisions made were intrinsically linked to mix quality. I found mix quality to be related to the number of subgroups and type of subgroup processing used. This subsequently led me to interviewing distinguished professionals in the audio engineering field, with the intention of gaining a deeper understanding of the process. The outcome of these interviews and the previous analyses of mix projects allowed me to propose rules that could be used for real life mixing and automatic mixing. Some of the rules I established were used to research and develop a method for the automatic creation of subgroups using machine learning techniques.
I also investigated the relationship between music production quality and human emotion. This was to see if music production quality had an emotional effect on a particular type of listener. The results showed that the emotional impact of mixing only really mattered to those with critical listening skills. This result is important for automatic mixing systems in general, as it would imply that quality only really matters to a minority of people.
I concluded my research on subgrouping by conducting an experiment to see if subgrouping would benefit the perceived clarity and quality of a mix. The results of a subjective listening test showed this to be true.},
  filedyear = {2018},
  filedmonth = {Nov},
  keyword = {Centre for Intelligent Sensing},
  keyword = {Multitrack Audio},
  keyword = {Subgrouping},
  day = {11}
}
@inproceedings{mycroft2019visuallymixing,
  author = {Mycroft, J and Reiss, JD and Stockman, T},
  booktitle = {SMC 2016 - 13th Sound and Music Computing Conference, Proceedings},
  month = {Jan},
  pages = {332--337},
  title = {Visually representing and interpreting multivariate data for audio mixing},
  year = {2019},
  abstract = {Copyright: © 2016 Mycroft et al. This is an open-access article distributed under the terms of the Creative Commons Attribution License 3.0 Unported, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited. The majority of Digital Audio Workstation designs represent mix data using a channel strip metaphor. While this is a familiar design based on physical mixing desk layout, it can lead to a visually complex interface incorporating a large number of User Interface objects which can increase the need for navigation and disrupt the mixing workflow. Within other areas of data visualisation, multivariate data objects such as glyphs are used to simultaneously represent a number of parameters within one graphical object by assigning data to specific visual variables. This can reduce screen clutter, enhance visual search and support visual analysis and interpretation of data. This paper reports on two subjective evaluation studies that investigate the efficacy of different design strategies to visually encode mix information (volume, pan, reverb and delay) within a stage metaphor mixer using multivariate data objects and a channel strip design using faders and dials. The analysis of the data suggest that compared to channel strip designs, multivariate objects can lead to quicker visual search without any subsequent reduction in search accuracy.},
  isbn = {9783000537004},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{chourdakis2019taggingreverberation,
  author = {Chourdakis, ET and Reiss, JD},
  booktitle = {AES 146th International Convention},
  month = {Jan},
  title = {Tagging and retrieval of room impulse responses using semantic word vectors and perceptual measures of reverberation},
  year = {2019},
  abstract = {© 2019 The Audio Engineering Society (AES). All rights reserved. This paper studies tagging and retrieval of room impulse responses from a labelled library. A similarity-based method is introduced that relies on perceptually relevant characteristics of reverberation. This method is developed using a publicly available dataset of algorithmic reverberation settings. Semantic word vectors are introduced to exploit semantic correlation among tags and allow for unseen words to be used for retrieval. Average precision is reported on a subset of the dataset as well as tagging of recorded room impulse responses. The developed approach manages to assign downloaded room impulse responses to tags that match their short descriptions. Furthermore, introducing semantic word vectors allows it to perform well even when large portions of the training data have been replaced by synonyms.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{foster2019reproducingsynthesis,
  author = {Foster, D and Reiss, JD},
  booktitle = {AES 146th International Convention},
  month = {Jan},
  title = {Reproducing bass guitar performances using descriptor driven synthesis},
  year = {2019},
  abstract = {© 2019 The Audio Engineering Society (AES). All rights reserved. Sample-based synthesis is a widely used method of synthesising the sounds of live instrumental performances, but the control of such sampler instruments is made difficult by the number of parameters that control the output, the expertise required to set those parameters and by the constraints of the real-time system. In this paper, the principles of descriptor-driven synthesis were used to develop a pair of software tools that aid the user in the specific task of reproducing a live performance using a sampler instrument, by the automatic generation of MIDI controller messages derived from analysis of the input audio. The techniques employed build on existing work and commercially available products. The output of the system is compared to manipulation by expert users. The results show that the system outperforms the human version, despite the latter taking considerably more time. Future developments of the techniques are discussed, including the application to automatic performer replication.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{snchez2019realtimesolids,
  author = {Sánchez, P and Reiss, JD},
  booktitle = {AES 146th International Convention},
  month = {Jan},
  title = {Real-time synthesis of sound effects caused by the interaction between two solids},
  year = {2019},
  abstract = {© 2019 The Audio Engineering Society (AES). All rights reserved. We present the implementation of two sound effect synthesis engines that work in a web environment. These are physically driven models that recreate the sonic behaviour of friction and impact interactions. The models are integrated into an online project aimed at providing users with browser-based sound effect synthesis tools that can be controlled in real time. This is achieved thanks to a physical modelling approach and existing web tools like the Web Audio API. A modular architecture was followed, making the code versatile and easy to reuse, which encourages the development of higher-level models based on the existing ones, as well as similar models based on the same principles. The final implementations present satisfactory performance results despite some minor issues.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{ziga2019realisticoptimisation,
  author = {Zúñiga, J and Reiss, JD},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Jan},
  title = {Realistic procedural sound synthesis of bird song using particle swarm optimisation},
  year = {2019},
  abstract = {© 147th Audio Engineering Society International Convention 2019. All rights reserved. We present a synthesis algorithm for approximating bird song using particle swarm optimization to match real bird recordings. Frequency and amplitude envelope curves are first extracted from a bird recording. Further analysis identifies the presence of even and odd harmonics. A particle swarm algorithm is then used to find cubic Bezier curves which emulate the envelopes. These curves are applied to modulate a sine oscillator and its harmonics. The synthesised syllable can then be repeated to generate the sound. 36 bird sounds have been emulated this way, and a real-time web-based demonstrator is available, with user control of all parameters. Objective evaluation showed that the synthesised bird sounds captured most audio features of the recordings.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{mourgela2019perceptuallyreference,
  author = {Mourgela, A and Agus, T and Reiss, JD},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Jan},
  title = {Perceptually motivated hearing loss simulation for audio mixing reference},
  year = {2019},
  abstract = {© 147th Audio Engineering Society International Convention 2019. All rights reserved. This paper proposes the development of a hearing loss simulation for use in audio mix referencing, designed according to psychoacoustics and audiology research findings. The simulation proposed in this paper aims to reproduce four perceptual aspects of hearing loss: threshold elevation, loss of dynamic range, reduced frequency and temporal resolution, while providing an audio input/output functionality.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{colonel2019exploringfeatures,
  author = {Colonel, J and Reiss, J},
  booktitle = {147th Audio Engineering Society International Convention 2019},
  month = {Jan},
  title = {Exploring preference for multitrack mixes using statistical analysis of MIR and textual features},
  year = {2019},
  abstract = {© 147th Audio Engineering Society International Convention 2019. All rights reserved. We investigate listener preference in multitrack music production using the Mix Evaluation Dataset, comprised of 184 mixes across 19 songs. Features are extracted from verses and choruses of stereo mixdowns. Each observation is associated with an average listener preference rating and standard deviation of preference ratings. Principal component analysis is performed to analyze how mixes vary within the feature space. We demonstrate that virtually no correlation is found between the embedded features and either average preference or standard deviation of preference. We instead propose using principal component projections as a semantic embedding space by associating each observation with listener comments from the Mix Evaluation Dataset. Initial results disagree with simple descriptions such as “width” or “loudness” for principal component axes.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{tom2019anpractices,
  author = {Tom, A and Reiss, J and Depalle, P},
  booktitle = {AES 146th International Convention},
  month = {Jan},
  title = {An automatic mixing system for multitrack spatialization for stereo based on unmasking and best panning practices},
  year = {2019},
  abstract = {© 2019 The Audio Engineering Society (AES). All rights reserved. One of the most important tasks in audio production is to place sound sources across the stereo field so as to reduce masking and immerse the listener within the space. This process of panning sources of a multitrack recording to achieve spatialization and masking minimization is a challenging optimization problem, mainly because of the complexity of auditory perception. We propose a novel panning system that makes use of a common framework for spectral decomposition, masking detection, multitrack sub-grouping and frequency-based spreading. It creates a well spatialized mix with increased clarity while complying to the best panning practices. Both real-time and offline optimization-based approaches are designed and implemented. We investigate the reduction of inter-track auditory masking using the MPEG psychoacoustic model along with various other masking and spatialization metrics, extended for multitrack content. Subjective and objective tests compare the proposed work against mixes by professional sound engineers and existing auto-mix systems.},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{wilkinson2019endtoendanalysis,
  author = {Wilkinson, WJ and Andersen, MR and Reiss, JD and Stowell, D and Solin, A},
  booktitle = {36th International Conference on Machine Learning, ICML 2019},
  month = {Jan},
  pages = {11751--11760},
  title = {End-to-end probabilistic inference for nonstationary audio analysis},
  volume = {2019-June},
  year = {2019},
  abstract = {Copyright © 2019 ASME A typical audio signal processing pipeline includes multiple disjoint analysis stages, including calculation of a time-frequency representation followed by spectrogram-based feature analysis. We show how time-frequency analysis and non-negative matrix factorisation can be jointly formulated as a spectral mixture Gaussian process model with nonstationary priors over the amplitude variance parameters. Further, we formulate this nonlinear model's state space representation, making it amenable to infinite-horizon Gaussian process regression with approximate inference via expectation propagation, which scales linearly in the number of time steps and quadratically in the state dimensionality. By doing so, we are able to process audio signals with hundreds of thousands of data points. We demonstrate, on various tasks with empirical data, how this inference scheme outperforms more standard techniques that rely on extended Kalman filtering.},
  isbn = {9781510886988},
  day = {1},
  publicationstatus = {published}
}
@inproceedings{demirel2019chordscale,
  address = {Málaga, Spain},
  author = {Demirel, E and Bozkurt, B and Serra, X},
  booktitle = {Proceedings of the 16th Sound \& Music Computing Conference},
  editor = {Barbancho, I and Tardón, LJ and Peinado, A and Barbancho, AM},
  month = {May},
  organization = {Sound \& Music Computing Conference},
  title = {Automatic chord-scale recognition using harmonic pitch class profiles},
  url = {https://zenodo.org/record/3249258},
  year = {2019}
}
@comment{{jabref-meta: databaseType:bibtex;}}
