% Workshop paper (pp. 310--313) in the BioCreative VII Challenge Evaluation
% Workshop proceedings. Stored as an inproceedings entry so that the author
% list and the proceedings title (booktitle) are picked up by standard styles.
@inproceedings{Dong_H-2021_64093,
  author    = {Dong, H. and Wang, M. and Zhang, H. and Casey, A. and Wu, H.},
  title     = {{KnowLab} at {BioCreative} {VII} Track 5 {LitCovid}: Ensemble of deep learning models from diverse sources for {COVID-19} literature classification},
  booktitle = {Proceedings of the {BioCreative} {VII} Challenge Evaluation Workshop},
  pages     = {310--313},
  publisher = {BioCreative},
  month     = nov,
  year      = {2021},
  url       = {https://publichealthscotland.scot/id/64093},
  abstract  = {Classifying scientific literature into an abstract set of topics requires leveraging various sources from the publication and external knowledge. In the BioCreative VII LitCovid track on COVID-19 literature multi-label topic annotation, we applied state-of-the-art deep learning based document classification models (BERT, variations of HAN, CNN, LSTM) and each with a different combination of metadata (title, abstract, keywords, and journal), knowledge sources, pre-trained embedding, and data augmentation techniques. Several ensemble techniques were then used to combine individual model outputs for synergized predictions. We showed that a class-specific average ensembling of the pre-trained and task-specific models achieved the best micro-F1 score in validation (90.31\%) and testing (89.32\%) sets in the experiments, beyond the medium (89.25\%) and mean value (87.78\%) of all 80 valid submissions. We summarize lessons learned from our work on this task},
}