% Journal article: Tavares et al., Interdisciplinary Sciences: Computational
% Life Sciences 10(1), 2018.
% The funding / copyright statement is stored in annote rather than note:
% standard styles print the note field as part of the reference, whereas
% annote is only used by annotated-bibliography styles.
% Author names all use the "Last, First" form, with given names left unbraced
% so that each given name is a separate token for the style to format.
@article{60885cf225bc447fa7c30f84d2c09669,
  author    = {Tavares, Ana Helena and Raymaekers, Jakob and Rousseeuw, Peter J. and Silva, Raquel M. and Bastos, Carlos A. C. and Pinho, Armando and Brito, Paula and Afreixo, Vera},
  title     = {Comparing reverse complementary genomic words based on their distance distributions and frequencies},
  journal   = {Interdisciplinary Sciences: Computational Life Sciences},
  year      = {2018},
  month     = mar,
  day       = {1},
  volume    = {10},
  number    = {1},
  pages     = {1--11},
  doi       = {10.1007/s12539-017-0273-0},
  issn      = {1913-2751},
  publisher = {Springer Verlag},
  language  = {English},
  keywords  = {Chargaff{\textquoteright}s rules, Distance distribution, Human genome, Peak dissimilarity, Symmetric word pairs},
  abstract  = {In this work, we study reverse complementary genomic word pairs in the human DNA, by comparing both the distance distribution and the frequency of a word to those of its reverse complement. Several measures of dissimilarity between distance distributions are considered, and it is found that the peak dissimilarity works best in this setting. We report the existence of reverse complementary word pairs with very dissimilar distance distributions, as well as word pairs with very similar distance distributions even when both distributions are irregular and contain strong peaks. The association between distribution dissimilarity and frequency discrepancy is also explored, and it is speculated that symmetric pairs combining low and high values of each measure may uncover features of interest. Taken together, our results suggest that some asymmetries in the human genome go far beyond Chargaff{\textquoteright}s rules. This study uses both the complete human genome and its repeat-masked version.},
  annote    = {Funding Information (Acknowledgements): This work was partially supported by the Portuguese Foundation for Science and Technology (FCT), Center for Research and Development in Mathematics and Applications (CIDMA), Institute of Biomedicine (iBiMED) and Institute of Electronics and Telematics Engineering of Aveiro (IEETA), within projects UID/MAT/04106/2013, UID/BIM/04501/2013 and UID/CEC/00127/2013. A. Tavares acknowledges the Ph.D. Grant PD/BD/105729/2014 from the FCT. The research of P. Brito was financed by the ERDF---European Regional Development Fund through the Operational Programme for Competitiveness and Internationalization---COMPETE 2020 Programme within project POCI-01-0145-FEDER-006961, and by the FCT as part of project UID/EEA/50014/2013. The research of J. Raymaekers and P. J. Rousseeuw was supported by projects of Internal Funds KU Leuven. Publisher Copyright: {\textcopyright} 2017, Springer-Verlag GmbH Germany, part of Springer Nature. Copyright: Copyright 2018 Elsevier B.V., All rights reserved.},
}