% Conference paper in CEUR Workshop Proceedings Vol-1747 (ICBO BioCreative 2016).
% NOTE(review): the abstract was recovered from a text extraction that had lost
% all inter-word spaces and every word containing an "fi"/"ffi" ligature
% (finer, suffice, profiles, sufficient, significant); those words were
% reinstated from context -- confirm against the PDF at the url below.
@inproceedings{IT606,
  author    = {Manda, Prashanti and Balhoff, James P. and Vision, Todd J.},
  title     = {Measuring the importance of annotation granularity to the detection of semantic similarity between phenotype profiles},
  booktitle = {Proceedings of the Joint International Conference on Biological Ontology and {BioCreative} ({ICBO} {BioCreative} 2016)},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {1747},
  publisher = {CEUR-WS.org},
  year      = {2016},
  month     = nov,
  url       = {http://ceur-ws.org/Vol-1747/IT606_ICBO2016.pdf},
  abstract  = {In phenotype annotations curated from the biological and medical literature, considerable human effort must be invested to select ontological classes that capture the expressivity of the original natural language descriptions, and finer annotation granularity can also entail higher computational costs for particular reasoning tasks. Do coarse annotations suffice for certain applications? Here, we measure how annotation granularity affects the statistical behavior of semantic similarity metrics. We use a randomized dataset of phenotype profiles drawn from 57,051 taxon-phenotype annotations in the Phenoscape Knowledgebase. We compared query profiles having variable proportions of matching phenotypes to subject database profiles using both pairwise and groupwise Jaccard (edge-based) and Resnik (node-based) semantic similarity metrics, and compared statistical performance for three different levels of annotation granularity: entities alone, entities plus attributes, and entities plus qualities (with implicit attributes). All four metrics examined showed more extreme values than expected by chance when approximately half the annotations matched between the query and subject profiles, with a more sudden decline for pairwise statistics and a more gradual one for the groupwise statistics. Annotation granularity had a negligible effect on the position of the threshold at which matches could be discriminated from noise. These results suggest that coarse annotations of phenotypes, at the level of entities with or without attributes, may be sufficient to identify phenotype profiles with statistically significant semantic similarity.},
}