% BT304 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT304,
  title     = {{BT304}: {BioCconvert}: A Conversion Tool Between {BioC} and {PubAnnotation}},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

BioC is a simple XML data format for text, annotations, and relations. PubAnnotation is a repository of text annotations focused on the life science literature. A conversion tool between BioC XML and the JSON import / export format of PubAnnotation has been developed, BioCconvert. As a demonstration, the Ab3P gold standard abbreviation annotations are being made available through PubAnnotation.

},
  url       = {http://ceur-ws.org/Vol-1747/BT304_ICBO2016.pdf},
  author    = {Comeau, Donald C. and Islamaj Do{\u g}an, Rezarta and Kim, Sun and Wei, Chih-Hsuan and Wilbur, W. John and Lu, Zhiyong},
}

% BT303 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT303,
  title     = {{BT303}: {PubAnnotation}: a public shared platform for scientific literature annotation},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

"In the last decade, the technology for biomedical literature annotation made a significant progress in terms of accuracy and speed. Now, some annotation systems claim that they have reached a production level. However, there still remain critical issues which we believe hinder further progress of the community. Among them, a relatively well known issue is "interoperability" of annotation resources. We also recognize that the community is missing a general solution for "storage infrastructure". The talk will present the PubAnnotation project which aims at addressing these two issues. In the end, a new model for "sustainable shared tasks", which is implemented on PubAnnotation, will be introduced as well."

},
  url       = {http://ceur-ws.org/Vol-1747/BT303_ICBO2016.pdf},
  author    = {Kim, Jin-Dong},
}

% BT302 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT302,
  title     = {{BT302}: Annotations for biomedical research and healthcare {\textendash} Bridging the gap},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

"Characterizing protein products from various model organisms with Gene Ontology terms, indexing the biomedical literature with MeSH descriptors, and coding clinical data with ICD10-CM all constitute examples of annotation tasks, i.e., the extraction and summarization of knowledge related to a biological entity, article or patient, in reference to some controlled vocabulary or ontology. However, the annotations made in biomedical research and healthcare environments tend to rely on different terminologies and ontologies, making it difficult to reconcile these annotations for translational research purposes. We will discuss how terminology integration systems, such as the Unified Medical Language System (UMLS) and BioPortal, can help bridge the gap between annotations made by biomedical researchers and physicians, and argue that more efforts are needed to foster interoperability between the resources developed by these two communities."

},
  url       = {http://ceur-ws.org/Vol-1747/BT302_ICBO2016.pdf},
  author    = {Bodenreider, Olivier},
}

% BT301 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT301,
  title     = {{BT301}: {NLP} for the Institute: Developing and Deploying an {NLP} Capability to Accelerate Cancer Research},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

"It has been well documented that a great deal of data useful for medical research is present in clinical narrative text. There is perhaps less discussion about how often what was structured data at its origin has become inaccessible except in free text form. This problem is further compounded in tertiary care institutions, like the OHSU Knight Cancer Institute, where the entire history of a referred patient{\textquoteright}s condition may only be present in the electronic health record (EHR) as free text. At the same time, future medical advances, such as in cancer research, will require much more complete patient data than has been previously available. Such advances include the discovery of new cures, expanding early detection, and realizing the promise of precision medicine. Phenotype description and outcome characterization are two areas in particular where text sources could greatly supplement our current data. The OHSU Knight Cancer Institute has begun a program to create a natural language processing (NLP) capability to extract, store, and link data from free text sources at the patient level, and make this data available to researchers in a continuous, reusable, efficient and timely manner through services delivery from the Translational Research Hub (TRH). This talk will present the challenges, progress, and future goals of our program to build NLP capabilities that can help us use free text from the EHR to first support the transformation of cancer research with the hopes of positively impacting clinical care in the future."

},
  url       = {http://ceur-ws.org/Vol-1747/BT301_ICBO2016.pdf},
  author    = {Cohen, Aaron},
}

% BT205 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT205,
  title     = {{BT205}: Text Mining for Drug Development: Gathering Insights to Support Decision Making},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Drug discovery in Pharma R\&D is an information driven process requiring many disparate bits of data from many different sources, both structured and unstructured. Text mining is the key methodology used to extract entities and relationships from unstructured text in the quest for the knowledge needed to bring a safe and effective drug to market and beyond. Much of the insight needed in early drug research to identify drug target to disease relationships and progress a potential drug target, comes from published literature and internal reports. Later stage drug development requires many additional sources of information including case reports, clinical trials, competitive intelligence and other diverse sources. In this publication, I will present 4 different use cases on how text mining is used to drive decision making in drug discovery and development and also how it can be used to identify patient insights from sources such as social media.

},
  url       = {http://ceur-ws.org/Vol-1747/BT205_ICBO2016.pdf},
  author    = {Matis-Mitchell, Sherri},
}

% BT204 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT204,
  title     = {{BT204}: {CancerMine}: Knowledge base construction for personalised cancer treatment},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Knowledge of the relevant genomic aberrations that drive a particular cancer type is necessary to accelerate efficient interpretation of genomic data and enable large-scale endeavors in precision medicine. Currently, this field is limited by the lack of focused and scalable literature curation tools that can reliably capture the required information. Here we present a knowledge-base of genes that have been described in the literature as drivers, oncogenes or tumour suppressors with respect to a specific type of cancer. We have annotated a large body of literature which reports oncogenic aberrations using a custom designed annotation tool. We then applied VERSE, an in-house relation extraction tool, to catalogue driver mutations and illustrate the ability to build a useful resource for clinical interpretation of genomic data for personalized treatment approaches.

},
  url       = {http://ceur-ws.org/Vol-1747/BT204_ICBO2016.pdf},
  author    = {Lever, Jake and Jones, Martin and Jones, Steven J. M.},
}

% BT203 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT203,
  title     = {{BT203}: {MutD} {\textendash} A {PubMed} Scale Resource for Protein Mutation-Disease Relations through Bio-Medical Literature Mining},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

A large amount of information about the role of gene variants and mutations in diseases is available in curated databases such as OMIM, ClinVar, and UniprotKB. However, much of this information remains {\textquoteleft}locked{\textquoteright} in the unstructured form in the scientific publications. Since manual curation involves significant human effort and time there is always a lag in the information between the curated databases and the literature. The recent findings published in the literature takes significant time to find its way into the curated knowledgebase. Text mining approaches can accelerate the process of assembling this knowledge from the published literature. However, developing a text-mining system with semantic understanding capability in the biomedical domain is very challenging. In an earlier work, we described MutD, a literature mining system that extracts relationship between protein point mutation and diseases from bio-medical abstracts. In this abstract, we present access to a PubMed scale resource through a web interface that allows users to retrieve protein point mutation-disease relations extracted through biomedical literature mining.

},
  url       = {http://ceur-ws.org/Vol-1747/BT203_ICBO2016.pdf},
  author    = {Elayavilli, R. K. and Rastegar-Mojarad, Majid and Liu, Hongfang},
}

% BT202 -- ICBO/BioCreative 2016 talk, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT202,
  title     = {{BT202}: Social Media Mining for Pharmacovigilance},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

N/A

},
  url       = {http://icbo.cgrb.oregonstate.edu/},
  author    = {Gonzalez, Graciela},
}

% BT201 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BT201,
  title     = {{BT201}: Text mining to enable routine personalized cancer therapy},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Genomic profiling information is frequently available to oncologists, enabling targeted cancer therapy. Because clinically relevant genomic information is rapidly emerging in narrative data sources such as biomedical literature and clinical trials documents, there is a need for text mining technologies to support targeted therapies. In this talk, we will present two projects about developing text-mining tools to enable personalized cancer therapy, including 1) to identify molecular effects of drugs in biomedical literature, and 2) to create a knowledge base of cancer treatment trials with annotations about genetic alterations. We believe such tools would be valuable for physicians and patients who are seeking information about personalized cancer therapy, thus facilitating their decision making.

},
  url       = {http://ceur-ws.org/Vol-1747/BT201_ICBO2016.pdf},
  author    = {Xu, Hua},
}

% Paper BT105 -- ICBO/BioCreative 2016, CEUR-WS.org volume 1747 (export date 11/30/16).
% NOTE(review): citation key BT104 differs from the paper id BT105 used in the title and URL;
% the key is kept unchanged so existing citations still resolve.
@inproceedings{BT104,
  title     = {{BT105}: Opportunities and challenges presented by {Wikidata} in the context of biocuration},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Wikidata is a world readable and writable knowledge base maintained by the Wikimedia Foundation. It offers the opportunity to collaboratively construct a fully open access knowledge graph spanning biology, medicine, and all other domains of knowledge. To meet this potential, social and technical challenges must be overcome most of which are familiar to the biocuration community. These include community ontology building, high precision information extraction, provenance, and license management. By working together with Wikidata now, we can help shape it into a trustworthy, unencumbered central node in the Semantic Web of biomedical data.

},
  url       = {http://ceur-ws.org/Vol-1747/BT105_ICBO2016.pdf},
  author    = {Good, Benjamin and Burgstaller-Muehlbacher, Sebastian and Mitraka, Elvira and Putman, Timothy and Su, Andrew and Waagmeester, Andra},
}

% Paper BT104 -- ICBO/BioCreative 2016, CEUR-WS.org volume 1747 (export date 11/30/16).
% NOTE(review): citation key BT103 differs from the paper id BT104 used in the title and URL;
% the key is kept unchanged so existing citations still resolve.
@inproceedings{BT103,
  title     = {{BT104}: Crowdsourcing Protein Family Database Curation},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

We propose a novel method for crowdsourcing a protein family database. We discuss how we intend to identify novel groupings of proteins from user sequence similarity search, and how text mining will be applied to assist in annotation of these novel groupings, and more broadly as an enrichment of protein sequence similarity search results.

},
  url       = {http://ceur-ws.org/Vol-1747/BT104_ICBO2016.pdf},
  author    = {Jeffryes, Matt and Liakata, Maria and Bateman, Alex},
}

% Paper BT103 -- ICBO/BioCreative 2016, CEUR-WS.org volume 1747 (export date 11/30/16).
% NOTE(review): citation key BT102 differs from the paper id BT103 used in the title and URL;
% the key is kept unchanged so existing citations still resolve.
@inproceedings{BT102,
  title     = {{BT103}: Collaborative Workspaces for Pathway Curation},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

We present a web based visual biocuration workspace, focusing on curating detailed mechanistic pathways. It was designed as a flexible platform where multiple humans, NLP and AI agents can collaborate in real-time on a common model using an event driven API. We will use this platform for exploring disruptive technologies that can scale up biocuration such as NLP, human-computer collaboration, crowd-sourcing, alternative publishing and gamification. As a first step, we are designing a pilot to include an author-curation step into the scientific publishing, where the authors of an article create formal pathway fragments representing their discovery- heavily assisted by computer agents. We envision that this {\textquotedblleft}micro-curation{\textquotedblright} use-case will create an excellent opportunity to integrate multiple NLP approaches and semi-automated curation.

},
  url       = {http://ceur-ws.org/Vol-1747/BT103_ICBO2016.pdf},
  author    = {Durupinar-Babur, Funda and Siper, M. C. and Dogrusoz, Ugur and Bahceci, Istemi and Babur, Ozgun and Demir, Emek},
}

% Paper BT102 -- ICBO/BioCreative 2016, CEUR-WS.org volume 1747 (export date 11/30/16).
% NOTE(review): citation key BT101 differs from the paper id BT102 used in the title and URL;
% the key is kept unchanged so existing citations still resolve.
@inproceedings{BT101,
  title     = {{BT102}: Cycles of Scientific Investigation in Discourse {\textendash} Machine Reading Methods for the Primary Research Contributions of a Paper},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

We describe a novel approach to machine reading of the primary scientific literature. We treat a description of an experiment as a discourse, viewing a scientific corpus not merely as a collection of documents, but also an extended conversation formed by the collective set of experiments, their introductions and interpretations. This paper introduces this approach as a methodology called {\textquoteleft}Cycles of Scientific Investigation in Discourse{\textquoteright} (CoSID). In CoSID, we capture the central conceptual structure of a paper as a series of nested reasoning loops, composed of passages in results sections, which describe individual research findings. We ground our work with a number of worked examples based on data from the MINTACT and Pathway Logic databases, and illustrate the idea in the context of machine-enabled biocuration.

},
  url       = {http://ceur-ws.org/Vol-1747/BT102_ICBO2016.pdf},
  author    = {Burns, Gully A. and de Waard, Anita and Dasigi, Pradeep and Hovy, Eduard H.},
}

% Paper BT101 -- ICBO/BioCreative 2016, CEUR-WS.org volume 1747 (export date 11/30/16).
% NOTE(review): numeric citation key 342 while the title and URL use paper id BT101;
% the key is kept unchanged so existing citations still resolve.
@inproceedings{342,
  title     = {{BT101}: {SourceData}: Making Data discoverable},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

In molecular and cell biology, most of the data presented in published papers are not available in formats that allow for direct analysis and systematic mining. The goal of the SourceData project (http://sourcedata.embo.org) is to make published data easier to find, to connect papers containing related information and to promote the reuse and novel analysis of published data. The main concept underlying the project is that the structure of a dataset provides information about the design of the study in question and can be exploited in powerful data-oriented search strategies. SourceData has therefore developed tools to generate machine-readable descriptive metadata from figures in published manuscripts. Experimentally tested hypotheses are represented as directed relationships between standardized biological entities. Once processed, a comprehensive {\textquoteleft}scientific knowledge graph{\textquoteright} can be generated from this data (see demo video1 at https://vimeo.com/sourcedata/kg), making the body of data efficiently searchable. Importantly, this graph is objectively grounded in published data and not on the potentially subjective interpretation of the results.\ 

},
  url       = {http://ceur-ws.org/Vol-1747/BT101_ICBO2016.pdf},
  author    = {George, Nancy and El-Gebali, Sara and Lemberger, Thomas},
}

% BP03 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BP03,
  title     = {{BP03}: Label Embedding Approach for Transfer Learning},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Automatically tagging textual mentions with the concepts, types and entities that they represent are important tasks for which supervised learning has been found to be very effective. In this paper, we consider the problem of exploiting multiple sources of training data with variant ontologies. We present a new transfer learning approach based on embedding multiple label sets in a shared space, and using it to augment the training data.

},
  url       = {http://ceur-ws.org/Vol-1747/BP03_ICBO2016.pdf},
  author    = {Obeidat, Rasha and Fern, Xiaoli and Tadepalli, Prasad},
}

% BP02 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BP02,
  title     = {{BP02}: Disease Named Entity Recognition Using {NCBI} Corpus},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Named Entity Recognition (NER) in biomedical literature is a very active research area. NER is a crucial component of biomedical text mining because it allows for information retrieval, reasoning and knowledge discovery. Much research has been carried out in this area using semantic type categories, such as {\textquotedblleft}DNA{\textquotedblright}, {\textquotedblleft}RNA{\textquotedblright}, {\textquotedblleft}proteins{\textquotedblright} and {\textquotedblleft}genes{\textquotedblright}. However, disease NER has not received its needed attention yet, specifically human disease NER. Traditional machine learning approaches lack the precision for disease NER, due to their dependence on token level features, sentence level features and the integration of features, such as orthographic, contextual and linguistic features. In this paper a method for disease NER is proposed which utilizes sentence and token level features based on Conditional Random Fields using the NCBI disease corpus. Our system utilizes rich features including orthographic, contextual, affixes, bigrams, part of speech and stem based features. Using these feature sets our approach has achieved a maximum F-score of 94\% for the training set by applying 10 fold cross validation for semantic labeling of the NCBI disease corpus. For testing and development corpus the model has achieved an F-score of 88\% and 85\% respectively.

},
  url       = {http://ceur-ws.org/Vol-1747/BP02_ICBO2016.pdf},
  author    = {Hahn, Thomas and Rahman, Hidayat Ur and Segall, Richard},
}

% BP01 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BP01,
  title     = {{BP01}: {Ignet}: A centrality and {INO}-based web system for analyzing and visualizing literature-mined networks},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Ignet (Integrative Gene Network) is a web-based system for dynamically updating and analyzing gene interaction networks mined using all PubMed abstracts. Four centrality metrics, namely degree, eigenvector, betweenness, and closeness are used to determine the importance of genes in the networks. Different gene interaction types between genes are classified using the Interaction Network Ontology (INO) that classifies interaction types in an ontological hierarchy along with individual keywords listed for each interaction type. An interactive user interface is designed to explore the interaction network as well as the centrality and ontology based network analysis. Availability: http://ignet.hegroup.org.

},
  url       = {http://ceur-ws.org/Vol-1747/BP01_ICBO2016.pdf},
  author    = {Ozgur, Arzucan and Hur, Junguk and Xiang, Zuoshuang and Ong, Edison and Radev, Dragomir and He, Yongqun},
}

% BIT106 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BIT106,
  title     = {{BIT106}: Use of text mining for {Experimental Factor Ontology} coverage expansion in the scope of target validation},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Understanding the molecular biology and development of disease plays a key role in drug development. Integrating evidence from different experimental approaches with data available from public resources (such as gene expression level changes and reaction pathways affected by pathogenic mutations) can be a powerful approach for evaluating different aspects of target-disease associations. The application of ontologies is of fundamental importance to effective integration. The Target Validation Platform is a user-friendly interface that integrates such evidences from various resources with the aim of assisting scientists to identify and prioritise drug targets. Currently, the EFO is used as the reference ontology for diseases in the platform, importing terms from existing disease ontologies such as the Human Phenotype Ontology as required. In order to generalize the use of EFO from key target-diseases for wider use, we need to compare the target associated disease coverage in EFO with the scope of other available disease terminology resources. In this study, we address this issue by using text mining and present our initial results.

},
  url       = {http://ceur-ws.org/Vol-1747/BIT106_ICBO2016.pdf},
  author    = {Kafkas, Senay and Dunham, Ian and Parkinson, Helen and McEntyre, Johanna},
}

% BIT105 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BIT105,
  title     = {{BIT105}: A Web Application for Extracting Key Domain Information for Scientific Publications using Ontology},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

We present demos of an ongoing project, domain informational vocabulary extraction (DIVE), which aims to enrich digital publications through entity and key informational words detection and by adding additional annotations. The system implements multiple strategies for biological entity detection, including using regular expression rules, ontologies, and a keyword dictionary. These extracted entities are then stored in a database and made accessible through an interactive web application for curation and evaluation by authors. Through the web interface, the user can make additional annotations and corrections to the current results. The updates can then be used to improve the entity detection in subsequent processed articles. Although the system is being developed in the context of annotating journal articles, it can also be beneficial to domain curators and researchers at large.

},
  url       = {http://ceur-ws.org/Vol-1747/BIT105_ICBO2016.pdf},
  author    = {Xu, Weijia and Gupta, Amit and Jaiswal, Pankaj and Taylor, Crispin and Lockhart, Patti},
}

% BIT104 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BIT104,
  title     = {{BIT104}: Cardiovascular Health and Physical Activity: A Model for Health Promotion and Decision Support Ontologies},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Current cardiovascular disease decision support systems (DSS) rely primarily on ontologies that characterize and quantify disease, recommending appropriate pharmacotherapy (PT) and/or surgical interventions (SI). PubMed and Google Scholar searches reveal no specific ontologies or literature related to DSS for recommending physical activity (PA) and diet interventions (DI) for cardiovascular health and fitness (CVHF) improvement. This dearth of CVHF-PA/DI structured knowledge repositories has resulted in a scarcity of user-friendly tools for scientifically validated information retrieval about CVHF improvement. Advancement of health science depends on timely development and implementation of health (rather than disease) ontologies. We developed a time-efficient workflow for constructing/maintaining structured knowledge repositories capable of providing informational underpinnings for CVHF-PA/DI ontologies and DSS that support health promotion, including precise, personalized exercise prescription. This workflow creates conceptual lattices about effects of varied PA on CVHF. These conceptual maps lay the foundation for accelerated creation of health-focused ontologies, which ultimately equip DSS with CVHF knowledge related PA and DI.

},
  url       = {http://ceur-ws.org/Vol-1747/BIT104_ICBO2016.pdf},
  author    = {Ponna, Vimala and Baer, Aaron and Lange, Matthew},
}

% BIT103 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BIT103,
  title     = {{BIT103}: Scalable Text Mining Assisted Curation of {PTM} Proteoforms in the {Protein Ontology}},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

The Protein Ontology (PRO) defines protein classes and their interrelationships from the family to the protein form (proteoform) level within and across species. One of the unique contributions of PRO is its representation of post-translationally modified (PTM) proteoforms. However, progress in adding PTM proteoform classes to PRO has been relatively slow due to the extensive manual curation effort required. Here we report an automated pipeline for creation of PTM proteoform classes that leverages two phosphorylation-focused text mining tools (RLIMS-P, which detects mentions of kinases, substrates, and phosphorylation sites, and eFIP, which detects phosphorylation-dependent protein-protein interactions (PPIs)) and our integrated PTM database, iPTMnet. By applying this pipeline, we obtained a set of \ 820 substrate-site pairs that are suitable for automated PRO term generation with literature-based evidence attribution. Inclusion of these terms in PRO will increase PRO coverage of species-specific PTM proteoforms by 50\%. Many of these new proteoforms also have associated kinase and/or PPI information. Finally, we show a phosphorylation network for the human and mouse peptidyl-prolyl cis-trans isomerase (PIN1/Pin1) derived from our dataset that demonstrates the biological complexity of the information we have extracted. Our approach addresses scalability in PRO curation and will be further expanded to advance PRO representation of phosphorylated proteoforms.

},
  url       = {http://ceur-ws.org/Vol-1747/BIT103_ICBO2016.pdf},
  author    = {Ross, Karen and Natale, Darren and Arighi, Cecilia and Chen, Sheng-Chih and Huang, Hongzhan and Li, Gang and Ren, Jia and Wang, Michael and Vijay-Shanker, K. and Wu, Cathy},
}

% BIT102 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BIT102,
  title     = {{BIT102}: One tagger, many uses: Illustrating the power of ontologies in dictionary-based named entity recognition},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

Automatic annotation of text is an important complement to manual annotation, because the latter is highly labour intensive. We have developed a fast dictionary-based named entity recognition (NER) system and addressed a wide variety of biomedical problems by applying it to text from many different sources. We have used this tagger both in real-time tools to support curation efforts and in pipelines for populating databases through bulk processing of entire Medline, the open-access subset of PubMed Central, NIH grant abstracts, FDA drug labels, electronic health records, and the Encyclopedia of Life. Despite the simplicity of the approach, it typically achieves 80{\textendash}90\% precision and 70{\textendash}80\% recall. Many of the underlying dictionaries were built from open biomedical ontologies, which further facilitate integration of the text-mining results with evidence from other sources.

},
  url       = {http://ceur-ws.org/Vol-1747/BIT102_ICBO2016.pdf},
  author    = {Jensen, L. J.},
}

% BIT101-D204 -- ICBO/BioCreative 2016 paper, CEUR-WS.org volume 1747 (export date 11/30/16).
@inproceedings{BIT101,
  title     = {{BIT101-D204}: Large-scale Semantic Indexing with Biomedical Ontologies},
  booktitle = {International Conference on Biomedical Ontology and BioCreative (ICBO BioCreative 2016)},
  series    = {Proceedings of the Joint International Conference on Biological Ontology and BioCreative (2016)},
  volume    = {1747},
  year      = {2016},
  month     = nov,
  publisher = {CEUR-WS.org},
  abstract  = {

We introduce PubTator, a web-based application that enables large-scale semantic indexing and automatic concept recognition in biomedical ontologies. Not only was PubTator formally evaluated and top-rated in BioCreative, it also has been widely adopted and used by the scientific community from around the world, supporting both research projects and real-world applications in biocuration, crowdsourcing and translational bioinformatics.

},
  url       = {http://ceur-ws.org/Vol-1747/BIT101-D204_ICBO2016.pdf},
  author    = {Wei, Chih-Hsuan and Leaman, Robert and Lu, Zhiyong}
}