Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 2
CRAP
0.00% covered (danger)
0.00%
0 / 73
EnsembleFastaEntry
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 2
342
0.00% covered (danger)
0.00%
0 / 73
 parseIdentifier
0.00% covered (danger)
0.00%
0 / 1
6
0.00% covered (danger)
0.00%
0 / 12
 getProtein
0.00% covered (danger)
0.00%
0 / 1
272
0.00% covered (danger)
0.00%
0 / 61
<?php
/**
 * Copyright 2019 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Reader\FastaEntry;
use pgb_liv\php_ms\Core\Entry\DatabaseEntry;
use pgb_liv\php_ms\Core\Gene;
use pgb_liv\php_ms\Core\Protein;
use pgb_liv\php_ms\Core\Chromosome;
use pgb_liv\php_ms\Core\Transcript;
use pgb_liv\php_ms\Core\Database\EnsembleGDatabase;
use pgb_liv\php_ms\Core\Database\EnsembleTDatabase;
use pgb_liv\php_ms\Core\Database\DatabaseFactory;
/**
 * FASTA entry parser that maps an Ensembl FASTA header onto protein, gene, transcript and chromosome elements
 *
 * @author Andrew Collins
 */
class EnsembleFastaEntry implements FastaInterface
{

    // Keys recognised in the "key:value" section of the FASTA description line.
    const CHROMOSOME = 'chromosome';

    const GENE = 'gene';

    const GENE_BIOTYPE = 'gene_biotype';

    const GENE_SYMBOL = 'gene_symbol';

    const TRANSCRIPT = 'transcript';

    const TRANSCRIPT_BIOTYPE = 'transcript_biotype';

    const DESCRIPTION = 'description';

    /**
     * Splits an identifier into its prefix, numeric accession and version.
     *
     * The identifier must be "ENSP" or "GENSCAN", followed by digits, a literal
     * dot and a numeric version, e.g. "ENSP00000354587.3".
     *
     * @param string $identifier
     *            Identifier to parse
     * @return array Three elements: prefix, accession digits, version digits
     * @throws \InvalidArgumentException If the identifier does not match the expected format
     */
    public static function parseIdentifier($identifier)
    {
        $matches = null;

        // The dot is escaped so that only a real version separator is accepted. Unescaped, it
        // matched any character, so an unversioned identifier was silently split into a
        // truncated accession and a one-digit "version" taken from its last digits.
        $isMatched = preg_match('/^(ENSP|GENSCAN)([0-9]+)\.([0-9]+)$/', $identifier, $matches);

        if (! $isMatched) {
            throw new \InvalidArgumentException($identifier . ' is not Ensemble format');
        }

        return array(
            $matches[1],
            $matches[2],
            $matches[3]
        );
    }

    /**
     * Extracts the recognised "key:value" pairs from a FASTA description line.
     *
     * A value runs until the next " key:" token, a " [" bracket, or the end of the line.
     * Keys that are not one of this class's constants are ignored. If a key occurs more
     * than once, the last occurrence wins.
     *
     * @param string $description
     *            Description part of the FASTA header
     * @return array Map of recognised key => value
     */
    private static function parseDescription($description)
    {
        $recognisedKeys = array(
            self::CHROMOSOME,
            self::GENE,
            self::GENE_BIOTYPE,
            self::GENE_SYMBOL,
            self::TRANSCRIPT,
            self::TRANSCRIPT_BIOTYPE,
            self::DESCRIPTION
        );

        $matches = array();

        // The "|$" alternative lets the final field terminate at the end of the line. Without
        // it, the last pair of a header that has no trailing " [Source:...]" block was dropped.
        preg_match_all('/(\w+):(.*?(?=\s\[|\s\w+:|$))/', $description, $matches);

        $keyValues = array();
        foreach ($matches[1] as $index => $key) {
            if (in_array($key, $recognisedKeys, true)) {
                $keyValues[$key] = $matches[2][$index];
            }
        }

        return $keyValues;
    }

    /**
     * Builds a Protein from the identifier and description of a FASTA header.
     *
     * The identifier supplies the protein's database entry (database chosen by prefix,
     * accession digits as unique identifier, version digits as entry version). The
     * description supplies the optional chromosome, gene, transcript and free-text
     * description.
     *
     * {@inheritdoc}
     *
     * @throws \InvalidArgumentException If the identifier is not in the expected format
     */
    public function getProtein($identifier, $description)
    {
        $protein = new Protein();
        $identifierParts = self::parseIdentifier($identifier);

        // The identifier prefix selects the source database.
        $database = DatabaseFactory::getDatabase($identifierParts[0]);
        $dbEntry = new DatabaseEntry($database);
        $protein->setDatabaseEntry($dbEntry);
        $dbEntry->setUniqueIdentifier($identifierParts[1]);
        $dbEntry->setEntryVersion($identifierParts[2]);

        $keyValues = self::parseDescription($description);

        if (isset($keyValues[self::CHROMOSOME])) {
            $chromosome = new Chromosome();
            $chromosome->setName($keyValues[self::CHROMOSOME]);
            $protein->setChromosome($chromosome);
        }

        if (isset($keyValues[self::GENE])) {
            // With a symbol the gene is obtained through Gene::getInstance() (presumably a
            // per-symbol shared instance; confirm against Gene); otherwise a new Gene is created.
            if (isset($keyValues[self::GENE_SYMBOL])) {
                $gene = Gene::getInstance($keyValues[self::GENE_SYMBOL]);
            } else {
                $gene = new Gene();
            }

            $geneEntry = new DatabaseEntry(EnsembleGDatabase::getInstance());
            $geneEntry->setUniqueIdentifier($keyValues[self::GENE]);
            $gene->setDatabaseEntry($geneEntry);
            $protein->setGene($gene);

            if (isset($keyValues[self::GENE_BIOTYPE])) {
                $gene->setType($keyValues[self::GENE_BIOTYPE]);
            }
        }

        if (isset($keyValues[self::TRANSCRIPT])) {
            $transcript = new Transcript();
            $transcriptEntry = new DatabaseEntry(EnsembleTDatabase::getInstance());
            $transcriptEntry->setUniqueIdentifier($keyValues[self::TRANSCRIPT]);
            $transcript->setDatabaseEntry($transcriptEntry);
            $protein->setTranscript($transcript);

            if (isset($keyValues[self::TRANSCRIPT_BIOTYPE])) {
                $transcript->setType($keyValues[self::TRANSCRIPT_BIOTYPE]);
            }
        }

        if (isset($keyValues[self::DESCRIPTION])) {
            $protein->setDescription($keyValues[self::DESCRIPTION]);
        }

        return $protein;
    }
}