Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 2
CRAP
64.81% covered (warning)
64.81%
35 / 54
UniProtFastaEntry
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 2
27.15
64.81% covered (warning)
64.81%
35 / 54
 parseIdentifier
0.00% covered (danger)
0.00%
0 / 1
6
0.00% covered (danger)
0.00%
0 / 8
 getProtein
0.00% covered (danger)
0.00%
0 / 1
16.68
76.09% covered (warning)
76.09%
35 / 46
<?php
/**
 * Copyright 2019 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Reader\FastaEntry;
use pgb_liv\php_ms\Core\Protein;
use pgb_liv\php_ms\Core\Entry\DatabaseEntry;
use pgb_liv\php_ms\Core\Organism;
use pgb_liv\php_ms\Core\Gene;
use pgb_liv\php_ms\Core\Database\UniProtSpDatabase;
use pgb_liv\php_ms\Core\Database\UniProtTrDatabase;
/**
 * FASTA entry parser to map UniProt header to protein elements
 *
 * @author Andrew Collins
 */
class UniProtFastaEntry implements FastaInterface
{
    /**
     * Splits a UniProt FASTA identifier of the form "db|accession|name" into its parts.
     *
     * Only the "sp" and "tr" database prefixes are accepted, and both the
     * accession and the entry name must consist solely of word characters.
     *
     * @param string $identifier
     *            Identifier to parse, e.g. "sp|P12345|NAME_HUMAN"
     * @throws \InvalidArgumentException If the identifier does not match the expected format
     * @return array Three elements in order: database prefix, accession, entry name
     */
    public static function parseIdentifier($identifier)
    {
        $matches = null;
        $isMatched = preg_match('/^(sp|tr)\|(\w+)\|(\w+)$/', $identifier, $matches);
        if (! $isMatched) {
            throw new \InvalidArgumentException($identifier . ' is not UniProt format');
        }
        return array(
            $matches[1],
            $matches[2],
            $matches[3]
        );
    }
    /**
     * Builds a Protein from a UniProt FASTA header.
     *
     * The identifier supplies the database, unique identifier and entry name.
     * The description supplies the free-text protein description (everything
     * before " OS=") and the optional OS, OX, GN, PE and SV fields. The organism
     * is only set when OX is present, in which case OS (if present) is used as
     * its name.
     *
     * {@inheritdoc}
     *
     * @throws \InvalidArgumentException If the identifier is not in UniProt format
     */
    public function getProtein($identifier, $description)
    {
        // Parse identifier
        $identifierParts = self::parseIdentifier($identifier);
        $protein = new Protein();
        // parseIdentifier() only accepts "sp" or "tr", so anything that is not "sp" is "tr".
        if ($identifierParts[0] === 'sp') {
            $dbEntry = new DatabaseEntry(UniProtSpDatabase::getInstance());
        } else {
            $dbEntry = new DatabaseEntry(UniProtTrDatabase::getInstance());
        }
        $protein->setDatabaseEntry($dbEntry);
        $dbEntry->setUniqueIdentifier($identifierParts[1]);
        $dbEntry->setName($identifierParts[2]);
        // Parse description
        $osPosition = strpos($description, ' OS=');
        if ($osPosition === false) {
            // No key/value section: keep the full text rather than truncating it to ''.
            $protein->setDescription($description);
        } else {
            $protein->setDescription(substr($description, 0, $osPosition));
        }
        // Each value runs up to the next " XX=" tag or the end of the string, so
        // values containing punctuation (e.g. "HLA-A", "(strain K12)") stay intact.
        $fields = array();
        $keyValues = array();
        $pattern = '/\b(OS|OX|GN|PE|SV)=(.*?)(?=\s+\w{2}=|\s*$)/s';
        if (preg_match_all($pattern, $description, $fields, PREG_SET_ORDER)) {
            foreach ($fields as $field) {
                $value = trim($field[2]);
                if ($value === '') {
                    // A tag with no value carries no information; ignore it.
                    continue;
                }
                $keyValues[$field[1]] = $value;
            }
        }
        if (isset($keyValues['OX'])) {
            $organism = Organism::getInstance($keyValues['OX']);
            $protein->setOrganism($organism);
            if (isset($keyValues['OS'])) {
                $organism->setName($keyValues['OS']);
            }
        }
        if (isset($keyValues['GN'])) {
            $gene = Gene::getInstance($keyValues['GN']);
            $protein->setGene($gene);
        }
        if (isset($keyValues['PE'])) {
            $dbEntry->setEvidence($keyValues['PE']);
        }
        if (isset($keyValues['SV'])) {
            $dbEntry->setSequenceVersion($keyValues['SV']);
        }
        return $protein;
    }
}