Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 86
PeffFastaEntry
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 4
756
0.00% covered (danger)
0.00%
0 / 86
 parseIdentifier
0.00% covered (danger)
0.00%
0 / 1
6
0.00% covered (danger)
0.00%
0 / 7
 getProtein
0.00% covered (danger)
0.00%
0 / 1
6
0.00% covered (danger)
0.00%
0 / 14
 parseAttributes
0.00% covered (danger)
0.00%
0 / 1
420
0.00% covered (danger)
0.00%
0 / 49
 parseModifications
0.00% covered (danger)
0.00%
0 / 1
12
0.00% covered (danger)
0.00%
0 / 16
<?php
/**
 * Copyright 2019 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Reader\FastaEntry;
use pgb_liv\php_ms\Core\Protein;
use pgb_liv\php_ms\Core\Modification;
use pgb_liv\php_ms\Core\Entry\DatabaseEntry;
use pgb_liv\php_ms\Core\Gene;
use pgb_liv\php_ms\Core\Organism;
use pgb_liv\php_ms\Core\Database\DatabaseFactory;
use pgb_liv\php_ms\Reader\HupoPsi\PsiVerb;
/**
 * FASTA entry parser to map generic PEFF headers to protein elements
 *
 * @author Andrew Collins
 */
class PeffFastaEntry implements FastaInterface
{
    public static function parseIdentifier($identifier)
    {
        $matches = null;
        $isMatched = preg_match('/^(\w+):([\w-]+)$/', $identifier, $matches);
        if (! $isMatched) {
            throw new \InvalidArgumentException($identifier . ' is not PEFF format');
        }
        return array(
            $matches[1],
            $matches[2]
        );
    }
    /**
     *
     * {@inheritdoc}
     */
    public function getProtein($identifier, $description)
    {
        $protein = new Protein();
        // Parse identifier
        $identifierParts = $this->parseIdentifier($identifier);
        $database = DatabaseFactory::getDatabase($identifierParts[0]);
        $dbEntry = new DatabaseEntry($database);
        $protein->setDatabaseEntry($dbEntry);
        $dbEntry->setUniqueIdentifier($identifierParts[1]);
        // Parse description
        $matches = null;
        preg_match_all('/\\\\(\\w+)=(.+?(?= \\\\|$))/', $description, $matches);
        $attributes = array();
        foreach ($matches[1] as $index => $key) {
            $attributes[$key] = $matches[2][$index];
        }
        $this->parseAttributes($protein, $attributes);
        return $protein;
    }
    /**
     * Parses the attribute array and inputs the data into the protein
     *
     * @param Protein $protein
     *            Object to input values to
     * @param array $attributes
     *            Array to read from
     * @return void
     */
    private function parseAttributes(Protein $protein, array $attributes)
    {
        if (isset($attributes[PsiVerb::NCBI_TAX_ID])) {
            $organism = Organism::getInstance($attributes[PsiVerb::NCBI_TAX_ID]);
            $protein->setOrganism($organism);
        }
        if (isset($attributes[PsiVerb::TAX_NAME])) {
            if (! $protein->getOrganism()) {
                $protein->setOrganism(new Organism());
            }
            $protein->getOrganism()->setName($attributes[PsiVerb::TAX_NAME]);
        }
        foreach ($attributes as $key => $value) {
            switch ($key) {
                case 'DbUniqueId':
                    $protein->setAccession($value);
                    break;
                case 'GName':
                    $gene = Gene::getInstance($value);
                    $protein->setGene($gene);
                    break;
                case 'SV':
                    $protein->getDatabaseEntry()->setSequenceVersion($value);
                    break;
                case 'EV':
                    $protein->getDatabaseEntry()->setEntryVersion($value);
                    break;
                case 'PE':
                    $protein->getDatabaseEntry()->setEvidence($value);
                    break;
                case 'PName':
                    $protein->setDescription($value);
                    break;
                case 'ModRes':
                case 'ModResPsi':
                case 'ModResUnimod':
                    $modifications = self::parseModifications($value);
                    $protein->addModifications($modifications);
                    break;
                case PsiVerb::NCBI_TAX_ID:
                case PsiVerb::TAX_NAME:
                    // Safe to ignore - already handled
                    break;
                case 'Length':
                case 'VariantSimple ':
                case 'VariantComplex':
                case 'Processed':
                    // Not supported
                    break;
                default:
                    // Not supported
                    break;
            }
        }
    }
    /**
     * Parses ModResPsi/ModResUnimod element and returns the parsed modifications
     *
     * @param string $value
     *            The ModResXXX value
     * @return Modification[]
     */
    private static function parseModifications($value)
    {
        $modifications = array();
        $matches = null;
        preg_match_all('/\(([^()]|(?R))*\)/', $value, $matches);
        foreach ($matches[0] as $modString) {
            $elements = null;
            preg_match('/\(([0-9,]+)\|([A-Z]+:[0-9]+)?\|(.*)\|?(.+)?\)/', $modString, $elements);
            $locations = explode(',', $elements[1]);
            foreach ($locations as $location) {
                $modification = new Modification();
                $modification->setLocation((int) $location);
                $modification->setName($elements[3]);
                $modification->setAccession($elements[2]);
                $modifications[] = $modification;
            }
        }
        return $modifications;
    }
}