Source of file PeffFastaEntry.php

Size: 5,696 Bytes - Last Modified: 2019-05-10T12:24:09+01:00

src/Reader/FastaEntry/PeffFastaEntry.php

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
<?php
/**
 * Copyright 2019 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Reader\FastaEntry;

use pgb_liv\php_ms\Core\Protein;
use pgb_liv\php_ms\Core\Modification;
use pgb_liv\php_ms\Core\Entry\DatabaseEntry;
use pgb_liv\php_ms\Core\Gene;
use pgb_liv\php_ms\Core\Organism;
use pgb_liv\php_ms\Core\Database\DatabaseFactory;
use pgb_liv\php_ms\Reader\HupoPsi\PsiVerb;

/**
 * FASTA entry parser to map generic PEFF headers to protein elements.
 *
 * The identifier is expected in "prefix:accession" form and the description
 * as a sequence of "\Key=Value" pairs. Recognised keys are copied onto a
 * Protein object; unrecognised keys are silently ignored.
 *
 * @author Andrew Collins
 */
class PeffFastaEntry implements FastaInterface
{

    /**
     * Splits a PEFF identifier into its database prefix and accession.
     *
     * @param string $identifier
     *            Identifier in the form "prefix:accession"
     * @return string[] Two elements: the prefix (index 0) and the accession (index 1)
     * @throws \InvalidArgumentException If the identifier is not in the expected form
     */
    public static function parseIdentifier($identifier)
    {
        $matches = null;
        $isMatched = preg_match('/^(\w+):([\w-]+)$/', $identifier, $matches);

        if (! $isMatched) {
            throw new \InvalidArgumentException($identifier . ' is not PEFF format');
        }

        return array(
            $matches[1],
            $matches[2]
        );
    }

    /**
     *
     * {@inheritdoc}
     */
    public function getProtein($identifier, $description)
    {
        $protein = new Protein();

        // Parse identifier: the prefix selects the database, the remainder is the entry identifier
        $identifierParts = self::parseIdentifier($identifier);
        $database = DatabaseFactory::getDatabase($identifierParts[0]);
        $dbEntry = new DatabaseEntry($database);
        $protein->setDatabaseEntry($dbEntry);

        $dbEntry->setUniqueIdentifier($identifierParts[1]);

        // Parse description: each attribute is "\Key=Value" and its value runs lazily
        // up to the next " \" sequence or the end of the string
        $matches = null;
        preg_match_all('/\\\\(\\w+)=(.+?(?= \\\\|$))/', $description, $matches);

        // If a key is repeated, the last occurrence wins
        $attributes = array();
        foreach ($matches[1] as $index => $key) {
            $attributes[$key] = $matches[2][$index];
        }

        $this->parseAttributes($protein, $attributes);

        return $protein;
    }

    /**
     * Parses the attribute array and inputs the data into the protein
     *
     * @param Protein $protein
     *            Object to input values to
     * @param array $attributes
     *            Array to read from, keyed by PEFF attribute name
     * @return void
     */
    private function parseAttributes(Protein $protein, array $attributes)
    {
        // Organism attributes are handled up front so that the taxonomy name can be
        // attached to the organism created from the taxonomy identifier, if any
        if (isset($attributes[PsiVerb::NCBI_TAX_ID])) {
            $organism = Organism::getInstance($attributes[PsiVerb::NCBI_TAX_ID]);
            $protein->setOrganism($organism);
        }

        if (isset($attributes[PsiVerb::TAX_NAME])) {
            if (! $protein->getOrganism()) {
                $protein->setOrganism(new Organism());
            }

            $protein->getOrganism()->setName($attributes[PsiVerb::TAX_NAME]);
        }

        foreach ($attributes as $key => $value) {
            // PHP stores purely numeric keys as integers and switch compares loosely, so
            // before PHP 8 an integer key of 0 would equal every non-numeric case label.
            // Casting to string keeps the comparison string-to-string.
            switch ((string) $key) {
                case 'DbUniqueId':
                    $protein->setAccession($value);
                    break;
                case 'GName':
                    $gene = Gene::getInstance($value);
                    $protein->setGene($gene);
                    break;
                case 'SV':
                    $protein->getDatabaseEntry()->setSequenceVersion($value);
                    break;
                case 'EV':
                    $protein->getDatabaseEntry()->setEntryVersion($value);
                    break;
                case 'PE':
                    $protein->getDatabaseEntry()->setEvidence($value);
                    break;
                case 'PName':
                    $protein->setDescription($value);
                    break;
                case 'ModRes':
                case 'ModResPsi':
                case 'ModResUnimod':
                    $modifications = self::parseModifications($value);
                    $protein->addModifications($modifications);
                    break;
                case PsiVerb::NCBI_TAX_ID:
                case PsiVerb::TAX_NAME:
                    // Safe to ignore - already handled
                    break;
                case 'Length':
                case 'VariantSimple':
                case 'VariantComplex':
                case 'Processed':
                    // Not supported
                    break;
                default:
                    // Not supported
                    break;
            }
        }
    }

    /**
     * Parses ModRes/ModResPsi/ModResUnimod element and returns the parsed modifications
     *
     * The value is a series of parenthesised groups of the form
     * "(locations|accession|name)" with an optional fourth "|"-separated field,
     * where locations is a comma-separated list of positions. One Modification
     * is produced per location. Groups that do not follow this form are skipped.
     *
     * @param string $value
     *            The ModResXXX value
     * @return Modification[]
     */
    private static function parseModifications($value)
    {
        $modifications = array();
        $matches = null;

        // Recursive pattern: extracts each top-level balanced "(...)" group so that
        // parentheses inside a modification name do not split the group
        preg_match_all('/\(([^()]|(?R))*\)/', $value, $matches);

        foreach ($matches[0] as $modString) {
            $elements = null;

            // Groups: 1 = locations, 2 = optional accession, 3 = name, 4 = optional extra field.
            // The name stops at the next "|" so that the optional field is not absorbed into it.
            $isMatched = preg_match('/\(([0-9,]+)\|([A-Z]+:[0-9]+)?\|([^|]*)(?:\|(.*))?\)/', $modString, $elements);

            if (! $isMatched) {
                // Not a recognisable modification - skip instead of emitting an empty one
                continue;
            }

            $locations = explode(',', $elements[1]);
            foreach ($locations as $location) {
                if ($location === '') {
                    // Stray comma (e.g. "3,4,") - there is no position to record
                    continue;
                }

                $modification = new Modification();
                $modification->setLocation((int) $location);
                $modification->setName($elements[3]);
                $modification->setAccession($elements[2]);

                $modifications[] = $modification;
            }
        }

        return $modifications;
    }
}