Source of file PeffFastaEntry.php

Size: 5,953 Bytes - Last Modified: 2018-03-23T16:40:14+00:00

src/Core/Database/Fasta/PeffFastaEntry.php

1234567891011121314151617181920212223242526272829303132333435363738394041
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
4243444546474849505152
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
5354
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
55
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
56
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
5758
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
59
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
60
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
6162
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
63
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
64
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
6566
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
67
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
68
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
6970
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
71
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
72
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
7374
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
75
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
76
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
7778
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
79
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
80
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
8182
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
838485868788899091929394
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
9596
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
97
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
98
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
99
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
100
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
101102
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
103104105
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
106107
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
108
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
109
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
110
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
111112
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
113114
Covered by 1 test(s):
  • pgb_liv\php_ms\Test\Unit\FastaWriterTest::testCanWritePeffEntry
115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
<?php
/**
 * Copyright 2016 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Core\Database\Fasta;

use pgb_liv\php_ms\Core\Protein;
use pgb_liv\php_ms\Core\Modification;

/**
 * FASTA entry formatter/parser for PEFF-style headers.
 *
 * Writes a protein as a description line of the form
 * ">prefix:accession \Key=value \Key=value ..." and parses such lines back
 * into a Protein object, including ModResPsi/ModResUnimod modification lists.
 *
 * @author Andrew Collins
 */
class PeffFastaEntry implements FastaInterface
{

    /**
     * Returns the file-level header line announcing the PEFF version,
     * terminated by PHP_EOL.
     *
     * {@inheritdoc}
     *
     * @see \pgb_liv\php_ms\Core\Database\Fasta\FastaInterface::getHeader()
     */
    public function getHeader()
    {
        return '# PEFF 1.0draft24' . PHP_EOL;
    }

    /**
     * Builds the description line for a protein.
     *
     * The line always starts with ">prefix:accession". Each of the optional
     * fields is then appended as " \Key=value", in a fixed order, but only
     * when the protein returns a truthy value for it.
     *
     * {@inheritdoc}
     *
     * @see \pgb_liv\php_ms\Core\Database\Fasta\FastaInterface::getDescription()
     *
     * @param Protein $protein
     *            Protein to serialise
     * @return string The description line (without a trailing line break)
     */
    public function getDescription(Protein $protein)
    {
        $accession = $protein->getAccession();
        $description = '>' . $protein->getDatabasePrefix() . ':' . $accession;

        // Attribute key => value, listed in the order they are written out.
        $fields = array(
            'DbUniqueId' => $accession,
            'CC' => $protein->getEntryName(),
            'Pname' => $protein->getName(),
            'Gname' => $protein->getGeneName(),
            'TaxName' => $protein->getOrganismName(),
            'SV' => $protein->getSequenceVersion(),
            'PE' => $protein->getProteinExistence()
        );

        foreach ($fields as $key => $value) {
            // Unset/empty fields are omitted from the line entirely.
            if ($value) {
                $description .= ' \\' . $key . '=' . $value;
            }
        }

        return $description;
    }

    /**
     * Creates a protein from the parts of a parsed FASTA record.
     *
     * The identifier is split on ':' into database prefix and accession. The
     * description is scanned for "\Key=value" pairs, which are then applied
     * to the protein; a DbUniqueId attribute therefore overrides the
     * accession taken from the identifier.
     *
     * {@inheritdoc}
     *
     * @see \pgb_liv\php_ms\Core\Database\Fasta\FastaInterface::getProtein()
     *
     * @param string $identifier
     *            Entry identifier, expected as "prefix:accession"
     * @param string $description
     *            Remainder of the description line holding the attributes
     * @param string $sequence
     *            Protein sequence
     * @return Protein
     */
    public function getProtein($identifier, $description, $sequence)
    {
        // Parse identifier
        $identifierParts = explode(':', $identifier, 3);

        $protein = new Protein();
        $protein->setUniqueIdentifier($identifier);
        $protein->setSequence($sequence);
        $protein->setDatabasePrefix($identifierParts[0]);

        // An identifier without ':' has no accession part; pass null
        // explicitly instead of reading an undefined array offset.
        $protein->setAccession(isset($identifierParts[1]) ? $identifierParts[1] : null);

        $protein->setDescription($description);

        // Parse description: a value runs up to the next " \" or end of line.
        // Set order yields one array per pair, so an empty result is iterable.
        preg_match_all('/\\\\(\\w+)=(.+?(?= \\\\|$))/', $description, $matches, PREG_SET_ORDER);

        $attributes = array();
        foreach ($matches as $match) {
            // A repeated key keeps its last value.
            $attributes[$match[1]] = $match[2];
        }

        $this->parseAttributes($protein, $attributes);

        return $protein;
    }

    /**
     * Parses the attribute array and inputs the data into the protein.
     *
     * Keys are matched case-insensitively. Keys that are not recognised are
     * ignored.
     *
     * @param Protein $protein
     *            Object to input values to
     * @param array $attributes
     *            Array to read from, as attribute key => raw string value
     * @return void
     */
    private function parseAttributes(Protein $protein, array $attributes)
    {
        foreach ($attributes as $key => $value) {
            switch (strtolower($key)) {
                case 'dbuniqueid':
                    $protein->setAccession($value);
                    break;
                case 'cc':
                    $protein->setEntryName($value);
                    break;
                case 'gname':
                    $protein->setGeneName($value);
                    break;
                case 'sv':
                    $protein->setSequenceVersion($value);
                    break;
                case 'pe':
                    $protein->setProteinExistence($value);
                    break;
                case 'pname':
                    $protein->setName($value);
                    break;
                case 'taxname':
                    $protein->setOrganismName($value);
                    break;
                case 'modrespsi':
                case 'modresunimod':
                    // Both keys share the same "(positions|accession|name)" syntax.
                    $protein->addModifications(self::parseModifications($value));
                    break;
                default:
                    // Invalid or not yet supported fields
                    break;
            }
        }
    }

    /**
     * Parses ModResPsi/ModResUnimod element and returns the parsed modifications.
     *
     * The value is a series of groups "(positions|accession|name)", where
     * positions is a comma-separated list of integers and accession is
     * "UNIMOD:n" or "MOD:n". A further "|..." field after the name is
     * accepted and ignored. One Modification is created per listed position.
     *
     * @param string $value
     *            The ModResXXX value
     * @return Modification[]
     */
    private static function parseModifications($value)
    {
        preg_match_all('/\(([0-9,]+)\|((?>UNIMOD|MOD):[0-9]+)\|(.+?)(?>\)|\|(.+?)\))/', $value, $matches, PREG_SET_ORDER);

        $modifications = array();
        foreach ($matches as $match) {
            foreach (explode(',', $match[1]) as $location) {
                // The position pattern also admits stray commas ("1,,2", "1,");
                // skip the resulting empty tokens rather than casting them to 0.
                if ($location === '') {
                    continue;
                }

                $modification = new Modification();
                $modification->setLocation((int) $location);
                $modification->setName($match[3]);
                $modification->setAccession($match[2]);

                $modifications[] = $modification;
            }
        }

        return $modifications;
    }
}