Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
| Total | |
0.00% |
0 / 1 |
|
0.00% |
0 / 2 |
CRAP | |
64.81% |
35 / 54 |
| UniProtFastaEntry | |
0.00% |
0 / 1 |
|
0.00% |
0 / 2 |
27.15 | |
64.81% |
35 / 54 |
| parseIdentifier | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 8 |
|||
| getProtein | |
0.00% |
0 / 1 |
16.68 | |
76.09% |
35 / 46 |
|||
| <?php | |
| /** | |
| * Copyright 2019 University of Liverpool | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| namespace pgb_liv\php_ms\Reader\FastaEntry; | |
| use pgb_liv\php_ms\Core\Protein; | |
| use pgb_liv\php_ms\Core\Entry\DatabaseEntry; | |
| use pgb_liv\php_ms\Core\Organism; | |
| use pgb_liv\php_ms\Core\Gene; | |
| use pgb_liv\php_ms\Core\Database\UniProtSpDatabase; | |
| use pgb_liv\php_ms\Core\Database\UniProtTrDatabase; | |
| /** | |
| * FASTA entry parser to map UniProt header to protein elements | |
| * | |
| * @author Andrew Collins | |
| */ | |
| class UniProtFastaEntry implements FastaInterface | |
| { | |
| public static function parseIdentifier($identifier) | |
| { | |
| $matches = null; | |
| $isMatched = preg_match('/^(sp|tr)\|(\w+)\|(\w+)$/', $identifier, $matches); | |
| if (! $isMatched) { | |
| throw new \InvalidArgumentException($identifier . ' is not UniProt format'); | |
| } | |
| return array( | |
| $matches[1], | |
| $matches[2], | |
| $matches[3] | |
| ); | |
| } | |
| /** | |
| * | |
| * {@inheritdoc} | |
| */ | |
| public function getProtein($identifier, $description) | |
| { | |
| // Parse identifier | |
| $identifierParts = self::parseIdentifier($identifier); | |
| $protein = new Protein(); | |
| $dbEntry = null; | |
| if ($identifierParts[0] == 'sp') { | |
| $dbEntry = new DatabaseEntry(UniProtSpDatabase::getInstance()); | |
| } elseif ($identifierParts[0] == 'tr') { | |
| $dbEntry = new DatabaseEntry(UniProtTrDatabase::getInstance()); | |
| } | |
| $protein->setDatabaseEntry($dbEntry); | |
| $dbEntry->setUniqueIdentifier($identifierParts[1]); | |
| $dbEntry->setName($identifierParts[2]); | |
| // Parse description | |
| $osPosition = strpos($description, ' OS='); | |
| $protein->setDescription(substr($description, 0, $osPosition)); | |
| $matches = array(); | |
| preg_match_all('/(\w{2})=([\w\s]+)(?![\w=])/', $description, $matches); | |
| $keyValues = array(); | |
| foreach ($matches[1] as $key => $value) { | |
| switch ($value) { | |
| case 'OS': | |
| case 'OX': | |
| case 'GN': | |
| case 'PE': | |
| case 'SV': | |
| $keyValues[$value] = trim($matches[2][$key]); | |
| break; | |
| default: | |
| // Unknown | |
| break; | |
| } | |
| } | |
| if (isset($keyValues['OX'])) { | |
| $organism = Organism::getInstance($keyValues['OX']); | |
| $protein->setOrganism($organism); | |
| if (isset($keyValues['OS'])) { | |
| $organism->setName($keyValues['OS']); | |
| } | |
| } | |
| if (isset($keyValues['GN'])) { | |
| $gene = Gene::getInstance($keyValues['GN']); | |
| $protein->setGene($gene); | |
| } | |
| if (isset($keyValues['PE'])) { | |
| $dbEntry->setEvidence($keyValues['PE']); | |
| } | |
| if (isset($keyValues['SV'])) { | |
| $dbEntry->setSequenceVersion($keyValues['SV']); | |
| } | |
| return $protein; | |
| } | |
| } |