Source of file UniProtFastaEntry.php
Size: 3,517 Bytes - Last Modified: 2019-05-10T12:24:09+01:00
src/Reader/FastaEntry/UniProtFastaEntry.php
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
Covered by 3 test(s):
5859
Covered by 3 test(s):
6061
Covered by 3 test(s):
6263
Covered by 3 test(s):
64
Covered by 3 test(s):
65
Covered by 3 test(s):
66676869
Covered by 3 test(s):
7071
Covered by 3 test(s):
72
Covered by 3 test(s):
737475
Covered by 3 test(s):
76
Covered by 3 test(s):
7778
Covered by 3 test(s):
79
Covered by 3 test(s):
8081
Covered by 3 test(s):
8283
Covered by 3 test(s):
8485
Covered by 3 test(s):
86
Covered by 3 test(s):
87
Covered by 3 test(s):
88
Covered by 3 test(s):
89
Covered by 3 test(s):
90
Covered by 3 test(s):
91
Covered by 3 test(s):
9293949596
Covered by 3 test(s):
9798
Covered by 3 test(s):
99100101102103104105106107
Covered by 3 test(s):
108
Covered by 3 test(s):
109
Covered by 3 test(s):
110
Covered by 3 test(s):
111112
Covered by 3 test(s):
113
Covered by 3 test(s):
114
Covered by 3 test(s):
115116
Covered by 3 test(s):
117
Covered by 3 test(s):
118
Covered by 3 test(s):
119120
Covered by 3 test(s):
121122123
<?php
/**
 * Copyright 2019 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Reader\FastaEntry;

use pgb_liv\php_ms\Core\Protein;
use pgb_liv\php_ms\Core\Entry\DatabaseEntry;
use pgb_liv\php_ms\Core\Organism;
use pgb_liv\php_ms\Core\Gene;
use pgb_liv\php_ms\Core\Database\UniProtSpDatabase;
use pgb_liv\php_ms\Core\Database\UniProtTrDatabase;

/**
 * FASTA entry parser to map UniProt header to protein elements
 *
 * @author Andrew Collins
 */
class UniProtFastaEntry implements FastaInterface
{

    /**
     * Splits a UniProt FASTA identifier of the form "db|accession|name".
     *
     * The database prefix must be "sp" or "tr". The accession may contain
     * hyphens so that isoform accessions (e.g. "P12345-2") are accepted.
     *
     * @param string $identifier
     *            Identifier token taken from the FASTA header
     * @throws \InvalidArgumentException If the identifier does not match the expected layout
     * @return array Three elements: database prefix, accession, entry name
     */
    public static function parseIdentifier($identifier)
    {
        $matches = null;
        $isMatched = preg_match('/^(sp|tr)\|([\w-]+)\|(\w+)$/', $identifier, $matches);

        if (! $isMatched) {
            throw new \InvalidArgumentException($identifier . ' is not UniProt format');
        }

        return array(
            $matches[1],
            $matches[2],
            $matches[3]
        );
    }

    /**
     * Builds a Protein from a UniProt identifier and header description.
     *
     * The description text before " OS=" becomes the protein description; the
     * OS, OX, GN, PE and SV fields populate the organism, gene and database
     * entry. An organism is only attached when an OX field is present.
     *
     * {@inheritdoc}
     */
    public function getProtein($identifier, $description)
    {
        // Parse identifier (throws if it is not a sp|tr identifier)
        $identifierParts = self::parseIdentifier($identifier);

        // parseIdentifier() only ever yields "sp" or "tr", so anything that is not "sp" is "tr"
        if ($identifierParts[0] === 'sp') {
            $database = UniProtSpDatabase::getInstance();
        } else {
            $database = UniProtTrDatabase::getInstance();
        }

        $protein = new Protein();
        $dbEntry = new DatabaseEntry($database);

        $protein->setDatabaseEntry($dbEntry);
        $dbEntry->setUniqueIdentifier($identifierParts[1]);
        $dbEntry->setName($identifierParts[2]);

        // Parse description
        $osPosition = strpos($description, ' OS=');

        // Without an OS field strpos() returns false, and substr(..., 0, false) would
        // produce an empty string, so keep the whole description in that case
        if ($osPosition === false) {
            $protein->setDescription($description);
        } else {
            $protein->setDescription(substr($description, 0, $osPosition));
        }

        $keyValues = self::parseDescriptionFields($description);

        if (isset($keyValues['OX'])) {
            $organism = Organism::getInstance($keyValues['OX']);
            $protein->setOrganism($organism);

            if (isset($keyValues['OS'])) {
                $organism->setName($keyValues['OS']);
            }
        }

        if (isset($keyValues['GN'])) {
            $gene = Gene::getInstance($keyValues['GN']);
            $protein->setGene($gene);
        }

        if (isset($keyValues['PE'])) {
            $dbEntry->setEvidence($keyValues['PE']);
        }

        if (isset($keyValues['SV'])) {
            $dbEntry->setSequenceVersion($keyValues['SV']);
        }

        return $protein;
    }

    /**
     * Extracts the OS, OX, GN, PE and SV "KEY=value" fields from a description.
     *
     * A value runs up to the next recognised key (or the end of the string), so
     * values containing punctuation such as "(strain K12)" or "HLA-A" are kept
     * whole. Values are trimmed, fields with an empty value are omitted, and if
     * a key occurs more than once the last occurrence wins.
     *
     * @param string $description
     *            Header description text
     * @return array Map of field key to trimmed value
     */
    private static function parseDescriptionFields($description)
    {
        $matches = array();
        $matchCount = preg_match_all('/\b(OS|OX|GN|PE|SV)=(.*?)(?=\s+(?:OS|OX|GN|PE|SV)=|$)/', $description, $matches);

        $keyValues = array();
        if (! $matchCount) {
            // No fields found, or the pattern failed to execute
            return $keyValues;
        }

        foreach ($matches[1] as $index => $key) {
            $value = trim($matches[2][$index]);

            if ($value !== '') {
                $keyValues[$key] = $value;
            }
        }

        return $keyValues;
    }
}