Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
| Total | |
0.00% |
0 / 1 |
|
0.00% |
0 / 4 |
CRAP | |
0.00% |
0 / 86 |
| PeffFastaEntry | |
0.00% |
0 / 1 |
|
0.00% |
0 / 4 |
756 | |
0.00% |
0 / 86 |
| parseIdentifier | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 7 |
|||
| getProtein | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 14 |
|||
| parseAttributes | |
0.00% |
0 / 1 |
420 | |
0.00% |
0 / 49 |
|||
| parseModifications | |
0.00% |
0 / 1 |
12 | |
0.00% |
0 / 16 |
|||
| <?php | |
| /** | |
| * Copyright 2019 University of Liverpool | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| namespace pgb_liv\php_ms\Reader\FastaEntry; | |
| use pgb_liv\php_ms\Core\Protein; | |
| use pgb_liv\php_ms\Core\Modification; | |
| use pgb_liv\php_ms\Core\Entry\DatabaseEntry; | |
| use pgb_liv\php_ms\Core\Gene; | |
| use pgb_liv\php_ms\Core\Organism; | |
| use pgb_liv\php_ms\Core\Database\DatabaseFactory; | |
| use pgb_liv\php_ms\Reader\HupoPsi\PsiVerb; | |
| /** | |
| * FASTA entry parser to map generic PEFF headers to protein elements | |
| * | |
| * @author Andrew Collins | |
| */ | |
class PeffFastaEntry implements FastaInterface
{

    /**
     * Splits a PEFF identifier of the form "prefix:accession" into its two parts.
     *
     * @param string $identifier
     *            Identifier token to split, e.g. "sp:P12345"
     * @throws \InvalidArgumentException If the identifier does not match the "prefix:accession" pattern
     * @return string[] Two-element array: the prefix (index 0) and the accession (index 1)
     */
    public static function parseIdentifier($identifier)
    {
        $matches = null;
        if (! preg_match('/^(\w+):([\w-]+)$/', $identifier, $matches)) {
            throw new \InvalidArgumentException($identifier . ' is not PEFF format');
        }

        return array(
            $matches[1],
            $matches[2]
        );
    }

    /**
     *
     * {@inheritdoc}
     */
    public function getProtein($identifier, $description)
    {
        $protein = new Protein();

        // parseIdentifier() is static; static:: keeps subclass overrides working
        // exactly as the previous instance-style call did.
        $identifierParts = static::parseIdentifier($identifier);

        // The identifier prefix selects the database, the remainder is the unique ID
        $database = DatabaseFactory::getDatabase($identifierParts[0]);
        $dbEntry = new DatabaseEntry($database);
        $protein->setDatabaseEntry($dbEntry);
        $dbEntry->setUniqueIdentifier($identifierParts[1]);

        // The description is a sequence of "\Key=Value" pairs. A value runs until
        // the next " \" (start of the following key) or the end of the string.
        $matches = null;
        preg_match_all('/\\\\(\\w+)=(.+?(?= \\\\|$))/', $description, $matches);

        // If a key is repeated the last occurrence wins
        $attributes = array();
        foreach ($matches[1] as $index => $key) {
            $attributes[$key] = $matches[2][$index];
        }

        $this->parseAttributes($protein, $attributes);

        return $protein;
    }

    /**
     * Parses the attribute array and inputs the data into the protein
     *
     * The organism keys are processed first so that the taxonomy ID creates the
     * organism before the taxonomy name is applied to it. All remaining keys
     * are then dispatched individually; unrecognised keys are ignored.
     *
     * @param Protein $protein
     *            Object to input values to
     * @param array $attributes
     *            Array to read from, keyed by PEFF attribute name
     * @return void
     */
    private function parseAttributes(Protein $protein, array $attributes)
    {
        if (isset($attributes[PsiVerb::NCBI_TAX_ID])) {
            $organism = Organism::getInstance($attributes[PsiVerb::NCBI_TAX_ID]);
            $protein->setOrganism($organism);
        }

        if (isset($attributes[PsiVerb::TAX_NAME])) {
            // No taxonomy ID was supplied, so there is no organism to name yet
            if (! $protein->getOrganism()) {
                $protein->setOrganism(new Organism());
            }

            $protein->getOrganism()->setName($attributes[PsiVerb::TAX_NAME]);
        }

        foreach ($attributes as $key => $value) {
            switch ($key) {
                case 'DbUniqueId':
                    $protein->setAccession($value);
                    break;
                case 'GName':
                    $gene = Gene::getInstance($value);
                    $protein->setGene($gene);
                    break;
                case 'SV':
                    $protein->getDatabaseEntry()->setSequenceVersion($value);
                    break;
                case 'EV':
                    $protein->getDatabaseEntry()->setEntryVersion($value);
                    break;
                case 'PE':
                    $protein->getDatabaseEntry()->setEvidence($value);
                    break;
                case 'PName':
                    $protein->setDescription($value);
                    break;
                case 'ModRes':
                case 'ModResPsi':
                case 'ModResUnimod':
                    $modifications = self::parseModifications($value);
                    $protein->addModifications($modifications);
                    break;
                case PsiVerb::NCBI_TAX_ID:
                case PsiVerb::TAX_NAME:
                    // Safe to ignore - already handled
                    break;
                case 'Length':
                case 'VariantSimple':
                case 'VariantComplex':
                case 'Processed':
                    // Known keys that are not supported
                    break;
                default:
                    // Unknown key - not supported
                    break;
            }
        }
    }

    /**
     * Parses a ModRes/ModResPsi/ModResUnimod element and returns the parsed modifications
     *
     * The value is a run of parenthesised groups shaped like
     * "(positions|accession|name)" with an optional fourth "|"-separated
     * field. Positions is a comma separated list and one Modification is
     * returned per position. Groups that do not follow this shape are skipped.
     *
     * @param string $value
     *            The ModResXXX value
     * @return Modification[]
     */
    private static function parseModifications($value)
    {
        $modifications = array();

        // Recursive pattern: pulls out each top-level balanced "(...)" group so
        // that names which themselves contain parentheses stay intact.
        $matches = null;
        preg_match_all('/\(([^()]|(?R))*\)/', $value, $matches);

        foreach ($matches[0] as $modString) {
            // Groups: 1 = positions, 2 = accession (may be empty), 3 = name,
            // 4 = optional trailing field. The name stops at the next "|" so the
            // optional field is not swallowed into it.
            $elements = null;
            $isMatched = preg_match('/\(([0-9,]+)\|([A-Z]+:[0-9]+)?\|([^|]*)(?:\|(.*))?\)/', $modString, $elements);

            if (! $isMatched) {
                // Not a recognisable modification entry - nothing reliable to record
                continue;
            }

            foreach (explode(',', $elements[1]) as $location) {
                // A stray comma yields an empty piece which would cast to position 0
                if ($location === '') {
                    continue;
                }

                $modification = new Modification();
                $modification->setLocation((int) $location);
                $modification->setName($elements[3]);
                $modification->setAccession($elements[2]);

                $modifications[] = $modification;
            }
        }

        return $modifications;
    }
}