Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
Total | |
0.00% |
0 / 1 |
|
0.00% |
0 / 2 |
CRAP | |
0.00% |
0 / 73 |
EnsembleFastaEntry | |
0.00% |
0 / 1 |
|
0.00% |
0 / 2 |
342 | |
0.00% |
0 / 73 |
parseIdentifier | |
0.00% |
0 / 1 |
6 | |
0.00% |
0 / 12 |
|||
getProtein | |
0.00% |
0 / 1 |
272 | |
0.00% |
0 / 61 |
<?php | |
/** | |
* Copyright 2019 University of Liverpool | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
namespace pgb_liv\php_ms\Reader\FastaEntry; | |
use pgb_liv\php_ms\Core\Entry\DatabaseEntry; | |
use pgb_liv\php_ms\Core\Gene; | |
use pgb_liv\php_ms\Core\Protein; | |
use pgb_liv\php_ms\Core\Chromosome; | |
use pgb_liv\php_ms\Core\Transcript; | |
use pgb_liv\php_ms\Core\Database\EnsembleGDatabase; | |
use pgb_liv\php_ms\Core\Database\EnsembleTDatabase; | |
use pgb_liv\php_ms\Core\Database\DatabaseFactory; | |
/**
 * FASTA entry parser that maps an Ensembl-style header (spelled "Ensemble"
 * throughout this code base) onto protein elements.
 *
 * The identifier is split into database prefix, accession number and version,
 * and the free-text description is scanned for the space-separated
 * "key:value" pairs named by the class constants below.
 *
 * @author Andrew Collins
 */
class EnsembleFastaEntry implements FastaInterface
{

    const CHROMOSOME = 'chromosome';

    const GENE = 'gene';

    const GENE_BIOTYPE = 'gene_biotype';

    const GENE_SYMBOL = 'gene_symbol';

    const TRANSCRIPT = 'transcript';

    const TRANSCRIPT_BIOTYPE = 'transcript_biotype';

    const DESCRIPTION = 'description';

    /**
     * Splits an identifier of the form "<prefix><digits>.<version>" into its parts.
     *
     * Only the prefixes "ENSP" and "GENSCAN" are accepted.
     *
     * @param string $identifier
     *            Identifier to parse, e.g. "ENSP00000354687.2"
     * @return array Three strings, in order: prefix, digits following the prefix, version digits
     * @throws \InvalidArgumentException If the identifier does not match the expected format
     */
    public static function parseIdentifier($identifier)
    {
        $matches = array();

        // The separator must be a literal dot. An unescaped "." would match any
        // character, so a version-less identifier would be silently split with
        // its trailing digits misread as separator and version.
        $isMatched = preg_match('/^(ENSP|GENSCAN)([0-9]+)\.([0-9]+)$/', $identifier, $matches);

        if (! $isMatched) {
            throw new \InvalidArgumentException($identifier . ' is not Ensemble format');
        }

        return array(
            $matches[1],
            $matches[2],
            $matches[3]
        );
    }

    /**
     * Builds a protein from the identifier and description of a FASTA header.
     *
     * The identifier supplies the protein's database entry (database looked up
     * from the prefix, unique identifier and entry version). Chromosome, gene,
     * transcript and description are attached only when the matching key is
     * present in the description; gene and transcript biotypes are only applied
     * when the gene or transcript key itself is present.
     *
     * {@inheritdoc}
     *
     * @throws \InvalidArgumentException If the identifier is rejected by parseIdentifier()
     */
    public function getProtein($identifier, $description)
    {
        $protein = new Protein();
        $identifierParts = self::parseIdentifier($identifier);

        $dbEntry = new DatabaseEntry(DatabaseFactory::getDatabase($identifierParts[0]));
        $protein->setDatabaseEntry($dbEntry);
        $dbEntry->setUniqueIdentifier($identifierParts[1]);
        $dbEntry->setEntryVersion($identifierParts[2]);

        $keyValues = self::parseDescriptionFields($description);

        if (isset($keyValues[self::CHROMOSOME])) {
            $chromosome = new Chromosome();
            $chromosome->setName($keyValues[self::CHROMOSOME]);
            $protein->setChromosome($chromosome);
        }

        if (isset($keyValues[self::GENE])) {
            // When a symbol is available the gene is obtained through
            // Gene::getInstance() keyed by that symbol; otherwise a fresh,
            // unnamed Gene is created.
            if (isset($keyValues[self::GENE_SYMBOL])) {
                $gene = Gene::getInstance($keyValues[self::GENE_SYMBOL]);
            } else {
                $gene = new Gene();
            }

            $geneEntry = new DatabaseEntry(EnsembleGDatabase::getInstance());
            $geneEntry->setUniqueIdentifier($keyValues[self::GENE]);
            $gene->setDatabaseEntry($geneEntry);
            $protein->setGene($gene);

            if (isset($keyValues[self::GENE_BIOTYPE])) {
                $gene->setType($keyValues[self::GENE_BIOTYPE]);
            }
        }

        if (isset($keyValues[self::TRANSCRIPT])) {
            $transcript = new Transcript();

            $transcriptEntry = new DatabaseEntry(EnsembleTDatabase::getInstance());
            $transcriptEntry->setUniqueIdentifier($keyValues[self::TRANSCRIPT]);
            $transcript->setDatabaseEntry($transcriptEntry);
            $protein->setTranscript($transcript);

            if (isset($keyValues[self::TRANSCRIPT_BIOTYPE])) {
                $transcript->setType($keyValues[self::TRANSCRIPT_BIOTYPE]);
            }
        }

        if (isset($keyValues[self::DESCRIPTION])) {
            $protein->setDescription($keyValues[self::DESCRIPTION]);
        }

        return $protein;
    }

    /**
     * Extracts the recognised "key:value" pairs from a header description.
     *
     * A value ends just before " [", before the next " word:" token, or at the
     * end of the string (ignoring trailing whitespace). The end-of-string case
     * matters for headers whose final pair is not followed by a bracketed
     * suffix: without it that last pair would be lost.
     *
     * Known limitation: a value that itself contains " word:" is cut at that
     * point, and the remainder is treated as a separate (usually unknown) key.
     *
     * @param string $description
     *            Description part of the FASTA header
     * @return array Map of recognised key to raw value; keys not listed in the
     *         class constants are ignored, and a repeated key keeps its last value
     */
    private static function parseDescriptionFields($description)
    {
        $knownKeys = array(
            self::CHROMOSOME,
            self::GENE,
            self::GENE_BIOTYPE,
            self::GENE_SYMBOL,
            self::TRANSCRIPT,
            self::TRANSCRIPT_BIOTYPE,
            self::DESCRIPTION
        );

        $matches = array();
        preg_match_all('/(\w+):(.*?(?=\s\[|\s\w+:|\s*$))/', $description, $matches);

        $keyValues = array();
        foreach ($matches[1] as $index => $key) {
            if (in_array($key, $knownKeys, true)) {
                $keyValues[$key] = $matches[2][$index];
            }
        }

        return $keyValues;
    }
}