Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
| Total | |
0.00% |
0 / 1 |
|
70.00% |
7 / 10 |
CRAP | |
93.15% |
68 / 73 |
| FastaReader | |
0.00% |
0 / 1 |
|
70.00% |
7 / 10 |
28.25 | |
93.15% |
68 / 73 |
| __construct | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
| current | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| key | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
| next | |
0.00% |
0 / 1 |
3.03 | |
85.71% |
6 / 7 |
|||
| rewind | |
100.00% |
1 / 1 |
3 | |
100.00% |
10 / 10 |
|||
| valid | |
100.00% |
1 / 1 |
2 | |
100.00% |
3 / 3 |
|||
| getLine | |
100.00% |
1 / 1 |
2 | |
100.00% |
5 / 5 |
|||
| peekLine | |
100.00% |
1 / 1 |
2 | |
100.00% |
4 / 4 |
|||
| parseEntry | |
0.00% |
0 / 1 |
9.03 | |
93.10% |
27 / 29 |
|||
| parseSequence | |
0.00% |
0 / 1 |
4.10 | |
81.82% |
9 / 11 |
|||
| <?php | |
| /** | |
| * Copyright 2019 University of Liverpool | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| namespace pgb_liv\php_ms\Reader; | |
| use Exception; | |
| use pgb_liv\php_ms\Core\Protein; | |
| use pgb_liv\php_ms\Reader\FastaEntry\FastaInterface; | |
| use pgb_liv\php_ms\Reader\FastaEntry\PeffFastaEntry; | |
| use pgb_liv\php_ms\Reader\FastaEntry\DefaultFastaEntry; | |
| use pgb_liv\php_ms\Reader\FastaEntry\FastaEntryFactory; | |
/**
 * A FASTA parser that creates a new iterable object that will return a database
 * entry on each iteration.
 *
 * The file is opened lazily by rewind(), so the reader is intended to be
 * consumed with foreach (which calls rewind() first). Lines are read one at a
 * time with a single-line look-ahead buffer, which lets the parser detect the
 * start of the next entry ('>') without consuming it.
 *
 * @author Andrew Collins
 */
class FastaReader implements \Iterator
{

    /**
     * Path of the FASTA file, as passed to the constructor
     *
     * @var string
     */
    private $filePath;

    /**
     * Handle of the currently open file, or null before the first rewind()
     *
     * @var resource|null
     */
    private $fileHandle;

    /**
     * One-line look-ahead buffer. Holds the line returned by the last
     * peekLine() call until getLine() consumes it; null or false when empty.
     *
     * @var string|false|null
     */
    private $filePeek;

    /**
     * The current protein that will be returned by the current() method
     *
     * @var Protein
     */
    private $current;

    /**
     * Number of entries parsed since the last rewind()
     *
     * @var int
     */
    private $key = 0;

    /**
     * The FASTA format engine to use for parsing
     *
     * @var FastaInterface
     */
    private $format;

    /**
     * Creates a reader for the specified file. The file is not opened until
     * rewind() is called.
     *
     * @param string $filePath
     *            Path to the FASTA file to read
     */
    public function __construct($filePath)
    {
        $this->filePath = $filePath;
    }

    /**
     *
     * {@inheritdoc}
     *
     * @see \Iterator::current()
     * @return Protein
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->current;
    }

    /**
     * Gets the number of entries parsed since the last rewind()
     *
     * @see \Iterator::key()
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->key;
    }

    /**
     * Advances to the next entry in the file. Entries that are rejected with
     * an InvalidArgumentException while parsing are skipped. When no further
     * entry is available the current element is cleared, making valid()
     * return false.
     *
     * @see \Iterator::next()
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->current = null;

        // Iterate rather than recurse so that a long run of rejected entries
        // cannot exhaust the call stack
        while (is_resource($this->fileHandle) && ! feof($this->fileHandle)) {
            try {
                $this->current = $this->parseEntry();
                return;
            } catch (\InvalidArgumentException $ex) {
                // Entry rejected, try the following one
                continue;
            }
        }
    }

    /**
     * (Re)opens the file, detects a PEFF header and loads the first entry.
     *
     * @see \Iterator::rewind()
     * @throws \RuntimeException If the file cannot be opened for reading
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        // Reset file parsing to start
        if (is_resource($this->fileHandle)) {
            fclose($this->fileHandle);
        }

        $handle = fopen($this->filePath, 'r');
        if ($handle === false) {
            $this->fileHandle = null;
            throw new \RuntimeException('Unable to open FASTA file: ' . $this->filePath);
        }

        $this->fileHandle = $handle;

        // Drop any line buffered from the previous position, otherwise it
        // would be replayed ahead of the first line of the reopened file
        $this->filePeek = null;

        if (stripos($this->peekLine(), '# PEFF') === 0) {
            $this->format = new PeffFastaEntry();
        }

        $this->key = 0;
        $this->next();
    }

    /**
     * Indicates whether current() holds a parsed entry
     *
     * @see \Iterator::valid()
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->current instanceof Protein;
    }

    /**
     * Gets the next line and increments the file iterator
     *
     * @return string|false The next line in the file, or false at end of file
     */
    private function getLine()
    {
        if ($this->filePeek === null || $this->filePeek === false) {
            return fgets($this->fileHandle);
        }

        // Hand out the buffered look-ahead line and empty the buffer
        $buffered = $this->filePeek;
        $this->filePeek = null;

        return $buffered;
    }

    /**
     * Gets the next line, though does not move the file iterator
     *
     * @return string|false The next line in the file, or false at end of file
     */
    private function peekLine()
    {
        if ($this->filePeek === null || $this->filePeek === false) {
            $this->filePeek = fgets($this->fileHandle);
        }

        return $this->filePeek;
    }

    /**
     * Parses the current chunk into a Protein object
     *
     * @return Protein
     */
    private function parseEntry()
    {
        // Scan to first entry, discarding anything that precedes a '>' line
        do {
            $line = trim($this->peekLine());
            if (strpos($line, '>') === 0) {
                break;
            }
        } while ($this->getLine() !== false);

        // Read the header. Directly consecutive '>' lines are concatenated
        // into a single header, each with its leading marker removed.
        $identifier = '';
        while (($line = $this->getLine()) !== false) {
            $identifier .= substr(trim($line), 1);

            if (strpos(trim($this->peekLine()), '>') !== 0) {
                break;
            }
        }

        // Everything after the first space is the description
        $description = '';
        $separator = strpos($identifier, ' ');
        if ($separator !== false) {
            $description = substr($identifier, $separator + 1);
            $identifier = substr($identifier, 0, $separator);
        }

        try {
            // Select a parser when none is chosen yet, or when only the
            // default parser has been in use so far
            if ($this->format == null || $this->format instanceof DefaultFastaEntry) {
                $this->format = FastaEntryFactory::getParser($identifier);
            }

            $protein = $this->format->getProtein($identifier, $description);
        } catch (\Exception $e) {
            // The active parser could not handle this header, so select a
            // parser from the header itself and retry once
            $this->format = FastaEntryFactory::getParser($identifier);
            $protein = $this->format->getProtein($identifier, $description);
        }

        $protein->setIdentifier($identifier);
        $protein->setSequence($this->parseSequence());
        $this->key ++;

        return $protein;
    }

    /**
     * Parses the sequence block from the FASTA file and returns the sequence without any line ending or formatting
     *
     * @return string
     */
    private function parseSequence()
    {
        $sequence = '';
        while (($line = $this->getLine()) !== false) {
            $sequence .= trim($line);

            // Stop before the header of the following entry
            if (strpos(trim($this->peekLine()), '>') === 0) {
                break;
            }
        }

        // Remove stop codon in IRGSP FASTA. Only a trailing '*' is removed;
        // a '*' elsewhere in the sequence must not cost the final residue.
        if (substr($sequence, - 1) === '*') {
            $sequence = substr($sequence, 0, - 1);
        }

        return $sequence;
    }
}