Source of file MzMlMerge.php
Size: 10,537 Bytes - Last Modified: 2017-10-17T12:36:38+00:00
src/Utility/Misc/MzMlMerge.php
<?php
/**
 * Copyright 2016 University of Liverpool
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
namespace pgb_liv\php_ms\Utility\Misc;

/**
 * Performs the merging of two or more MzML files into a single MzML file.
 * Note this class is built for speed and does not correctly validate all fields.
 *
 * The files are processed line by line with regular expressions rather than an XML parser, so each
 * element of interest (spectrumList, spectrum, precursor, scan start time cvParam) must sit on a single line.
 *
 * Typical usage: setOutputPath() and addDataFile() for every replicate/fraction, then analyseData(), then merge().
 *
 * @author Andrew Collins
 */
class MzMlMerge
{

    /**
     * Unit accession that marks a scan start time as being expressed in minutes
     */
    const UNIT_MINUTE = 'UO:0000031';

    /**
     * Matches the opening spectrumList tag and captures its count attribute
     */
    const PATTERN_SPECTRUM_LIST = '/<spectrumList(?=.*count="([0-9.]+)")/';

    /**
     * Matches a spectrum tag.
     * Captures: 1 = index attribute, 2 = index value, 3 = id attribute, 4 = "scan=N" chunk, 5 = scan number
     */
    const PATTERN_SPECTRUM = '/<spectrum(?=.*(index="([0-9.]+)"))(?=.*(id=".*(scan=([0-9]+)).*?"))/';

    /**
     * Matches a scan start time cvParam (MS:1000016).
     * Captures: 1 = value attribute without its closing quote, 2 = value, 3 = unit accession
     */
    const PATTERN_SCAN_TIME = '/accession="MS:1000016"(?=.*(value="([0-9.]+))")(?=.*unitAccession="([A-Z0-9:]+)")/';

    /**
     * Next free zero-based spectrum index in the output file currently being written
     *
     * @var int
     */
    private $indexOffset;

    /**
     * Highest scan number written so far to the output file currently being written
     *
     * @var int
     */
    private $idOffset;

    /**
     * Map of original spectrum id => rewritten spectrum id, used to fix precursor references
     *
     * @var array
     */
    private $spectrumIdRef = array();

    /**
     * Input files, keyed by replicate and then by fraction index
     *
     * @var array
     */
    private $dataFiles = array();

    /**
     * Output paths, keyed by replicate
     *
     * @var array
     */
    private $outputFiles = array();

    /**
     * Start time offset (seconds) of each fraction, keyed by fraction index.
     * Populated by analyseData()
     *
     * @var array
     */
    private $fractionOffsets = array();

    /**
     * Total number of spectra per replicate.
     * Populated by analyseData()
     *
     * @var array
     */
    private $spectrumCount = array();

    /**
     * The number of seconds that should be used as padding between scans.
     * This should be high enough that image recognition alignment tools can identify a distinct boundary
     *
     * @var int
     */
    private $paddingBetweenScans = 600;

    /**
     * Sets the output path for a specified replicate
     *
     * @param int $replicate
     *            The replicate ID
     * @param string $path
     *            The path to write to
     */
    public function setOutputPath($replicate, $path)
    {
        $this->outputFiles[$replicate] = $path;
    }

    /**
     * Adds a data file for processing.
     * The index is the fraction order which should be consistent across replicates
     *
     * @param int $replicate
     *            The replicate ID
     * @param int $index
     *            The fraction index
     * @param string $file
     *            The path of the MzML file to read
     */
    public function addDataFile($replicate, $index, $file)
    {
        $this->dataFiles[$replicate][$index] = array(
            'path' => $file,
            'endTime' => - 1,
            'endIndex' => - 1,
            'endScans' => - 1,
            'spectra' => 0
        );
    }

    /**
     * Performs a first pass over every registered data file to find its spectrum count and last
     * index, scan number and scan time, then derives the start time offset of every fraction.
     *
     * @return array The analysed data files, keyed by replicate and then by fraction index
     * @throws \RuntimeException If a data file cannot be opened
     */
    public function analyseData()
    {
        // TODO: Validate input and output

        // Start from a clean slate so calling this method more than once does not double count
        $this->fractionOffsets = array();
        $this->spectrumCount = array();

        // Perform first pass on each data file to identify start, stop RT
        foreach ($this->dataFiles as $replicate => $fractions) {
            // The fraction index defines the order, regardless of the order files were added in
            ksort($fractions);

            foreach ($fractions as $index => $file) {
                $fractions[$index] = $this->analyseFile($file);
            }

            $this->dataFiles[$replicate] = $fractions;
        }

        // Record the longest run time of each fraction across replicates and the spectra per replicate
        foreach ($this->dataFiles as $replicate => $fractions) {
            if (! isset($this->spectrumCount[$replicate])) {
                $this->spectrumCount[$replicate] = 0;
            }

            foreach ($fractions as $index => $file) {
                if (! isset($this->fractionOffsets[$index]) || $file['endTime'] > $this->fractionOffsets[$index]) {
                    $this->fractionOffsets[$index] = $file['endTime'];
                }

                $this->spectrumCount[$replicate] += $file['spectra'];
            }
        }

        ksort($this->fractionOffsets);

        // Push forward: turn each fraction's duration into the start offset of that fraction
        $sum = 0;
        foreach ($this->fractionOffsets as $index => $time) {
            $this->fractionOffsets[$index] = $sum;

            // Add time to force separation in alignment tools
            $sum += $time + $this->paddingBetweenScans;
        }

        return $this->dataFiles;
    }

    /**
     * Gets the start time offset, in seconds, of each fraction as calculated by analyseData()
     *
     * @return array Offsets keyed by fraction index
     */
    public function getFractionOffsets()
    {
        return $this->fractionOffsets;
    }

    /**
     * Scans the spectrum list of a single file and records its statistics
     *
     * @param array $file
     *            A data file entry as created by addDataFile()
     * @return array The entry with 'spectra', 'endIndex', 'endScans' and 'endTime' (seconds) updated
     * @throws \RuntimeException If the file cannot be opened
     */
    private function analyseFile($file)
    {
        $reader = $this->openFile($file['path'], 'r');
        $isSpectrumList = false;

        while (($line = fgets($reader)) !== false) {
            if (stripos($line, '<spectrumList') !== false) {
                $isSpectrumList = true;

                if (preg_match(self::PATTERN_SPECTRUM_LIST, $line, $matches)) {
                    $file['spectra'] = (int) $matches[1];
                }

                continue;
            }

            if (! $isSpectrumList) {
                continue;
            }

            if (stripos($line, '</spectrumList') !== false) {
                break;
            }

            if (preg_match(self::PATTERN_SPECTRUM, $line, $matches)) {
                $index = (int) $matches[2];
                if ($index > $file['endIndex']) {
                    $file['endIndex'] = $index;
                }

                $scan = (int) $matches[5];
                if ($scan > $file['endScans']) {
                    $file['endScans'] = $scan;
                }
            }

            if (preg_match(self::PATTERN_SCAN_TIME, $line, $matches)) {
                $scanTime = (float) $matches[2];

                // End times are always tracked in seconds
                if ($matches[3] == self::UNIT_MINUTE) {
                    $scanTime *= 60;
                }

                if ($scanTime > $file['endTime']) {
                    $file['endTime'] = $scanTime;
                }
            }
        }

        fclose($reader);

        return $file;
    }

    /**
     * Merges the fractions added via addDataFile() into a single MzML file per replicate, written to the
     * path given via setOutputPath().
     * The header and footer of the output are copied from the first fraction of the replicate.
     * analyseData() must be called first.
     *
     * @throws \LogicException If analyseData() has not been run or a replicate has no output path
     * @throws \RuntimeException If a file cannot be opened
     */
    public function merge()
    {
        foreach ($this->dataFiles as $replicate => $fractions) {
            if (empty($fractions)) {
                continue;
            }

            if (! isset($this->outputFiles[$replicate])) {
                throw new \LogicException('No output path has been set for replicate "' . $replicate . '"');
            }

            if (! isset($this->spectrumCount[$replicate])) {
                throw new \LogicException('analyseData() must be called before merge()');
            }

            $outputFile = $this->outputFiles[$replicate];
            $firstFile = reset($fractions);

            // Write header
            $this->writeHeader($firstFile['path'], $this->spectrumCount[$replicate], $outputFile);

            // Resets vars
            $this->spectrumIdRef = array();
            $this->indexOffset = 0;
            $this->idOffset = 0;

            foreach ($fractions as $fractionIndex => $file) {
                if (! isset($this->fractionOffsets[$fractionIndex])) {
                    throw new \LogicException('analyseData() must be called before merge()');
                }

                $this->writeSpectrum($file['path'], $outputFile, $this->fractionOffsets[$fractionIndex]);
            }

            $this->writeFooter($firstFile['path'], $outputFile);
        }
    }

    /**
     * Copies everything up to and including the opening spectrumList tag to a new output file, replacing
     * the spectrum count with the merged total.
     *
     * @param string $file
     *            Path of the file to copy the header from
     * @param int $spectrumCount
     *            Total number of spectra that will be written to the output
     * @param string $outputFile
     *            Path of the file to create (truncated if it exists)
     * @throws \RuntimeException If either file cannot be opened
     */
    private function writeHeader($file, $spectrumCount, $outputFile)
    {
        $reader = $this->openFile($file, 'r');
        $writer = $this->openFile($outputFile, 'w');

        while (($line = fgets($reader)) !== false) {
            // Only touch the count attribute of the tag itself. A plain substring replace of the number
            // would also rewrite any other attribute on the line that happens to contain the same digits
            $line = preg_replace('/(<spectrumList\b[^>]*?\bcount=")[0-9.]+(")/', '${1}' . $spectrumCount . '${2}', $line, 1);

            fwrite($writer, $line);

            if (stripos($line, '<spectrumList') !== false) {
                break;
            }
        }

        fclose($reader);
        fclose($writer);
    }

    /**
     * Appends everything from the closing spectrumList tag onwards to the output file
     *
     * @param string $file
     *            Path of the file to copy the footer from
     * @param string $outputFile
     *            Path of the file to append to
     * @throws \RuntimeException If either file cannot be opened
     */
    private function writeFooter($file, $outputFile)
    {
        $reader = $this->openFile($file, 'r');
        $writer = $this->openFile($outputFile, 'a');
        $isPastSpectrumList = false;

        while (($line = fgets($reader)) !== false) {
            if (stripos($line, '</spectrumList') !== false) {
                $isPastSpectrumList = true;
            }

            if ($isPastSpectrumList) {
                fwrite($writer, $line);
            }
        }

        fclose($reader);
        fclose($writer);
    }

    /**
     * Appends the contents of a file's spectrum list to the output file, renumbering spectrum indices and
     * scan numbers so they continue on from the previously written fraction, updating precursor references
     * to match and shifting scan start times by the fraction offset.
     *
     * @param string $file
     *            Path of the fraction to read
     * @param string $outputFile
     *            Path of the file to append to
     * @param float $timeOffset
     *            Number of seconds to add to every scan start time
     * @throws \RuntimeException If either file cannot be opened
     */
    private function writeSpectrum($file, $outputFile, $timeOffset)
    {
        $reader = $this->openFile($file, 'r');
        $writer = $this->openFile($outputFile, 'a');
        $isSpectrumList = false;

        // Local offsets, fixed for the duration of this fraction
        $localIndexOffset = $this->indexOffset;
        $localIdOffset = $this->idOffset;

        while (($line = fgets($reader)) !== false) {
            if (stripos($line, '<spectrumList') !== false) {
                $isSpectrumList = true;
                continue;
            }

            if (! $isSpectrumList) {
                continue;
            }

            if (stripos($line, '</spectrumList') !== false) {
                break;
            }

            if (preg_match(self::PATTERN_SPECTRUM, $line, $matches)) {
                $index = (int) $matches[2] + $localIndexOffset;

                // Indices are zero-based, so the next fraction must start one past the highest index written.
                // Tracking the highest index itself would make fractions overlap by one spectrum
                if ($index + 1 > $this->indexOffset) {
                    $this->indexOffset = $index + 1;
                }

                // Scan numbers in the id are taken to be one-based, so tracking the highest value is sufficient
                $scan = (int) $matches[5] + $localIdOffset;
                if ($scan > $this->idOffset) {
                    $this->idOffset = $scan;
                }

                $line = str_replace($matches[1], 'index="' . $index . '"', $line);

                $idChunk = str_replace($matches[4], 'scan=' . $scan, $matches[3]);

                // Strip the leading 'id="' and trailing '"' to map old id value => new id value
                $this->spectrumIdRef[substr($matches[3], 4, - 1)] = substr($idChunk, 4, - 1);
                $line = str_replace($matches[3], $idChunk, $line);
            }

            if (preg_match('/<precursor(?=.*(spectrumRef="(.*?)"))/', $line, $matches)) {
                // References to spectra that have not been seen are left untouched rather than blanked
                if (isset($this->spectrumIdRef[$matches[2]])) {
                    $line = str_replace($matches[1], 'spectrumRef="' . $this->spectrumIdRef[$matches[2]] . '"', $line);
                }
            }

            if (preg_match(self::PATTERN_SCAN_TIME, $line, $matches)) {
                $isMinutes = $matches[3] == self::UNIT_MINUTE;
                $scanTime = (float) $matches[2];

                // The offset is in seconds, so convert, shift and convert back
                if ($isMinutes) {
                    $scanTime *= 60;
                }

                $scanTime += $timeOffset;

                if ($isMinutes) {
                    $scanTime /= 60;
                }

                // Replace the whole value attribute. Replacing the bare number would also corrupt other
                // attributes containing the same digits, e.g. value="16" inside accession="MS:1000016"
                $line = str_replace($matches[1] . '"', 'value="' . $scanTime . '"', $line);
            }

            fwrite($writer, $line);
        }

        fclose($reader);
        fclose($writer);
    }

    /**
     * Opens a file, failing loudly instead of returning false.
     * Looping on feof() with a failed handle would otherwise never terminate.
     *
     * @param string $path
     *            Path of the file to open
     * @param string $mode
     *            fopen() mode
     * @return resource The open file handle
     * @throws \RuntimeException If the file cannot be opened
     */
    private function openFile($path, $mode)
    {
        $handle = fopen($path, $mode);

        if ($handle === false) {
            throw new \RuntimeException('Unable to open file "' . $path . '" with mode "' . $mode . '"');
        }

        return $handle;
    }
}