'****************************************************************************
'* Copyright (C) 2004 Peter Mortensen and Matthias Mann *
'* This file is part of MSQuant. *
'* *
'* MSQuant is distributed under the terms of *
'* the GNU General Public License. See src/COPYING.TXT or *
'* <http://www.gnu.org/licenses/gpl.txt> for details. *
'* *
'* MSQuant is free software; you can redistribute it *
'* and/or modify it under the terms of the GNU *
'* General Public License as published by the Free *
'* Software Foundation; either version 2 of the *
'* License, or (at your option) any later version. *
'* *
'* MSQuant is distributed in the hope that it will be *
'* useful, but WITHOUT ANY WARRANTY; without even the *
'* implied warranty of MERCHANTABILITY or FITNESS FOR *
'* A PARTICULAR PURPOSE. See the GNU General Public *
'* License for more details. *
'* *
'* You should have received a copy of the GNU General *
'* Public License along with MSQuant; if not, write to *
'* the Free Software Foundation, Inc., 59 Temple *
'* Place, Suite 330, Boston, MA 02111-1307 USA *
'* *
'* Purpose: Holds Class MultipleRawFilesHandling, see below *
'* for documentation. *
'* *
'****************************************************************************
'****************************************************************************
'* CEBI *
'* Software Development Group *
'* Peter Mortensen *
'* E-mail: NUKESPAMMERSdrmortensen@get2netZZZZZZ.dk *
'* WWW: http://www.cebi.sdu.dk/ *
'* *
'* Program for post-processing of result from search in mass *
'* spectrometric data. *
'* *
'* FILENAME: MultipleRawFilesHandling.vb *
'* TYPE: VISUAL_BASIC *
'* *
'* CREATED: PM 2003-11-21 Vrs 1.0. *
'* UPDATED: PM 2003-xx-xx *
'* *
'****************************************************************************
Option Strict On
Option Explicit On
Imports System.IO 'For Path class/functions
Imports System.Text.RegularExpressions
Imports System.Collections.Generic 'For Dictionary
Imports App 'For class quantApplication
'Imports System.xyz
'****************************************************************************
'd$ <summary>
'd$ Purpose: Namespace for lower layers of mass spectrometric
'd$ applications: raw data file handling, descriptive statistics,
'd$ fragment masses, digestion, file associations, etc.
'd$ <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$ <isUnitTest></isUnitTest>
'd$ <applicationname>test_rawDataFileHandling</applicationname>
'd$ <author>Peter Mortensen</author>
'd$ <seealso>http://www.cebi.sdu.dk/</seealso>
'd$ <codetype>PLATFORM independent / GUI</codetype>
'd$ </summary>
Namespace massSpectrometryBase
'****************************************************************************
'd$ <summary>
'd$ Purpose: Encapsulates the details of handling several raw files at a time.
'd$
'd$
'd$
'd$ <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$ <applicationname>test_rawDataFileHandling</applicationname>
'd$ <author>Peter Mortensen</author>
'd$ <seealso>http://www.cebi.sdu.dk/</seealso>
'd$ <codetype>PLATFORM independent / GUI</codetype>
'd$ </summary>
Public NotInheritable Class MultipleRawFilesHandling
'First raw file ID handed out; New() initialises mCurrentRawFileNumber to it.
Const RAWFILEREF_BASE As Integer = 809
'ID used by addPossibleFileName() when the value string is numeric, i.e.
'when there is only a single (non-combined) raw file. Also the value
'returned by getDefaultRawFileID().
'NOTE(review): mCurrentRawFileNumber counts upwards from RAWFILEREF_BASE,
' so it would reach this value after 980 increments - confirm that
' this can not happen in practice.
Public Const RAWFILEREF_NOTMULTIPLE As Integer = 1789
'Changed PM_PROTEINRATIO_FOR_EACH_RAWFILE 2006-02-27
'They have to be distinct, for the code in CalculateProteinRatio()
'to work.
Public Const REF_WHOLEPROTEIN As Integer = RAWFILEREF_NOTMULTIPLE + 1
'One entry (path and ID) per registered raw file; grown by one element
'each time addPossibleFileName() registers a new name.
Private mRawFiles() As fileSpecStructure
'Directory prefix that addPossibleFileName() prepends to raw file names.
'Set by SetBaseDirFromFullFileNamePath().
Private mDirPath As String
'ID that the next newly registered raw file name will get.
Private mCurrentRawFileNumber As Integer
'Changed PM_REFACTOR 2008-02-05
'Private mRawFileNames As Hashtable
Private mRawFileNamesHash As Dictionary(Of String, Integer) 'Note: helper
' object for filling in mRawFiles. Maps file name to raw file ID.
'Index of the next free slot in mRawFiles.
Private mArrIndex As Integer
'The full path passed to SetBaseDirFromFullFileNamePath(). Used as the
'path of the raw file in the single raw file ("NO_MULTIPLE") case.
Private mOneFullRawFileNamePath As String
'Raw file mode passed to New(); selects the expected file name extension
'in addPossibleFileName().
Private mRawFileType As rawFileModeEnum
'Changed PM_MULTIPLE_FINNEGAN_TROUBLE 2004-01-06
Private mRecentlyAddedRawFile As Boolean 'Note: this
' is used to re-create our raw file object after the list
' of raw files is added to.
' This will happen in the beginning of a parsing when the
' full list of raw files is not yet known.
'Changed PM_REFACTOR 2008-02-05. Disabled. Does not seem to be used...
'Private mRawFileObjects2 As Hashtable
'Raw file object used for the retention time conversions. Created
'lazily by possibleNewFileHandling().
Private mForConversionRawDataFileHandling As rawDataFileHandling
'Changed PM_IRINA_SAMPLE_NUMBER_TROUBLE 2003-12-02
Private mGeneralSampleNumber As Integer 'This implies that the
' sample number is the same in all files which may indeed
' not be what we want.
'Changed PM_GENERALISED_SPECTRUMCLASSIFICATION 2005-03-14
'Was only introduced for getClassifierObject(). Is it a problem
'with this dependency on the application object?
Private mApplication As quantApplication
'COM version passed to New(); handed on when the raw file object is
'created in possibleNewFileHandling().
Private mCOMversion As COMversionEnum
'Changed PM_ACCEPT_UPPER_CASE 2008-02-05
'Accepted raw file name extensions, including the leading dot. Filled
'in by New() and consulted by addPossibleFileName().
Private mAcceptedRawFileExtensionsWithDot As _
Dictionary(Of String, Integer)
'****************************************************************************
'* SUBROUTINE NAME: New *
'd$ <summary>Constructor. Stores the raw file mode, the application
'd$ object and the COM version, resets the raw file bookkeeping
'd$ and builds the table of accepted raw file name extensions.
'd$
'd$ aRawFileType: raw file mode; decides which file name
'd$ extension addPossibleFileName() expects.
'd$ anApplication: application object; only read here (it is
'd$ ByRef for historical reasons and is not
'd$ reassigned).
'd$ aCOMversion: handed on when the raw file object is created.
'd$ </summary>
Public Sub New( _
ByVal aRawFileType As rawFileModeEnum, _
ByRef anApplication As quantApplication, _
ByVal aCOMversion As COMversionEnum)
    MyBase.New() 'Is this necessary? Yes!

    mRawFileType = aRawFileType
    mApplication = anApplication
    'Changed PM_COMVERSIONS 2007-07-24
    mCOMversion = aCOMversion

    'Raw file bookkeeping starts out empty.
    mCurrentRawFileNumber = RAWFILEREF_BASE
    mRawFileNamesHash = New Dictionary(Of String, Integer)
    mArrIndex = 0

    mGeneralSampleNumber = -1 'For error detection. Clients MUST set it.

    'Changed PM_ACCEPT_UPPER_CASE 2008-02-05
    'The keys are compared case-insensitively so that any spelling
    'of an extension (".raw", ".RAW", ".Raw", ...) is accepted, not
    'just all-lower and all-upper case. Only the presence of a key
    'is used by addPossibleFileName(); the values are not read there.
    mAcceptedRawFileExtensionsWithDot = _
        New Dictionary(Of String, Integer)( _
            System.StringComparer.OrdinalIgnoreCase)
    mAcceptedRawFileExtensionsWithDot.Add(".wiff", 1)
    mAcceptedRawFileExtensionsWithDot.Add(".raw", 2)
    mAcceptedRawFileExtensionsWithDot.Add(".idx", 3)
End Sub 'New()
'Changed PM_IRINA_SAMPLE_NUMBER_TROUBLE 2003-12-02
'****************************************************************************
'* SUBROUTINE NAME: SetGeneralSampleNumber *
'd$ <summary> Stores the sample number that is used for all raw
'd$ files. It must be set (to a positive value) before any of
'd$ the retention time functions are called;
'd$ possibleNewFileHandling() asserts on this.
'd$ </summary>
Public Sub SetGeneralSampleNumber(ByVal aGeneralSampleNumber As Integer)
    Me.mGeneralSampleNumber = aGeneralSampleNumber
End Sub 'SetGeneralSampleNumber
'Changed PM_EXTERNAL_QUANT_BROKEN 2003-12-02
'****************************************************************************
'* SUBROUTINE NAME: getDefaultRawFileID *
'd$ <summary> Returns the raw file ID that stands for "no multiple
'd$ raw files", RAWFILEREF_NOTMULTIPLE.
'd$ </summary>
Public Shared Function getDefaultRawFileID() As Integer
    Dim defaultID As Integer = _
        MultipleRawFilesHandling.RAWFILEREF_NOTMULTIPLE
    Return defaultID
End Function 'getDefaultRawFileID
'****************************************************************************
'* SUBROUTINE NAME: SetBaseDirFromFullFileNamePath *
'd$ <summary> Remembers the given full raw file path and derives
'd$ from it the directory prefix that is prepended to
'd$ raw file names in addPossibleFileName().
'd$
'd$ The prefix ends with a directory separator, except
'd$ when the path has no directory part at all (bare
'd$ file name); then the prefix is empty so that file
'd$ names stay relative to the current directory
'd$ instead of being turned into "\name" (drive root).
'd$ </summary>
Public Sub SetBaseDirFromFullFileNamePath( _
ByVal aFullFileNamePath As String)
    mOneFullRawFileNamePath = aFullFileNamePath

    'GetDirectoryName() returns Nothing for Nothing/root input and an
    'empty string for a path without directory information.
    Dim dirName As String = Path.GetDirectoryName(aFullFileNamePath)
    If String.IsNullOrEmpty(dirName) Then
        mDirPath = ""
    Else
        'A root directory ("C:\") already ends with a separator;
        'do not add a second one.
        Dim lastChar As Char = dirName.Chars(dirName.Length - 1)
        If lastChar = Path.DirectorySeparatorChar OrElse _
           lastChar = Path.AltDirectorySeparatorChar Then
            mDirPath = dirName
        Else
            mDirPath = dirName & Path.DirectorySeparatorChar
        End If
    End If
End Sub 'SetBaseDirFromFullFileNamePath
'****************************************************************************
'* SUBROUTINE NAME: addPossibleFileName *
'd$ <summary> Registers a raw file name, if it is not registered
'd$ already, and returns its raw file ID.
'd$
'd$ aValueString: usually a raw file name, or a number
'd$ (e.g. 0) when there is only a single raw
'd$ file. A numeric value is registered under
'd$ the name "NO_MULTIPLE" with ID
'd$ RAWFILEREF_NOTMULTIPLE.
'd$ anOutNoProblem: set to False if the file name is not
'd$ accepted; otherwise left untouched.
'd$ aNumParseErrors: incremented if the file name is not
'd$ accepted.
'd$
'd$ A file name is accepted if it can be parsed as a path
'd$ and its extension is in the table of accepted
'd$ extensions. For a new, non-numeric name it is asserted
'd$ that the extension matches the current raw file mode.
'd$
'd$ Side effects: a new name is added to the name table and
'd$ to the raw file array (see getRawFiles()), and
'd$ mRecentlyAddedRawFile is set.
'd$ </summary>
Public Function addPossibleFileName( _
ByVal aValueString As String, _
ByRef anOutNoProblem As Boolean, ByRef aNumParseErrors As Integer _
) As Integer
    Dim periodValueString As String = ""
    Dim rawFileNumberToUse As Integer

    'Out here to be used in the last part of this function.
    Dim fExtension2 As String = Nothing

    If IsNumeric(aValueString) Then
        'Changed PM_SEVERALWIFFS_BACKCOMPATIBILITY 2003-08-28
        rawFileNumberToUse = RAWFILEREF_NOTMULTIPLE
        periodValueString = "NO_MULTIPLE"
    Else
        Dim acceptedFileName As Boolean = True
        Dim fileNoExtension As String = Nothing

        Try 'Exception can happen if it is not a valid Windows file
            ' name (say from mismatch between the parse rule
            ' and the file content),
            ' e.g. "20080117JEA_SILAC2_1_06.RAW<BR>Score" ( "<"
            ' and ">" are invalid).
            'Changed PM_ACCEPT_UPPER_CASE 2008-02-05. Accept
            ' upper-case as well.
            fileNoExtension = _
                Path.GetFileNameWithoutExtension(aValueString)

            'Note: GetExtension() includes the dot. Sample: ".raw".
            fExtension2 = Path.GetExtension(aValueString)

            'Throws for a Nothing key (Nothing input); that is
            'handled as "not proper" below.
            acceptedFileName = _
                mAcceptedRawFileExtensionsWithDot.ContainsKey( _
                    fExtension2)
        Catch exceptionObject As Exception
            acceptedFileName = False

            'For now avoid getting the message "Bad raw
            ' file ID..." first.
            Trace.Assert(False, _
                "PIL ASSERT. " & _
                AppConstants.LONG_APP & " can not continue. " & _
                "The extracted raw file name, " & _
                aValueString & _
                ", is not proper. " & _
                "It may help to change the MGF generator (in menu Tools/Options) to " & _
                "something that match the information in the " & _
                "Mascot result file.")
        End Try

        If acceptedFileName Then
            'Changed PM_ACCEPT_UPPER_CASE 2008-02-05.
            'The case of the extension is kept as it was in the input.
            periodValueString = fileNoExtension & fExtension2
            rawFileNumberToUse = mCurrentRawFileNumber
        Else
            'NOTE(review): a rejected name is still registered
            ' below, under the empty name and with ID 0. This
            ' is left as is because the asserts below are what
            ' tell the user what is wrong - confirm that the
            ' resulting entry in the raw file array is wanted.
            anOutNoProblem = False
            aNumParseErrors += 1
        End If
    End If

    'NOTE(review): the flag is reset on every call, also when an
    ' earlier call added a file and possibleNewFileHandling() has
    ' not run since - confirm that this is intended.
    mRecentlyAddedRawFile = False

    Dim rawFileRefNum As Integer = -1
    If Not mRawFileNamesHash.TryGetValue( _
        periodValueString, rawFileRefNum) Then

        'Note: we are only here very few times (equal to number
        ' of raw files). Thus memory efficiency is
        ' of ***no*** concern here.

        'Changed PM_NOMULTIPLE_ASSERT 2004-01-28. Don't check for
        'non-combined raw files...
        'Changed PM_RAWFILE_CASESENSITIVITY 2004-01-26
        If rawFileNumberToUse <> RAWFILEREF_NOTMULTIPLE Then
            Dim defaultExtension As String = Nothing
            Select Case mRawFileType
                Case rawFileModeEnum.enumAnalyst
                    defaultExtension = ".wiff"
                Case rawFileModeEnum.enumFinnegan
                    defaultExtension = ".raw"
                Case rawFileModeEnum.enumMicromass
                    defaultExtension = ".idx"
                Case Else
                    Trace.Assert(False, _
                        "PIL ASSERT. Select Case never fall-through")
            End Select

            'Changed PM_REFACTOR 2008-02-05
            'The extension is Nothing if the path functions above
            'failed. The lower-casing must be culture-invariant;
            'otherwise e.g. ".WIFF" does not become ".wiff" with a
            'Turkish locale.
            Dim extLower As String = ""
            If fExtension2 IsNot Nothing Then
                extLower = fExtension2.ToLowerInvariant()
            End If

            If extLower.Length > 0 Then
                Trace.Assert(defaultExtension = extLower, _
                    "PIL ASSERT. " & _
                    AppConstants.LONG_APP & _
                    " can not continue. The extension for " & _
                    periodValueString & _
                    " does not match the expected extension (" & _
                    defaultExtension & _
                    ") for the current raw data file mode. " & _
                    "It may help to change the Raw file type " & _
                    "in menu Tools/Options.")
            Else
                Trace.Assert(Not String.IsNullOrEmpty(aValueString), _
                    "PIL ASSERT. " & _
                    AppConstants.LONG_APP & " can not continue. " & _
                    "The ""period"" field or the ""rawFile"" field " & _
                    "(or similar) does not exist at all. " & _
                    "Please make sure the input file to the Mascot search is in " & _
                    "the proper format. The expected format can e.g. be seen on " & _
                    "the " & AppConstants.LONG_APP & _
                    " home page, <" & AppConstants.APP_HOMEPAGE & ">, " & _
                    "section ""Preparing input files for " & _
                    "the Mascot search engine"". " _
                    )
            End If
        End If

        mRawFileNamesHash.Add(periodValueString, rawFileNumberToUse)
        rawFileRefNum = rawFileNumberToUse
        mCurrentRawFileNumber += 1

        'Update array and find full path, etc.
        ReDim Preserve mRawFiles(mArrIndex)

        'Changed PM_SEVERALWIFFS_BACKCOMPATIBILITY 2003-08-28
        Dim fullPath As String
        If periodValueString = "NO_MULTIPLE" Then
            fullPath = mOneFullRawFileNamePath
        Else
            fullPath = mDirPath & periodValueString
        End If

        Dim newItem2 As fileSpecStructure
        newItem2.path = fullPath
        newItem2.tag2 = rawFileNumberToUse
        mRawFiles(mArrIndex) = newItem2

        mRecentlyAddedRawFile = True
        mArrIndex += 1
    End If

    Return rawFileRefNum
End Function 'addPossibleFileName
'****************************************************************************
'* SUBROUTINE NAME: getRawFiles *
'd$ <summary> Returns the array of raw files registered so far
'd$ (Nothing if none has been registered). Asserts that
'd$ an existing array is not empty.
'd$ </summary>
Public Function getRawFiles() _
As fileSpecStructure()
    If mRawFiles IsNot Nothing Then
        Trace.Assert(mRawFiles.Length > 0, _
            "PIL ASSERT. Empty raw file list!")
    End If
    Return mRawFiles
End Function 'getRawFiles
'Changed PM_REFACTOR 2004-11-24
'****************************************************************************
'* SUBROUTINE NAME: possibleNewFileHandling *
'd$ <summary> Helper function that makes sure a raw file object
'd$ exists and then makes aRawFileRefNum its current
'd$ file.
'd$
'd$ The raw file object is (re-)created if it does not
'd$ exist yet or if a raw file was registered by the
'd$ most recent call of addPossibleFileName().
'd$
'd$ Asserts that at least one raw file is registered
'd$ and that the general sample number has been set
'd$ (is positive).
'd$ </summary>
Private Sub possibleNewFileHandling(ByVal aRawFileRefNum As Integer)
    'The array does not exist at all until the first raw file has
    'been registered; treat that as an empty list so we get the
    'assert below instead of a NullReferenceException.
    Dim rawFileCount As Integer = 0
    If mRawFiles IsNot Nothing Then
        rawFileCount = mRawFiles.Length
    End If
    Trace.Assert(rawFileCount > 0, "PIL ASSERT. Empty raw file list!")

    'If we have got more raw files since last call then we
    'recreate the raw file object.
    'Changed PM_MULTIPLE_FINNEGAN_TROUBLE 2004-01-06
    If mRecentlyAddedRawFile OrElse _
       mForConversionRawDataFileHandling Is Nothing Then

        Trace.Assert(mGeneralSampleNumber > 0, _
            "PIL ASSERT. Internal error. mGeneralSampleNumber is not initialised.")

        'Changed PM_MULTIPLE_FINNEGAN_TROUBLE 2004-01-06
        'Note: a previous raw file object is not cleaned up
        ' explicitly (a call of doCleanUp() was disabled here).

        Dim sharedSpectrumClassifier As spectrumClassifier = _
            mApplication.getClassifierObject()

        'Presumably mForConversionRawDataFileHandling is an output
        '(ByRef) parameter here - it is not assigned anywhere
        'else in this class. TODO confirm.
        quantApplication.createAndInitRawFileObject( _
            mRawFileType, mRawFiles, mForConversionRawDataFileHandling, _
            mGeneralSampleNumber, _
            sharedSpectrumClassifier, _
            mCOMversion)

        mRecentlyAddedRawFile = False
    End If

    mForConversionRawDataFileHandling.newCurrentFile(aRawFileRefNum)
End Sub 'possibleNewFileHandling
'****************************************************************************
'* SUBROUTINE NAME: retentionTime2 *
'd$ <summary> Returns retention time in seconds for spectrum
'd$ aSpectrumNumber in the raw file with ID
'd$ aRawFileRefNum.
'd$
'd$ aCOMversion is not used in this function.
'd$ </summary>
Public Function retentionTime2( _
ByVal aSpectrumNumber As Integer, _
ByVal aRawFileRefNum As Integer, _
ByVal aCOMversion As COMversionEnum) _
As Double
    'Changed PM_REFACTOR 2004-11-24
    'Make sure the raw file object exists and points to the
    'requested raw file.
    possibleNewFileHandling(aRawFileRefNum)

    'The second argument is always 0 here. Its meaning is not
    'documented in this file.
    Return mForConversionRawDataFileHandling.SpectrumNumber2RetentionTime( _
        aSpectrumNumber, 0)
End Function 'retentionTime2
'****************************************************************************
'* SUBROUTINE NAME: MaybeToBeChanged_MSMSretentionTime *
'd$ <summary> Returns retention time in seconds.
'd$ Temporary function that converts the scan number
'd$ of a fragment spectrum into a retention time. The
'd$ work is delegated to the raw file object, after
'd$ aRawFileRefNum has been made its current file.
'd$
'd$ Micromass: The scan numbers in the MSMS channels do not
'd$ correspond to the scan numbers in the MS channel.
'd$ The number (passed as aMSMSspecification) in the
'd$ FinneganScanNumber field in the Mascot result file is
'd$ a number from the PKL file. That number is the scan
'd$ number for the corresponding MSMS channel number.
'd$ The MSMS channel number (passed as aExtraNumber) is
'd$ encoded in the experiment field in the Mascot result
'd$ file. These conversions are defined by how the Perl
'd$ script PKL2Mascot.pl is written.
'd$
'd$ Finnigan: The Finnigan spectrum numbering scheme is flat. A
'd$ spectrum with a particular number can be either MS
'd$ or MSMS. The type is encoded in the spectrum title.
'd$ aMSMSspecification is taken as Finnigan spectrum
'd$ number. aExtraNumber is ignored.
'd$ </summary>
Public Function MaybeToBeChanged_MSMSretentionTime( _
ByVal aMSMSspecification As Integer, _
ByVal aExtraNumber As Integer, _
ByVal aRawFileRefNum As Integer) _
As Double
    'Changed PM_REFACTOR 2004-11-24
    possibleNewFileHandling(aRawFileRefNum)

    Dim retentionSecs As Double = _
        mForConversionRawDataFileHandling.MaybeToBeChanged_MSMSretentionTime( _
            aMSMSspecification, aExtraNumber)
    Return retentionSecs
End Function 'MaybeToBeChanged_MSMSretentionTime
'Changed PM_FRAGMENTS_FOR_INSERTED 2004-11-24
'****************************************************************************
'* SUBROUTINE NAME: retentionTimeForNearestMSMSspectrum *
'd$ <summary> Given an approximate retention time, returns the
'd$ retention time (in seconds) of the first fragment
'd$ (MS-MS) spectrum that the raw file object reports
'd$ for the MS spectrum at that time.
'd$
'd$ If the approximate retention time is not strictly
'd$ inside the retention time range of the raw file, or
'd$ if no fragment spectrum is found, then the input
'd$ retention time is returned and both
'd$ anOutFoundMSMS and anOutRetentionTimeWithinFile are
'd$ set to False. Otherwise both are True.
'd$
'd$ The two tolerance parameters and anInPrecursorMCR
'd$ are not used at this time; in particular it is NOT
'd$ checked that the precursor mass is matching.
'd$
'd$ This function is a convenience for the client. The alternative
'd$ would be to expose more of the raw file objects methods.
'd$ </summary>
Public Function retentionTimeForNearestMSMSspectrum( _
ByVal aRawFileRefNum As Integer, _
ByVal anInApproximateRetentionTimeSecs As Double, _
ByVal anInLeftRetT_ToleranceSecs As Double, _
ByVal anInRightRetT_ToleranceSecs As Double, _
ByVal anInPrecursorMCR As Double, _
ByRef anOutFoundMSMS As Boolean, _
ByRef anOutRetentionTimeWithinFile As Boolean) _
As Double
    'Optimistic defaults; cleared at the end if nothing was found.
    anOutFoundMSMS = True
    anOutRetentionTimeWithinFile = True

    'Changed PM_REFACTOR 2004-11-24
    possibleNewFileHandling(aRawFileRefNum)

    Dim fileRange As spectrumRangeStructure = _
        mForConversionRawDataFileHandling.getSpectrumRange()
    Dim insideFile As Boolean = _
        anInApproximateRetentionTimeSecs > fileRange.firstRetentionTimeSeconds AndAlso _
        anInApproximateRetentionTimeSecs < fileRange.lastRetentionTimeSeconds

    'Fallback result: the input retention time.
    Dim resultSecs As Double = anInApproximateRetentionTimeSecs
    Dim gotFragment As Boolean = False

    If insideFile Then
        'Only the returned spectrum number is used; the two
        'structures are just needed for the call.
        Dim unusedSpec As spectrumSpecStructure
        Dim unusedClassification As SpectrumClassificationStructure
        unusedClassification.dataSourceName = Nothing 'Keep compiler happy.

        'Note: new ***MS*** spectrum.
        Dim msSpectrumNum As Integer = _
            mForConversionRawDataFileHandling.newSpectrumByRetentionTime( _
                anInApproximateRetentionTimeSecs, _
                unusedSpec, unusedClassification)

        Dim precursorMass As Double
        Dim fragClassification As SpectrumClassificationStructure
        fragClassification.dataSourceName = Nothing 'Keep compiler happy.
        Dim experimentNumber As Integer
        Dim fragmentSpectrumNum As Integer = _
            mForConversionRawDataFileHandling.nextFragmentspectrum2( _
                msSpectrumNum, precursorMass, _
                fragClassification, experimentNumber)

        'Changed PM_CORR_TROUBLE 2008-03-06.
        'Without this check the error message was:
        ' "PIL ASSERT. Spectrum number, 18981, is outside the
        ' range for the raw file, [1; 18980]."
        'It happens when there are no fragment spectra in the rest
        'of the file. E.g. in "20071123jd_auto4_06.RAW",
        'spectra 18952 through 18980 are all MS spectra (29 MS
        'spectra in a row).
        If fragmentSpectrumNum > 0 Then
            resultSecs = _
                mForConversionRawDataFileHandling.SpectrumNumber2RetentionTime( _
                    fragmentSpectrumNum, experimentNumber)
            gotFragment = True
        End If
    End If

    If Not gotFragment Then
        'NOTE(review): "within file" is also reported as False when
        ' the time was inside the file but no fragment spectrum
        ' was found - confirm that clients expect this.
        anOutFoundMSMS = False
        anOutRetentionTimeWithinFile = False
    End If

    Return resultSecs
End Function 'retentionTimeForNearestMSMSspectrum
End Class 'MultipleRawFilesHandling
End Namespace 'massSpectrometryBase
'Generated by script codePublish.pl at 2009-01-05T15:20:59.