Source code for MSQuant: MultipleRawFilesHandling.vb, MSQuant/msquant/src/main/massbase/MultipleRawFilesHandling.vb.

Table of contents page.

Home page for MSQuant.

'****************************************************************************
'* Copyright (C) 2004 Peter Mortensen and Matthias Mann                     *
'* This file is part of MSQuant.                                            *
'*                                                                          *
'* MSQuant is distributed under the terms of                                *
'* the GNU General Public License. See src/COPYING.TXT or                   *
'* <http://www.gnu.org/licenses/gpl.txt> for details.                       *
'*                                                                          *
'* MSQuant is free software; you can redistribute it                        *
'* and/or modify it under the terms of the GNU                              *
'* General Public License as published by the Free                          *
'* Software Foundation; either version 2 of the                             *
'* License, or (at your option) any later version.                          *
'*                                                                          *
'* MSQuant is distributed in the hope that it will be                       *
'* useful, but WITHOUT ANY WARRANTY; without even the                       *
'* implied warranty of MERCHANTABILITY or FITNESS FOR                       *
'* A PARTICULAR PURPOSE.  See the GNU General Public                        *
'* License for more details.                                                *
'*                                                                          *
'* You should have received a copy of the GNU General                       *
'* Public License along with MSQuant; if not, write to                      *
'* the Free Software Foundation, Inc., 59 Temple                            *
'* Place, Suite 330, Boston, MA  02111-1307  USA                            *
'*                                                                          *
'* Purpose: Holds Class MultipleRawFilesHandling, see below                 *
'*          for documentation.                                              *
'*                                                                          *
'****************************************************************************

'****************************************************************************
'*                               CEBI                                       *
'*                    Software Development Group                            *
'*                         Peter Mortensen                                  *
'*                E-mail: NUKESPAMMERSdrmortensen@get2netZZZZZZ.dk          *
'*                 WWW: http://www.cebi.sdu.dk/                             *
'*                                                                          *
'*  Program for post-processing of result from search in mass               *
'*    spectrometric data.                                                   *
'*                                                                          *
'*    FILENAME:   MultipleRawFilesHandling.vb                               *
'*    TYPE:       VISUAL_BASIC                                              *
'*                                                                          *
'* CREATED: PM 2003-11-21   Vrs 1.0.                                        *
'* UPDATED: PM 2003-xx-xx                                                   *
'*                                                                          *
'****************************************************************************

Option Strict On
Option Explicit On

Imports System.IO 'For Path class/functions
Imports System.Text.RegularExpressions

Imports System.Collections.Generic 'For Dictionary


Imports App 'For class quantApplication


'Imports System.xyz

'****************************************************************************
'd$ <summary>
'd$   Purpose: Namespace for lower layers of mass spectrometric
'd$            applications: raw data file handling, descriptive statistics,
'd$            fragment masses, digestion, file associations, etc.
'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$   <isUnitTest></isUnitTest>
'd$   <applicationname>test_rawDataFileHandling</applicationname>
'd$   <author>Peter Mortensen</author>
'd$   <seealso>http://www.cebi.sdu.dk/</seealso>
'd$   <codetype>PLATFORM independent / GUI</codetype>
'd$ </summary>
Namespace massSpectrometryBase

    '****************************************************************************
    'd$ <summary>
    'd$   Purpose: Encapsulates the details of handling several raw files at a time.
    'd$ 
    'd$ 
    'd$ 
    'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
    'd$   <applicationname>test_rawDataFileHandling</applicationname>
    'd$   <author>Peter Mortensen</author>
    'd$   <seealso>http://www.cebi.sdu.dk/</seealso>
    'd$   <codetype>PLATFORM independent / GUI</codetype>
    'd$ </summary>
    Public NotInheritable Class MultipleRawFilesHandling

        'First raw file ID handed out for named raw files; the constructor
        'initialises mCurrentRawFileNumber to this value.
        Const RAWFILEREF_BASE As Integer = 809

        'Raw file ID used when the parsed value is numeric, i.e. when the
        'input does not name individual raw files (see addPossibleFileName()).
        Public Const RAWFILEREF_NOTMULTIPLE As Integer = 1789

        'Changed PM_PROTEINRATIO_FOR_EACH_RAWFILE 2006-02-27
        'They have to be distinct, for the code in CalculateProteinRatio()
        'to work.
        Public Const REF_WHOLEPROTEIN As Integer = RAWFILEREF_NOTMULTIPLE + 1

        'One entry (full path + raw file ID) per registered raw file. Stays
        'Nothing until the first entry is added by addPossibleFileName().
        Private mRawFiles() As fileSpecStructure

        'Directory prefix used to build full paths for named raw files. Set by
        'SetBaseDirFromFullFileNamePath().
        Private mDirPath As String

        'ID that the next newly seen raw file name will receive.
        Private mCurrentRawFileNumber As Integer

        'Changed PM_REFACTOR 2008-02-05
        'Private mRawFileNames As Hashtable
        Private mRawFileNamesHash As Dictionary(Of String, Integer) 'Note: helper
        '  object for filling in mRawFiles.

        'Index in mRawFiles where the next entry is stored.
        Private mArrIndex As Integer

        'The full path passed to SetBaseDirFromFullFileNamePath(). Used as the
        'path of the single raw file in the "NO_MULTIPLE" case.
        Private mOneFullRawFileNamePath As String

        'Raw file mode given to the constructor; selects the expected file
        'extension in addPossibleFileName().
        Private mRawFileType As rawFileModeEnum

        'Changed PM_MULTIPLE_FINNEGAN_TROUBLE 2004-01-06
        Private mRecentlyAddedRawFile As Boolean 'Note: this
        '  is used to re-create our raw file object after the list
        '  of raw files is added to.
        '  This will happen in the beginning of a parsing when the
        '  full list of raw files is not yet known.


        'Changed PM_REFACTOR 2008-02-05. Disabled. Does not seem to be used...
        'Private mRawFileObjects2 As Hashtable


        'Raw file object that the retention time functions delegate to. It is
        '(re)created in possibleNewFileHandling().
        Private mForConversionRawDataFileHandling As rawDataFileHandling

        'Changed PM_IRINA_SAMPLE_NUMBER_TROUBLE 2003-12-02
        Private mGeneralSampleNumber As Integer 'This implies that the
        '  sample number is the same in all files which may indeed
        '  not be what we want.

        'Changed PM_GENERALISED_SPECTRUMCLASSIFICATION 2005-03-14
        'Was only introduced for getClassifierObject(). Is it a problem
        'with this dependency on the application object?
        Private mApplication As quantApplication

        'COM version given to the constructor; passed on when the raw file
        'object is created.
        Private mCOMversion As COMversionEnum


        'Changed PM_ACCEPT_UPPER_CASE 2008-02-05
        'File extensions (including the leading dot) that addPossibleFileName()
        'accepts as raw file names. Only the keys are consulted in this class.
        Private mAcceptedRawFileExtensionsWithDot As _
          Dictionary(Of String, Integer)




        '****************************************************************************
        '*  SUBROUTINE NAME:   New                                                  *
        'd$ <summary>Constructor</summary>
        Public Sub New( _
          ByVal aRawFileType As rawFileModeEnum, _
          ByRef anApplication As quantApplication, _
          ByVal aCOMversion As COMversionEnum)

            'Purpose: sets up an empty raw file list.
            '
            '  aRawFileType:  raw file mode; decides which file extension
            '                 addPossibleFileName() expects.
            '  anApplication: application object; used later to get the
            '                 shared spectrum classifier.
            '  aCOMversion:   passed on when the raw file object is created.

            MyBase.New()

            mRawFileType = aRawFileType
            mApplication = anApplication

            'Changed PM_COMVERSIONS 2007-07-24
            mCOMversion = aCOMversion

            mCurrentRawFileNumber = RAWFILEREF_BASE
            mRawFileNamesHash = New Dictionary(Of String, Integer)
            mArrIndex = 0
            mGeneralSampleNumber = -1 'For error detection. Clients MUST set it.

            'Changed PM_ACCEPT_UPPER_CASE 2008-02-05
            'The lookup is case-insensitive so that any casing of an accepted
            'extension (".raw", ".RAW", ".Raw", ...) is recognised, not only
            'the all-lower and all-upper spellings. With a case-insensitive
            'comparer each extension must be added exactly once; adding both
            '".raw" and ".RAW" would throw a duplicate-key exception.
            mAcceptedRawFileExtensionsWithDot = _
              New Dictionary(Of String, Integer)( _
                System.StringComparer.OrdinalIgnoreCase)
            mAcceptedRawFileExtensionsWithDot.Add(".wiff", 1)
            mAcceptedRawFileExtensionsWithDot.Add(".raw", 2)
            mAcceptedRawFileExtensionsWithDot.Add(".idx", 3)

        End Sub 'New()


        'Changed PM_IRINA_SAMPLE_NUMBER_TROUBLE 2003-12-02
        '****************************************************************************
        '*  SUBROUTINE NAME:   SetGeneralSampleNumber                               *
        'd$ <summary> N/A </summary>
        Public Sub SetGeneralSampleNumber(ByVal aGeneralSampleNumber As Integer)
            'Stores the sample number that is handed to the raw file object
            'when it is created. The constructor leaves it at -1, so clients
            'must call this before any retention time function is used.
            Me.mGeneralSampleNumber = aGeneralSampleNumber
        End Sub 'SetGeneralSampleNumber


        'Changed PM_EXTERNAL_QUANT_BROKEN 2003-12-02
        '****************************************************************************
        '*  SUBROUTINE NAME:   getDefaultRawFileID                                  *
        'd$ <summary> N/A </summary>
        Public Shared Function getDefaultRawFileID() As Integer
            'Returns the raw file ID that is used when the input does not
            'refer to several raw files.
            Return MultipleRawFilesHandling.RAWFILEREF_NOTMULTIPLE
        End Function 'getDefaultRawFileID


        '****************************************************************************
        '*  SUBROUTINE NAME:   SetBaseDirFromFullFileNamePath                       *
        'd$ <summary> N/A </summary>
        Public Sub SetBaseDirFromFullFileNamePath( _
          ByVal aFullFileNamePath As String)

            'Purpose: remembers the given full path (used as-is for the
            '         "NO_MULTIPLE" case) and derives the directory prefix
            '         that is put in front of raw file names found later.
            '
            '  aFullFileNamePath: path to one raw file in the directory
            '                     that holds all the raw files.

            mOneFullRawFileNamePath = aFullFileNamePath

            Dim folder As String = Path.GetDirectoryName(aFullFileNamePath)

            If String.IsNullOrEmpty(folder) Then
                'No directory information (e.g. a bare file name). An empty
                'prefix keeps the raw file names relative to the current
                'directory. Appending a separator here would make them
                'point to the root of the current drive instead.
                mDirPath = ""
            Else
                Dim lastChar As Char = folder.Chars(folder.Length - 1)
                If lastChar = Path.DirectorySeparatorChar OrElse _
                   lastChar = Path.AltDirectorySeparatorChar Then

                    'E.g. "C:\" for a file in the root directory. Do not
                    'double the separator.
                    mDirPath = folder
                Else
                    mDirPath = folder & Path.DirectorySeparatorChar
                End If
            End If
        End Sub 'SetBaseDirFromFullFileNamePath


        '****************************************************************************
        '*  SUBROUTINE NAME:   addPossibleFileName                                  *
        'd$ <summary> aValueString is usually a file name or the integer 0.
        'd$   Returns raw file ID.
        'd$ </summary>
        Public Function addPossibleFileName( _
          ByVal aValueString As String, _
          ByRef anOutNoProblem As Boolean, ByRef aNumParseErrors As Integer _
          ) As Integer

            'Purpose: registers a raw file name (if not already known) and
            '         returns its raw file ID.
            '
            '  aValueString:    usually a raw file name, or something numeric
            '                   (e.g. 0) when the input does not name
            '                   individual raw files.
            '  anOutNoProblem:  set to False if aValueString is not accepted
            '                   as a raw file name. Never set to True here.
            '  aNumParseErrors: incremented if aValueString is not accepted.
            '
            '  Returns: RAWFILEREF_NOTMULTIPLE for numeric input, otherwise the
            '           ID assigned to the file name. A value that is not
            '           accepted is registered under the empty name with ID 0
            '           (so the asserts below are only raised the first time).
            '           NOTE(review): such a value also gets an entry in
            '           mRawFiles - confirm that this is what clients expect.

            Dim periodValueString As String = ""
            Dim rawFileNumberToUse As Integer

            'Extension, including the dot, e.g. ".raw". Stays empty for
            'numeric input and for values that could not be parsed.
            Dim fExtension2 As String = ""

            If IsNumeric(aValueString) Then

                'Changed PM_SEVERALWIFFS_BACKCOMPATIBILITY 2003-08-28
                rawFileNumberToUse = RAWFILEREF_NOTMULTIPLE
                periodValueString = "NO_MULTIPLE"
            Else
                Dim acceptedFileName As Boolean = False
                Dim fileNoExtension As String = Nothing

                Try 'Exception can happen if it is not a valid Windows file
                    '  name (say from mismatch between the parse rule
                    '  and the file content),
                    '  e.g. "20080117JEA_SILAC2_1_06.RAW<BR>Score" ( "<"
                    '  and ">" are invalid).

                    'Changed PM_ACCEPT_UPPER_CASE 2008-02-05. Accept
                    '  upper-case as well.
                    fileNoExtension = _
                      Path.GetFileNameWithoutExtension(aValueString)
                    fExtension2 = Path.GetExtension(aValueString)

                    'GetExtension() returns Nothing for a Nothing input.
                    'This is treated as "no extension".
                    If fExtension2 Is Nothing Then
                        fExtension2 = ""
                    End If

                    acceptedFileName = _
                      mAcceptedRawFileExtensionsWithDot.ContainsKey(fExtension2)

                Catch exceptionObject As Exception
                    acceptedFileName = False

                    'The extension may not have been extracted at all. Make
                    'sure the code further down does not work on Nothing.
                    If fExtension2 Is Nothing Then
                        fExtension2 = ""
                    End If

                    'For now avoid getting the message "Bad raw
                    '  file ID..." first.
                    Trace.Assert(False, _
                      "PIL ASSERT. " & _
                      AppConstants.LONG_APP & " can not continue. " & _
                      "The extracted raw file name, " & _
                      aValueString & _
                      ", is not proper. " & _
                      "It may help to change the MGF generator (in menu Tools/Options) to " & _
                      "something that match the information in the  " & _
                      "Mascot result file.")
                End Try

                If acceptedFileName Then
                    'GetExtension() includes the dot, so the file name is
                    'simply the two parts put together (the directory part,
                    'if any, is dropped).
                    periodValueString = fileNoExtension & fExtension2

                    rawFileNumberToUse = mCurrentRawFileNumber
                Else
                    anOutNoProblem = False
                    aNumParseErrors += 1
                End If
            End If

            'Note: mRecentlyAddedRawFile is deliberately NOT reset here. It
            '  is cleared by possibleNewFileHandling() once the raw file
            '  object has been re-created. Resetting it on every call would
            '  lose the "list has grown" information if this function is
            '  called again before the next retention time lookup.

            Dim rawFileRefNum As Integer = -1
            If mRawFileNamesHash.TryGetValue(periodValueString, rawFileRefNum) Then
                'Already registered.
                Return rawFileRefNum
            End If

            'Note: we are only here very few times (equal to number
            '      of raw files). Thus memory efficiency is
            '  of ***no*** concern here.

            'Changed PM_NOMULTIPLE_ASSERT 2004-01-28. Don't check for
            'non-combined raw files...
            'Changed PM_RAWFILE_CASESENSITIVITY 2004-01-26
            If rawFileNumberToUse <> RAWFILEREF_NOTMULTIPLE Then

                Dim defaultExtension As String = Nothing
                Select Case mRawFileType
                    Case rawFileModeEnum.enumAnalyst
                        defaultExtension = ".wiff"
                    Case rawFileModeEnum.enumFinnegan
                        defaultExtension = ".raw"
                    Case rawFileModeEnum.enumMicromass
                        defaultExtension = ".idx"
                    Case Else
                        Trace.Assert(False, _
                          "PIL ASSERT. Select Case never fall-through")
                End Select

                'Culture-independent lower-casing; with e.g. a Turkish
                'culture ToLower() would not turn ".IDX" into ".idx".
                Dim extLower As String = fExtension2.ToLowerInvariant()

                If extLower.Length > 0 Then
                    'For a value that was not accepted the registered name
                    'is empty; show the original value in the message then.
                    Dim nameForMessage As String = periodValueString
                    If nameForMessage.Length = 0 Then
                        nameForMessage = aValueString
                    End If

                    Trace.Assert(defaultExtension = extLower, _
                      "PIL ASSERT. " & _
                      AppConstants.LONG_APP & _
                      " can not continue. The extension for " & _
                      nameForMessage & _
                      " does not match the expected extension (" & _
                      defaultExtension & _
                      ") for the current raw data file mode. " & _
                      "It may help to change the Raw file type " & _
                      "in menu Tools/Options.")
                Else
                    Trace.Assert(Not String.IsNullOrEmpty(aValueString), _
                      "PIL ASSERT. " & _
                      AppConstants.LONG_APP & " can not continue. " & _
                      "The ""period"" field or the ""rawFile"" field " & _
                      "(or similar) does not exist at all. " & _
                      "Please make sure the input file to the Mascot search is in " & _
                      "the proper format. The expected format can e.g. be seen on " & _
                      "the " & AppConstants.LONG_APP & _
                      " home page, <" & AppConstants.APP_HOMEPAGE & ">, " & _
                      "section ""Preparing input files for " & _
                      "the Mascot search engine"". " _
                      )
                End If
            End If

            mRawFileNamesHash.Add(periodValueString, rawFileNumberToUse)
            mCurrentRawFileNumber += 1

            'Update array and find full path, etc.
            ReDim Preserve mRawFiles(mArrIndex)

            'Changed PM_SEVERALWIFFS_BACKCOMPATIBILITY 2003-08-28
            Dim fullPath As String
            If periodValueString = "NO_MULTIPLE" Then
                fullPath = mOneFullRawFileNamePath
            Else
                fullPath = mDirPath & periodValueString
            End If

            Dim newItem2 As fileSpecStructure
            newItem2.path = fullPath
            newItem2.tag2 = rawFileNumberToUse
            mRawFiles(mArrIndex) = newItem2

            'Tells possibleNewFileHandling() to re-create the raw file object.
            mRecentlyAddedRawFile = True
            mArrIndex += 1

            Return rawFileNumberToUse
        End Function 'addPossibleFileName


        '****************************************************************************
        '*  SUBROUTINE NAME:   getRawFiles                                          *
        'd$ <summary> N/A </summary>
        Public Function getRawFiles() _
        As fileSpecStructure()

            'Returns the list of raw files registered so far. The result is
            'Nothing if no file has been registered yet. A list that exists
            'but is empty is reported as an internal error.
            If mRawFiles IsNot Nothing Then
                Trace.Assert(mRawFiles.Length > 0, _
                  "PIL ASSERT. Empty raw file list!")
            End If

            Return mRawFiles
        End Function 'getRawFiles


        'Changed PM_REFACTOR 2004-11-24
        '****************************************************************************
        '*  SUBROUTINE NAME:   possibleNewFileHandling                              *
        'd$ <summary> Helper function to create a new instance of a raw file
        'd$           object if it does not exist already.
        'd$ </summary>
        Private Sub possibleNewFileHandling(ByVal aRawFileRefNum As Integer)

            'Purpose: makes sure a raw file object exists that knows about
            '         all raw files registered so far, and makes the file
            '         with ID aRawFileRefNum the current one.
            '
            'If we have got more raw files since last call then we
            'recreate the raw file object.

            'mRawFiles is Nothing until the first file has been registered.
            'Check for that explicitly so the assert below is what reports
            'the problem, not a NullReferenceException.
            Dim rawFileCount As Integer = 0
            If mRawFiles IsNot Nothing Then
                rawFileCount = mRawFiles.Length
            End If
            Trace.Assert(rawFileCount > 0, "PIL ASSERT. Empty raw file list!")

            'Changed PM_MULTIPLE_FINNEGAN_TROUBLE 2004-01-06
            If mRecentlyAddedRawFile OrElse _
               mForConversionRawDataFileHandling Is Nothing Then

                'The constructor sets the sample number to -1; clients must
                'have replaced it through SetGeneralSampleNumber() by now.
                Trace.Assert(mGeneralSampleNumber > 0, _
                  "PIL ASSERT. Internal error. mGeneralSampleNumber is not initialised.")

                Dim sharedSpectrumClassifier As spectrumClassifier = _
                  mApplication.getClassifierObject()

                quantApplication.createAndInitRawFileObject( _
                   mRawFileType, mRawFiles, mForConversionRawDataFileHandling, _
                   mGeneralSampleNumber, _
                   sharedSpectrumClassifier, _
                   mCOMversion)

                mRecentlyAddedRawFile = False
            End If

            mForConversionRawDataFileHandling.newCurrentFile(aRawFileRefNum)
        End Sub 'possibleNewFileHandling


        '****************************************************************************
        '*  SUBROUTINE NAME:   retentionTime2                                       *
        'd$ <summary> Returns retentiontime in seconds </summary>
        Public Function retentionTime2( _
          ByVal aSpectrumNumber As Integer, _
          ByVal aRawFileRefNum As Integer, _
          ByVal aCOMversion As COMversionEnum) _
          As Double

            'Purpose: converts a spectrum number in the raw file with ID
            '         aRawFileRefNum to a retention time (in seconds,
            '         according to the summary for this function).
            '
            '  aCOMversion is not used in this function.

            'Changed PM_REFACTOR 2004-11-24
            'Selects the raw file (re-creating the raw file object if needed).
            possibleNewFileHandling(aRawFileRefNum)

            'The second argument is fixed to 0.
            'NOTE(review): elsewhere in this class an experiment number is
            '  passed in that position - confirm that 0 is right here.
            Return mForConversionRawDataFileHandling.SpectrumNumber2RetentionTime( _
              aSpectrumNumber, 0)
        End Function 'retentionTime2


        '****************************************************************************
        '*  SUBROUTINE NAME:   MaybeToBeChanged_MSMSretentionTime                   *
        'd$ <summary> Returns retention time in seconds.
        'd$           Temporary function that converts the scan number
        'd$           of fragment spectra into retention time.
        'd$           
        'd$           Micromass: The scan number in the MSMS channels does not
        'd$                      correspond to the scan numbers in the MS channel.
        'd$                      The number (passed as aMSMSspecification) in the
        'd$                      FinneganScanNumber field in the Mascot result file is
        'd$                      a number from the PKL file. That number is the scan 
        'd$                      number for the corresponding MSMS channel number.
        'd$                      The MSMS channel number (passed as aExtraNumber) is 
        'd$                      encoded in the experiment field in the Mascot result 
        'd$                      file. These conversions are defined by how the Perl
        'd$                      script PKL2Mascot.pl is written.
        'd$           
        'd$           Finnigan:  The Finnigan spectrum numbering scheme is flat. A
        'd$                      spectrum with a particular number can be either MS 
        'd$                      or MSMS. The type is encoded in the spectrum title.
        'd$                      aMSMSspecification is taken as Finnigan spectrum
        'd$                      number. aExtraNumber is ignored.
        'd$ </summary>
        Public Function MaybeToBeChanged_MSMSretentionTime( _
          ByVal aMSMSspecification As Integer, _
          ByVal aExtraNumber As Integer, _
          ByVal aRawFileRefNum As Integer) _
          As Double

            'Purpose: retention time for a fragment spectrum in the raw file
            '         with ID aRawFileRefNum. The two numbers are passed on
            '         unchanged to the raw file object, which does the
            '         actual conversion.

            'Changed PM_REFACTOR 2004-11-24
            'Selects the raw file (re-creating the raw file object if needed).
            possibleNewFileHandling(aRawFileRefNum)

            Return mForConversionRawDataFileHandling.MaybeToBeChanged_MSMSretentionTime( _
              aMSMSspecification, aExtraNumber)
        End Function 'MaybeToBeChanged_MSMSretentionTime


        'Changed PM_FRAGMENTS_FOR_INSERTED 2004-11-24
        '****************************************************************************
        '*  SUBROUTINE NAME:   retentionTimeForNearestMSMSspectrum                  *
        'd$ <summary> Given an approximate retention for an MS-MS spectrum,
        'd$           find nearest...
        'd$           If no MS-MS spectrum with the right precursor mass could
        'd$           be found then ............
        'd$ 
        'd$           This function is a convenience for the client. The alternative
        'd$           would be to expose more of the raw file objects methods.
        'd$ </summary>
        Public Function retentionTimeForNearestMSMSspectrum( _
          ByVal aRawFileRefNum As Integer, _
          ByVal anInApproximateRetentionTimeSecs As Double, _
          ByVal anInLeftRetT_ToleranceSecs As Double, _
          ByVal anInRightRetT_ToleranceSecs As Double, _
          ByVal anInPrecursorMCR As Double, _
          ByRef anOutFoundMSMS As Boolean, _
          ByRef anOutRetentionTimeWithinFile As Boolean) _
          As Double

            'Purpose: given an approximate retention time, returns the
            '         retention time of the fragment spectrum that the raw
            '         file object reports as the next one after the MS
            '         spectrum at that time.
            '
            '  Returns: the retention time of the fragment spectrum found,
            '           or anInApproximateRetentionTimeSecs unchanged if none
            '           was found.
            '
            '  anOutFoundMSMS / anOutRetentionTimeWithinFile: both True if a
            '    fragment spectrum was found, both False otherwise.
            '    NOTE(review): anOutRetentionTimeWithinFile is also set to
            '    False when the time is inside the file but no fragment
            '    spectrum follows - confirm that clients want that.
            '
            '  The two tolerance parameters and anInPrecursorMCR are not used;
            '  at this time we do NOT see if the precursor mass is matching.

            'Defaults. Set before anything else, as the outputs are ByRef.
            anOutFoundMSMS = True
            anOutRetentionTimeWithinFile = True

            'Changed PM_REFACTOR 2004-11-24
            possibleNewFileHandling(aRawFileRefNum)

            Dim fileRange As spectrumRangeStructure = _
              mForConversionRawDataFileHandling.getSpectrumRange()

            'Strictly inside the retention time range of the file?
            Dim insideFile As Boolean = _
              anInApproximateRetentionTimeSecs > fileRange.firstRetentionTimeSeconds AndAlso _
              anInApproximateRetentionTimeSecs < fileRange.lastRetentionTimeSeconds

            Dim fragmentSpectrumNumber As Integer = 0 '0: nothing found.
            Dim fragmentExperimentNumber As Integer

            If insideFile Then
                'Only needed as arguments for the call below.
                Dim unusedSpectrumSpec As spectrumSpecStructure
                Dim unusedClassification As SpectrumClassificationStructure
                unusedClassification.dataSourceName = Nothing 'Keep compiler happy.

                'Note: new ***MS*** spectrum.
                Dim msSpectrumNumber As Integer = _
                  mForConversionRawDataFileHandling.newSpectrumByRetentionTime( _
                    anInApproximateRetentionTimeSecs, _
                    unusedSpectrumSpec, unusedClassification)

                Dim precursorMass As Double
                Dim fragmentClassification As SpectrumClassificationStructure
                fragmentClassification.dataSourceName = Nothing 'Keep compiler happy.

                fragmentSpectrumNumber = _
                  mForConversionRawDataFileHandling.nextFragmentspectrum2( _
                    msSpectrumNumber, precursorMass, _
                    fragmentClassification, fragmentExperimentNumber)
            End If

            'Changed PM_CORR_TROUBLE 2008-03-06.
            'Without the check for a positive spectrum number the lookup
            'failed with e.g.:
            '  "PIL ASSERT. Spectrum number, 18981, is outside the
            '    range for the raw file, [1; 18980]."
            'This happens when there are no fragment spectra in the rest of
            'the file. E.g. in "20071123jd_auto4_06.RAW", spectra 18952
            'through 18980 are all MS spectra (29 MS spectra in a row).
            If fragmentSpectrumNumber > 0 Then
                Return mForConversionRawDataFileHandling.SpectrumNumber2RetentionTime( _
                  fragmentSpectrumNumber, fragmentExperimentNumber)
            End If

            'Outside the file or no fragment spectrum: report it and hand
            'back the input retention time.
            anOutFoundMSMS = False
            anOutRetentionTimeWithinFile = False
            Return anInApproximateRetentionTimeSecs
        End Function 'retentionTimeForNearestMSMSspectrum

    End Class 'MultipleRawFilesHandling

End Namespace 'massSpectrometryBase


    

    

Generated by script codePublish.pl at 2009-01-05T15:20:59.