'Source code for MSQuant: retentionTimeCorrelator.vb, MSQuant/msquant/src/main/massbase/retentionTimeCorrelator.vb.
'
'Table of contents page.
'
'Home page for MSQuant.

'****************************************************************************
'* Copyright (C) 2004 Peter Mortensen and Matthias Mann                     *
'* This file is part of MSQuant.                                            *
'*                                                                          *
'* MSQuant is distributed under the terms of                                *
'* the GNU General Public License. See src/COPYING.TXT or                   *
'* <http://www.gnu.org/licenses/gpl.txt> for details.                       *
'*                                                                          *
'* MSQuant is free software; you can redistribute it                        *
'* and/or modify it under the terms of the GNU                              *
'* General Public License as published by the Free                          *
'* Software Foundation; either version 2 of the                             *
'* License, or (at your option) any later version.                          *
'*                                                                          *
'* MSQuant is distributed in the hope that it will be                       *
'* useful, but WITHOUT ANY WARRANTY; without even the                       *
'* implied warranty of MERCHANTABILITY or FITNESS FOR                       *
'* A PARTICULAR PURPOSE.  See the GNU General Public                        *
'* License for more details.                                                *
'*                                                                          *
'* You should have received a copy of the GNU General                       *
'* Public License along with MSQuant; if not, write to                      *
'* the Free Software Foundation, Inc., 59 Temple                            *
'* Place, Suite 330, Boston, MA  02111-1307  USA                            *
'*                                                                          *
'* Purpose: has detailed knowledge of how to correlate retention time       *
'*          between two or more raw files.                                  *
'*          Holds Class retentionTimeCorrelator, see below for              *
'*          documentation.                                                  *
'*                                                                          *
'****************************************************************************

'****************************************************************************
'*                               CEBI                                       *
'*                    Software Development Group                            *
'*                         Peter Mortensen                                  *
'*                E-mail: NUKESPAMMERSdrmortensen@get2netZZZZZZ.dk          *
'*                 WWW: http://www.cebi.sdu.dk/                             *
'*                                                                          *
'*  Program for post-processing of result from search in mass               *
'*    spectrometric data.                                                   *
'*                                                                          *
'*    FILENAME:   retentionTimeCorrelator.vb                                *
'*    TYPE:  VISUAL_BASIC                                                   *
'*                                                                          *
'* CREATED: PM 2003-09-11   Vrs 1.0.                                        *
'* UPDATED: PM 2003-xx-xx                                                   *
'*                                                                          *
'****************************************************************************

Option Strict On
Option Explicit On

Imports System.Text  'For StringBuilder
Imports System.Collections.Generic 'For Dictionary and List.

Imports MolecularSharedStructures


'Changed PM_SUPERCHARGE 2003-11-03
'Imports Mascot_Parser.SDUPutility
Imports SDUPutility


'Imports System.xyz

'****************************************************************************
'd$ <summary>
'd$   Purpose: Namespace for lower layers of mass spectrometric
'd$            applications: raw data file handling, descriptive statistics,
'd$            fragment masses, digestion, file associations, etc.
'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$   <isUnitTest></isUnitTest>
'd$   <applicationname>test_rawDataFileHandling</applicationname>
'd$   <author>Peter Mortensen</author>
'd$   <seealso>http://www.cebi.sdu.dk/</seealso>
'd$   <codetype>PLATFORM independent</codetype>
'd$ </summary>
Namespace massSpectrometryBase

    'Changed PM_RETCORR_ANYTIME 2006-10-26
    'Selects which retention time of a peptide is used for retention time
    'correlation (evaluated in getEffectiveRetentionTime_Secs()).
    Public Enum retentionTimeTypeEnum
        'Use the retention time of the MS/MS event (stored in minutes).
        enumMSMSevent = 331

        'Use the centroid of the LC profile (stored in seconds).
        enumLCprofileCentroid = 332

        'Use the LC profile centroid when it is above 0.001 seconds,
        'otherwise fall back to the MS/MS event time.
        enumLCprofileCentroidWithFallBackToMSMS = 333
    End Enum 'retentionTimeTypeEnum


    'One peptide entry as it is sorted by SortBySequenceComparer. The sort
    'keys are, in order: AAsequence4, modsHashValue, charge3 and mass3.
    Public Structure peptideCorrStruct
        'Peptide sequence; first sort key.
        Public AAsequence4 As String

        'Raw file ID belonging to this entry.
        Public corr_rawFileId As Integer

        'Changed PM_REFACTOR 2006-03-15 (replaced the former peptIndex field).
        'ID into some other data structure to find out more about a peptide.
        Public peptideRef As Integer

        Public useForCorrelation As Boolean

        'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
        Public charge3 As Integer
        Public mass3 As Double

        'Changed PM_UNIQUEMODS_FOR_RETCORR 2006-10-22
        'Makes it easier to sort on the modification set.
        Public modsHashValue As Integer

        'Mostly for diagnostics/debugging.
        Public query3 As Integer
    End Structure 'peptideCorrStruct


    'Position of one peptide in the 2D correlation map: fillIn2Dmap() uses
    'rawFileIDindex as the first (row) index and uniquePeptideIndex as the
    'second (column) index.
    Public Structure mapCoordinatesStruct
        Public rawFileIDindex As Integer
        Public uniquePeptideIndex As Integer

        'When False, fillIn2Dmap() stores the peptide's list index negated.
        Public useForCorrelation As Boolean
    End Structure 'mapCoordinatesStruct


    'Per-column information of the 2D correlation map.
    'NOTE(review): presumably one entry per unique peptide column - confirm
    'against the code that fills mProteinIndex_array2.
    Public Structure coorMapcolumnDescriptorStruct
        Public proteinIndex As Integer
        Public peptideUsedForCorrelation As Boolean
    End Structure 'coorMapcolumnDescriptorStruct


    'Needed?
    'Public Structure rawFilePropertiesStruct
    '    Dim peptides As Integer 'How many of the union of peptides are
    '    '  in this raw file.
    '    Dim someOTher As Integer
    'End Structure
    'One (x, y) pair of retention times, in seconds, used as a data point for
    'the linear regression. Points flagged as outliers are left out by
    'extractXYarrays() and are flagged by markOutLiers().
    Public Structure retCorrDataPointStruct
        Public xRetentionTimeSecs As Double
        Public yRetentionTimeSecs As Double
        Public isOutLier As Boolean
    End Structure 'retCorrDataPointStruct


    'Coefficients of a linear retention time calibration (slope and offset).
    Public Structure retentionTimeCalibrationStruct
        'Slope
        Public A As Double

        'Offset
        Public B As Double
    End Structure 'retentionTimeCalibrationStruct


    'Describes a peptide that is to be added (inserted) as a result of the
    'retention time correlation.
    Public Structure peptideToBeAddedInfoStruct
        'Index into the protein list at the client side.
        Public proteinIndex As Integer

        'Computed from a retention time from a peptide from another raw file.
        Public calibratedRetentionTimeSecs As Double

        Public pepSequence As String

        'Changed PM_SHORT_EXPORT 2005-12-01
        'Open question from the original author: what does PDBA mean?
        Public PDBA_leftFlankAA As Int16
        Public PDBA_rightFlankAA As Int16

        Public measuredMCR As Double
        Public measuredMass As Double
        Public calculatedMass As Double
        Public charge As Integer

        'Changed PM_BADPEAKREASON_BUG 2003-09-26
        Public goodMSpeak As Boolean
        Public badPeakReason As String

        'Open question from the original author: what does PDBA mean?
        Public PDBA_rawFileID As Integer

        Public modHits4 As List(Of modificationCountStruct)

        'Changed PM_CORR_CHECKEDINHERIT_FOR_INSERTED 2003-10-08
        Public peptVerified As Boolean

        'Changed PM_FAST_SERIALISATION_BUG_SAVE_INSERTED_PEPTIDES 2007-01-08
        Public uncalibratedSILACmasses4 As List(Of Double)

        'Changed PM_FAST_SERIALISATION_BUG_LOAD_INSERTED_PEPTIDES 2007-01-08
        'Same as in PeptideHitStructure.
        Public version2 As Integer
    End Structure 'peptideToBeAddedInfoStruct


    'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-11
    'Outcome of a correlation run: overall success flag, a report text and
    'failure/total counters.
    Public Structure corrResultStruct
        Public correlationSuccesful As Boolean
        Public reportStr As String

        Public failureCount As Integer

        'Same as the number of raw files.
        Public totalCount As Integer
    End Structure 'corrResultStruct


    'Collects information/statistics about the operations in the class:
    'how many peptides were used for correlation, etc.
    Public Structure statsStruct
        'Future for these: median error value.
        'Note: it may no longer be the first raw file; that is determined
        '  by FILEID_FOR_SAMPLECORR.
        Public pointsUsedForCorreltation_SomeRawFile_Initial As Integer 'E.g. 94
        Public pointsUsedForCorreltation_SomeRawFile_AfterOutLierRemoval As Integer 'E.g. 73
        Public pointsUsedForCorreltation_SomeRawFile_Final As Integer 'E.g. 36

        Public peptidesToBeAdded As Integer

        'Number of entries different from NOPEPTIDE_CODE in our 2D map. E.g. 336
        Public peptides As Integer

        'Number of columns in our 2D map.
        Public uniquePeptides As Integer

        'Number of columns in our 2D map with 2 or more entries different
        'from NOPEPTIDE_CODE.
        Public uniquePeptides_UsedForCorrelation As Integer

        Public rawFiles As Integer 'E.g. 3

        Public duplicatesNotInserted As Integer

        'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
        Public massesResetToWildType As Integer
        Public addedWithAResetMass As Integer

        'Changed PM_NEGATIVE_RETENTION_TIMES 2003-10-16
        Public negativeRetentionTimes As Integer
    End Structure 'statsStruct


    'Changed PM_CORRELATION_DIALOG 2003-10-21
    <Serializable()> _
    Public Structure correlationSettingsStructure
        'User settings for the correlation: whether/when new peptides are
        'inserted, the LC window and which LC profile is used for LC peak
        'detection.

        Public doNotInsertNewPeptides As Boolean

        'LC time range (in seconds) for insertion of peptides.
        Public insertPeptidesInLCTimeRange As Boolean
        Public startLCtimeForInsertion_Secs As Double
        Public endLCtimeForInsertion_Secs As Double

        'Changed PM_LCPEAK_WINDOW_USERDEFINED 2003-12-18
        Public leftLCwindowSecs As Double
        Public rightLCwindowSecs As Double

        'Changed PM_LCPEAKDETECTION_OPTIONS 2006-06-16
        Public SILACdishForLCpeakDetection As Integer

        'If False, field SILACdishForLCpeakDetection decides which LC
        'profile to use - the "fixed" option in the user interface.
        Public useHighestLCprofileForLCpeakDetection As Boolean
    End Structure 'correlationSettingsStructure


    '****************************************************************************
    '*  SUBROUTINE NAME:   SortBySequenceComparer                               *
    'd$ <summary> N/A. ...
    'd$      Note: ascending sort                    </summary>
    Class SortBySequenceComparer
        Implements System.Collections.Generic.IComparer(Of peptideCorrStruct)

        'Orders two peptideCorrStruct items ascending by four keys, each one
        'only consulted when all previous keys are equal:
        '  1. AAsequence4   (string comparison with the < and > operators)
        '  2. modsHashValue
        '  3. charge3
        '  4. mass3
        '
        'Returns -1 if aItem1 sorts before aItem2, 1 if it sorts after and
        '0 if no key tells them apart.
        Function Compare( _
          ByVal aItem1 As peptideCorrStruct, ByVal aItem2 As peptideCorrStruct) _
          As Integer _
          Implements _
            System.Collections.Generic.IComparer(Of peptideCorrStruct).Compare

            'Key 1: sequence
            If aItem1.AAsequence4 < aItem2.AAsequence4 Then
                Return -1
            End If
            If aItem1.AAsequence4 > aItem2.AAsequence4 Then
                Return 1
            End If

            'Same sequence. Key 2: modifications
            If aItem1.modsHashValue < aItem2.modsHashValue Then
                Return -1
            End If
            If aItem1.modsHashValue > aItem2.modsHashValue Then
                Return 1
            End If

            'Same modifications. Key 3: charge
            If aItem1.charge3 < aItem2.charge3 Then
                Return -1
            End If
            If aItem1.charge3 > aItem2.charge3 Then
                Return 1
            End If

            'Also same charge. Key 4: mass
            If aItem1.mass3 < aItem2.mass3 Then
                Return -1
            End If
            If aItem1.mass3 > aItem2.mass3 Then
                Return 1
            End If

            'No key differs (or the masses are not comparable with < and >).
            Return 0
        End Function 'Compare
    End Class 'SortBySequenceComparer


    '****************************************************************************
    'd$ <summary>
    'd$   Purpose: general description
    'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
    'd$   <applicationname>test_rawDataFileHandling</applicationname>
    'd$   <author>Peter Mortensen</author>
    'd$   <seealso>http://www.cebi.sdu.dk/</seealso>
    'd$   <codetype>PLATFORM independent / GUI</codetype>
    'd$ </summary>
    Public NotInheritable Class retentionTimeCorrelator

        'Used by insertNewPeptides() as the size of its key window.
        Private PEPTIDE_UNIT_DISTANCE As Double = 1663.75 / 1663 'About 1.000451

        'Move somewhere more general.

        Private mPeptideList2 As List(Of PeptideHitStructure) 'Long list
        '  of peptides from many proteins for retention time correlation.
        '  Indexes into this list are used in other data structures
        '  (e.g. as cell values in m2D_array, see fillIn2Dmap()).
        'List of PeptideHitStructure objects.

        'Parallel to mPeptideList2.
        'Changed PM_TYPESAFE 2008-11-27
        'Private mPeptideList_mapCoordinates As ArrayList 'Type: mapCoordinatesStruct
        Private mPeptideList_mapCoordinates2 As List(Of mapCoordinatesStruct)

        Private mUniquePeptideIndexCounter As Integer
        Private m2D_array As Integer(,) 'Better name wanted! Each row is for a
        '  raw file. Each column is for a peptide identified in 2 or more
        '  raw files. Sample: 3 rows and 1225 columns.

        'Changed PM_TYPESAFE 2008-11-27
        'Private mProteinIndex_array As ArrayList 'Type: coorMapcolumnDescriptorStruct
        Private mProteinIndex_array2 As List(Of coorMapcolumnDescriptorStruct)

        'Changed PM_CORR_ADD_EVEN_MORE_PEPTIDES 2003-09-25
        'These 3 are parallel to the 3 above.

        'Changed PM_TYPESAFE 2006-10-25
        'Dim mExtraPeptidesList As ArrayList
        Private mExtraPeptidesList2 As List(Of PeptideHitStructure) 'Long
        '  list of peptides from many proteins. For inserting extra peptides
        '  from next best score peptides, those with different charge and a
        '  lower score than the best score peptide (for a particular raw
        '  file). Indexes into this list are used in other data structures.
        'List of PeptideHitStructure objects.

        'Parallel to mExtraPeptidesList2.
        'Changed PM_TYPESAFE 2008-11-27
        'Private mExtraPeptidesList_mapCoordinates As ArrayList 'Type: mapCoordinatesStruct
        Private mExtraPeptidesList_mapCoordinates2 As List(Of mapCoordinatesStruct)

        Private mExtraPeptidesIndexCounter As Integer
        Private mExtra2D_array As Integer(,) 'Better name wanted!

        'Changed PM_TYPESAFE 2008-11-27
        'Private mProteinIndex_ExtraArray As ArrayList 'Type: coorMapcolumnDescriptorStruct
        Private mProteinIndex_ExtraArray2 As List(Of coorMapcolumnDescriptorStruct)

        'Changed PM_TYPESAFE 2008-11-27
        'Private mRawFile2IndexHash As Hashtable
        'NOTE(review): presumably maps raw file ID to row index - confirm
        '  against the code that fills it.
        Private mRawFile2IndexHash2 As Dictionary(Of Integer, Integer)

        'Private mRawFile2IndexHash_Reverse As Hashtable
        'NOTE(review): presumably the reverse mapping of
        '  mRawFile2IndexHash2 - confirm.
        Private mRawFile2IndexHash_Reverse2 As Dictionary(Of Integer, Integer)

        Private mCurrentRawFileIndex As Integer

        'Set to -1 (meaning undefined) by the constructor.
        Private mReferenceRawFileIndex As Integer

        'Changed PM_TYPESAFE 2008-11-27
        'Private mRetentionTimeCalibrations As ArrayList 'Type is retentionTimeCalibrationStruct.
        Private mRetentionTimeCalibrations2 As List(Of retentionTimeCalibrationStruct)

        'Changed PM_TYPESAFE 2006-10-25
        'Dim mPeptidesToBeAdded As ArrayList 'Type is peptideToBeAddedInfoStruct.
        Private mPeptidesToBeAdded2 As List(Of peptideToBeAddedInfoStruct)

        Private mStats As statsStruct

        Private mCorrReport As System.Text.StringBuilder

        'Marker stored in the 2D maps for a cell without a peptide
        '(see fillIn2Dmap()).
        Private NOPEPTIDE_CODE As Integer = -2300000
        Private FILEID_FOR_SAMPLECORR As Integer = 1 'Now second file, because the reference ID is
        '  now 0 in the standard example...

        'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
        Private mCorrFailureReasonArray() As String

        'Changed PM_INSERTED_FALSENEGATIVE 2004-11-25
        Private mMaxDiffIsoAbs As Double = -1.0E+20 'Only for diagnostics, can be removed later.

        'Changed PM_RETCORR_ANYTIME 2006-10-26
        Private mParsingCompletedCount As Integer

        'Changed PM_RETCORR_LCCENTROID 2006-10-26
        'Decides which retention time getEffectiveRetentionTime_Secs() returns.
        Private mRetentionTimeType As retentionTimeTypeEnum


        '****************************************************************************
        '*  SUBROUTINE NAME:   New                                                  *
        'd$ <summary>Constructor</summary>
        Public Sub New(ByVal aRetentionTimeType As retentionTimeTypeEnum)

            MyBase.New()

            'Creates an empty correlator: all lists and lookup tables are
            'created empty, all counters start at zero and the reference raw
            'file is marked as undefined.
            '
            'aRetentionTimeType: which retention time of a peptide is used
            '  for the correlation.

            'Initial capacity of the report text buffer.
            Const REPORT_INITIAL_CAPACITY As Integer = 3000

            'Changed PM_RETCORR_LCCENTROID 2006-10-26
            mRetentionTimeType = aRetentionTimeType

            'Main peptide list with its parallel coordinates and column
            'descriptors.
            mPeptideList2 = New List(Of PeptideHitStructure)()
            mPeptideList_mapCoordinates2 = New List(Of mapCoordinatesStruct)()
            mProteinIndex_array2 = New List(Of coorMapcolumnDescriptorStruct)()
            mUniquePeptideIndexCounter = 0

            'Same three structures for the extra peptides.
            mExtraPeptidesList2 = New List(Of PeptideHitStructure)()
            mExtraPeptidesList_mapCoordinates2 = New List(Of mapCoordinatesStruct)()
            mProteinIndex_ExtraArray2 = New List(Of coorMapcolumnDescriptorStruct)()
            mExtraPeptidesIndexCounter = 0

            'Raw file lookup tables, both directions.
            mRawFile2IndexHash2 = New Dictionary(Of Integer, Integer)()
            mRawFile2IndexHash_Reverse2 = New Dictionary(Of Integer, Integer)()

            mCurrentRawFileIndex = 0
            mReferenceRawFileIndex = -1 'Meaning undefined. Useful for error detection.

            mRetentionTimeCalibrations2 = New List(Of retentionTimeCalibrationStruct)()
            mPeptidesToBeAdded2 = New List(Of peptideToBeAddedInfoStruct)()

            mCorrReport = New System.Text.StringBuilder(REPORT_INITIAL_CAPACITY)

            'Statistics counters.
            mStats.duplicatesNotInserted = 0
            mStats.massesResetToWildType = 0 'PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
            mStats.addedWithAResetMass = 0
            mStats.negativeRetentionTimes = 0 'PM_NEGATIVE_RETENTION_TIMES 2003-10-16

            'Changed PM_RETCORR_ANYTIME 2006-10-26
            mParsingCompletedCount = 0
        End Sub 'New()


        '****************************************************************************
        '*  SUBROUTINE NAME:   insertedPeptides                                     *
        'd$ <summary>
        'd$   Purpose: returns the number of entries in mPeptideList2.
        'd$ </summary>
        '****************************************************************************
        Public Function insertedPeptides() As Integer
            'Returns the number of entries currently in mPeptideList2.
            'NOTE(review): despite the name this is the size of the main
            '  peptide list, not of mPeptidesToBeAdded2 - confirm that this
            '  is what callers expect.
            Dim peptideCount As Integer = mPeptideList2.Count
            Return peptideCount
        End Function 'insertedPeptides


        '****************************************************************************
        '*  SUBROUTINE NAME:   extractXYarrays                                     *
        'd$ <summary>
        'd$   Purpose: helper function for ParsingCompleted(), putting non-outliers
        'd$            into arrays for subsequent linear regression.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
        'd$ <param name="aInCoorPointList2">
        'd$   Input: the data points. Points with isOutLier set are skipped.
        'd$ </param>
        'd$ <param name="anOutXarray">
        'd$   Output: xRetentionTimeSecs of every non-outlier point, in list
        'd$   order. Any previous content is discarded.
        'd$ </param>
        'd$ <param name="anOutYarray">
        'd$   Output: yRetentionTimeSecs of every non-outlier point, parallel
        'd$   to anOutXarray. Any previous content is discarded.
        'd$ </param>
        'd$ <remarks>
        'd$   <para>
        'd$     Both output arrays are empty when there are no non-outlier
        'd$     points.
        'd$   </para>
        'd$ </remarks>
        Private Shared Sub extractXYarrays( _
          ByRef aInCoorPointList2 As List(Of retCorrDataPointStruct), _
          ByRef anOutXarray() As Double, ByRef anOutYarray() As Double)

            'Copies the x and y retention times of all points that are not
            'marked as outliers into two parallel arrays, keeping the order
            'of the list. Both output arrays get exactly one element per
            'non-outlier point (zero length if there are none).

            'Pass 1: find out how many points are kept so that the arrays
            'can be allocated with their final size.
            Dim keptCount As Integer = 0
            For Each candidate As retCorrDataPointStruct In aInCoorPointList2
                If Not candidate.isOutLier Then
                    keptCount += 1
                End If
            Next

            Dim xValues(keptCount - 1) As Double 'Upper bound -1: empty array.
            Dim yValues(keptCount - 1) As Double

            'Pass 2: copy the kept points.
            Dim writeIndex As Integer = 0
            For Each dataPoint As retCorrDataPointStruct In aInCoorPointList2
                If dataPoint.isOutLier Then
                    Continue For
                End If
                xValues(writeIndex) = dataPoint.xRetentionTimeSecs
                yValues(writeIndex) = dataPoint.yRetentionTimeSecs
                writeIndex += 1
            Next

            anOutXarray = xValues
            anOutYarray = yValues
        End Sub 'extractXYarrays


        '****************************************************************************
        '*  SUBROUTINE NAME:   markOutLiers                                     *
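        'd$ <summary>
        'd$   Purpose: marks data points as outliers when the absolute value
        'd$            of their y error is at or above a limit.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
        'd$ <param name="anInYErrors">
        'd$   Input: y errors, one per point that is not already marked as
        'd$   outlier, in list order.
        'd$ </param>
        'd$ <param name="aInOutCoorPointList2">
        'd$   Input/output: the data points. isOutLier is set on points whose
        'd$   absolute error is greater than or equal to anOutLierLimit.
        'd$   Points already marked as outliers are left untouched.
        'd$ </param>
        'd$ <param name="anOutLierLimit">
        'd$   Input: limit for the absolute error, e.g. 40 (seconds).
        'd$ </param>
        'd$ <remarks>
        'd$   <para>
        'd$     Asserts that the number of errors equals the number of points
        'd$     that were not marked as outliers on entry.
        'd$   </para>
        'd$ </remarks>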
        Private Shared Sub markOutLiers( _
          ByRef anInYErrors() As Double, _
          ByRef aInOutCoorPointList2 As List(Of retCorrDataPointStruct), _
          ByVal anOutLierLimit As Double)

            'Flags every not yet flagged point whose absolute y error is at
            'or above anOutLierLimit. anInYErrors holds one error per point
            'that is not flagged on entry, in list order, so it is indexed
            'with its own counter and not with the list index.

            Dim errorIndex As Integer = 0
            Dim pointCount As Integer = aInOutCoorPointList2.Count
            For pointIndex As Integer = 0 To pointCount - 1
                Dim point As retCorrDataPointStruct = _
                  aInOutCoorPointList2(pointIndex)

                'Points flagged earlier have no entry in anInYErrors.
                If point.isOutLier Then
                    Continue For
                End If

                If Math.Abs(anInYErrors(errorIndex)) >= anOutLierLimit Then 'E.g. +/- 40 seconds
                    point.isOutLier = True

                    'The list holds structures (values), so the changed
                    'copy must be stored back.
                    aInOutCoorPointList2(pointIndex) = point
                End If
                errorIndex += 1
            Next

            'Every error must have been consumed by exactly one point.
            Trace.Assert(anInYErrors.Length = errorIndex, _
              "PIL ASSERT. arraySize different arrayIndex.")
        End Sub 'markOutLiers


        'Changed PM_REFACTOR 2003-09-25
        '****************************************************************************
        '*  SUBROUTINE NAME:   fillIn2Dmap                                          *
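        'd$ <summary>
        'd$   Purpose: builds the 2D map (first index: raw file index, second
        'd$            index: unique peptide index) whose cells hold indexes
        'd$            into the peptide list. Peptides not used for
        'd$            correlation are stored as negated index. Cells
        'd$            without a peptide hold NOPEPTIDE_CODE.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>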
        Private Sub fillIn2Dmap( _
          ByRef anInPeptideList_mapCoordinates2 As List(Of mapCoordinatesStruct), _
          ByRef anInPeptideList2 As List(Of PeptideHitStructure), _
          ByVal aLastFileIDindex As Integer, ByVal aLastPeptideindex As Integer, _
          ByRef anOut_m2D_array As Integer(,))

            'Builds the 2D map of peptide list indexes.
            '
            '  anInPeptideList_mapCoordinates2: for peptide j, the cell
            '    (rawFileIDindex, uniquePeptideIndex) it belongs to and
            '    whether it is used for correlation. Parallel to
            '    anInPeptideList2.
            '  anInPeptideList2: the peptide list; only its Count is used.
            '  aLastFileIDindex, aLastPeptideindex: last valid index (not
            '    size) of the first and second dimension of the map.
            '  anOut_m2D_array: output; newly allocated, previous content
            '    is discarded.
            '
            'Cell encoding:
            '  NOPEPTIDE_CODE  no peptide at this position
            '  j  (>= 0)       index of a peptide used for correlation
            '  -j              index of a peptide not used for correlation
            '
            'Note: for j = 0 the negated index is still 0, so the
            '  "not used for correlation" flag can not be recovered from
            '  the map for the first peptide in the list.
            '
            'The map is filled with NOPEPTIDE_CODE *before* the peptides
            'are written. The earlier approach wrote the peptides first and
            'then replaced every remaining 0 by NOPEPTIDE_CODE except at
            'the cell of peptide 0; with an empty peptide list that
            'exception defaulted to cell (0,0), which then kept the value 0
            'and looked like a reference to peptide 0. For non-empty lists
            'the result is the same as before.

            ReDim anOut_m2D_array(aLastFileIDindex, aLastPeptideindex) 'Note: for
            '  ReDim it is last index and not size.....

            'Step 1: mark every cell as blank.
            For fileRow As Integer = 0 To aLastFileIDindex
                For peptideColumn As Integer = 0 To aLastPeptideindex
                    anOut_m2D_array(fileRow, peptideColumn) = NOPEPTIDE_CODE
                Next 'Through peptides, columns.
            Next 'Through raw files, rows.

            'Step 2: write the (possibly negated) list index of each peptide
            'into its cell. A later peptide with the same coordinates
            'overwrites an earlier one.
            Dim lastIndex_PeptideList As Integer = anInPeptideList2.Count - 1
            For j As Integer = 0 To lastIndex_PeptideList
                Dim curCoor As mapCoordinatesStruct = _
                  anInPeptideList_mapCoordinates2(j)

                'Encode as negative index: peptides that do not have a
                'sufficiently high score to qualify for use in correlation.
                Dim encodedIndex As Integer = j
                If Not curCoor.useForCorrelation Then
                    encodedIndex = -j
                End If

                anOut_m2D_array( _
                  curCoor.rawFileIDindex, curCoor.uniquePeptideIndex) = encodedIndex
            Next
        End Sub 'fillIn2Dmap


        'Changed PM_REFACTOR 2006-10-26
        '****************************************************************************
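        '*  SUBROUTINE NAME:   getEffectiveRetentionTime_Secs                       *
        'd$ <summary>
        'd$   Purpose: returns the retention time, in seconds, of a peptide
        'd$            according to the retention time type given to the
        'd$            constructor.
        'd$ </summary>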
        '****************************************************************************
        Private Function getEffectiveRetentionTime_Secs( _
          ByRef anInPept As PeptideHitStructure) _
          As Double

            'Returns the retention time of anInPept in seconds, chosen by
            'mRetentionTimeType:
            '  enumMSMSevent: the MS/MS retention time (minutes * 60).
            '  enumLCprofileCentroid: the retention time centroid.
            '  enumLCprofileCentroidWithFallBackToMSMS: the centroid if it
            '    is above 0.001, otherwise the MS/MS retention time.
            'For any other value an assertion fails and -7777.7777 is
            'returned.
            '
            'Open question from the original author: what about field
            '  LCpeakDetectionTimeCentroidSeconds?

            If mRetentionTimeType = retentionTimeTypeEnum.enumMSMSevent Then
                Return 60.0 * anInPept.MSMSretentionTimeMinutes
            End If

            If mRetentionTimeType = retentionTimeTypeEnum.enumLCprofileCentroid Then
                Return anInPept.retentionTimeCentroid_secs
            End If

            If mRetentionTimeType = _
               retentionTimeTypeEnum.enumLCprofileCentroidWithFallBackToMSMS Then

                If anInPept.retentionTimeCentroid_secs > 0.001 Then
                    Return anInPept.retentionTimeCentroid_secs
                End If

                'Fall-back
                Return 60.0 * anInPept.MSMSretentionTimeMinutes
            End If

            'Unknown retention time type.
            Trace.Assert(False, "PIL ASSERT. Select Case never fall-through")
            Return -7777.7777
        End Function 'getEffectiveRetentionTime_Secs


        'Changed PM_REFACTOR 2003-09-25
        '****************************************************************************
        '*  SUBROUTINE NAME:   insertNewPeptides                                    *
        'd$ <summary>
        'd$   Purpose: xyz.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
        Private Sub insertNewPeptides( _
          ByRef aIn2D_array As Integer(,), ByVal aFileID As Integer, _
          ByVal aPeptIndex As Integer, _
          ByRef anInPeptideList2 As List(Of PeptideHitStructure), _
          ByVal aLastFileIDindex As Integer, ByVal aProteinIndex As Integer, _
          ByRef anInRetentionTimeCalibrations2 As List(Of retentionTimeCalibrationStruct), _
          ByRef anInOutAlreadyInsertedHash2 As Dictionary(Of String, Integer), _
          ByRef anInOutPeptidesToBeAdded2 As List(Of peptideToBeAddedInfoStruct) _
          )
            'Old:
            '  ByRef anInPeptideList As ArrayList
            '  ByRef anInOutPeptidesToBeAdded As ArrayList

            Dim curIndex As Integer = aIn2D_array(aFileID, aPeptIndex)

            'Changed PM_BAD_INSERT 2003-11-07
            'Changed PM_BAD_INSERT 2003-10-29. Window made wider
            'to accomodate double inserts for some measured values
            'that differed by approx. 0.1 Da.
            'Dim keyWindowSize As Double = PEPTIDE_UNIT_DISTANCE / 5
            Dim keyWindowSize2 As Double = PEPTIDE_UNIT_DISTANCE / 1
            Dim keyHalfWindowSize As Double = 0.5 * keyWindowSize2

            'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
            'Note: even if the place in our map is occopied with a peptide
            '      we will still insert a new peptide if there is no other
            '      peptide in one of the other raw files with the same mass
            '      and charge.

            If curIndex = NOPEPTIDE_CODE Then
                'Nothing to do. Instead of look for NOPEPTIDE_CODE we now
                'look at each defined and loop through the other files.
                Dim peter18 As Integer = 18
            Else
                'For all defined peptides we insert new peptides at all other
                'raw files; at empty places and for non-identical peptides.

                If curIndex < 0 Then
                    curIndex = -curIndex
                End If
                Dim pepToClone As PeptideHitStructure = _
                  anInPeptideList2(curIndex)

                'Changed PM_INSERTION_OF_Z_PEPTIDES 2004-06-30
                Trace.Assert(pepToClone.AASequence <> _
                  peptideConstants.SEQUENCE_BADPEPTIDE, _
                  "PIL ASSERT. Bad peptide to be inserted: " & _
                  pepToClone.AASequence)

                Dim fileID2 As Integer
                For fileID2 = 0 To aLastFileIDindex
                    If fileID2 <> aFileID Then
                        'Changed PM_INSERTPEPTIDES_ASSERT 2005-07-29
                        'Dim pepToCloneMeasMass_Wild As Double = _
                        '  (pepToClone.lowerMZuncalib - PROTON_MASS) * pepToClone.charge
                        Dim lowerMass_Clone As Double = _
                          PeptideHitStructure.getLowerMCRuncalib(pepToClone)

                        'Changed PM_REFACTOR 2008-05-20
                        'Dim pepToCloneMeasMass_Wild As Double = _
                        '  (lowerMass_Clone - MSconstants.PROTON_MASS) * _
                        '  pepToClone.charge
                        Dim pepToCloneMeasMass_Wild As Double = _
                          PILmassCalc.chargeTransform(lowerMass_Clone, pepToClone.charge, 0)


                        Trace.Assert( _
                          lowerMass_Clone > 20.0, _
                          "PIL ASSERT. Unreasonable mass for " & _
                          "pepToClone.lowerMZuncalib: " & _
                          lowerMass_Clone)

                        'Changed PM_BAD_INSERT 2003-10-29
                        Dim pepToCloneBaseMass As Double = _
                          pepToClone.calculatedMassNoMods 'Only for use in
                        '  key (to not insert several peptides with nearly
                        '  the same mass.

                        Dim wildTypeReset As Boolean = False

                        Dim insertNewPeptide As Boolean = True
                        Dim curIndex2 As Integer = _
                          aIn2D_array(fileID2, aPeptIndex)
                        If curIndex2 = NOPEPTIDE_CODE Then
                            Dim peter19 As Integer = 19
                        Else
                            If curIndex2 < 0 Then
                                curIndex2 = -curIndex2
                            End If

                            Dim curPept As PeptideHitStructure = _
                              anInPeptideList2(curIndex2)

                            Dim curLowerMCR As Double = _
                              PeptideHitStructure.getLowerMCRuncalib(curPept)

                            'Changed PM_REFACTOR 2008-05-20
                            'Dim curPeptMeasMass_Wild As Double = _
                            '  (curLowerMCR - MSconstants.PROTON_MASS) * _
                            '  pepToClone.charge
                            Dim curPeptMeasMass_Wild2 As Double = _
                              PILmassCalc.chargeTransform( _
                                curLowerMCR, pepToClone.charge, 0)


                            'Test
                            If True Then
                                'Should not be equal to 0.0 and also should be
                                'close to pepToClone.measuredMW.
                                Trace.Assert(lowerMass_Clone > 10.0, _
                                  "PIL ASSERT. pepToClone.lowerMZuncalib has " & _
                                  "not been set.")

                                Dim diffIso As Double = _
                                  pepToClone.measuredMass - _
                                  pepToCloneMeasMass_Wild
                                Dim diffIsoAbs As Double = Math.Abs(diffIso)
                                If diffIsoAbs > 0.2 Then
                                    wildTypeReset = True
                                    mStats.massesResetToWildType += 1

                                    'Changed PM_INSERTED_FALSENEGATIVE 2004-11-25
                                    If diffIsoAbs > mMaxDiffIsoAbs Then
                                        mMaxDiffIsoAbs = diffIsoAbs
                                    End If
                                End If
                                Trace.Assert(diffIsoAbs < 100.0, _
                                  "PIL ASSERT. diffIsoAbs < 100.0.")
                            End If

                            Dim cloneCharge As Integer = pepToClone.charge

                            'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
                            'Dim cloneMass As Double = pepToClone.measuredMW
                            Dim cloneMass As Double = pepToCloneMeasMass_Wild

                            'Check if the peptide is identical, otherwise insert
                            'a new peptide

                            Dim curCharge As Integer = curPept.charge

                            'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
                            'Dim curMass As Double = curPept.measuredMW
                            Dim curMass As Double = curPeptMeasMass_Wild2

                            Dim massDiff As Double = curMass - cloneMass
                            Dim absMassDiff As Double = Math.Abs(massDiff)
                            Dim lowMassDifference As Boolean = absMassDiff < 0.2

                            If cloneCharge = curCharge AndAlso _
                               lowMassDifference Then

                                'Identical. Do not insert a new peptide.
                                insertNewPeptide = False
                            Else
                                Dim peter94 As Integer = 94
                            End If
                        End If 'curIndex2 = NOPEPTIDE_CODE

                        Dim rawFileIDforNewPeptide As Integer = _
                          mRawFile2IndexHash_Reverse2(fileID2)
                        If insertNewPeptide Then
                            'For a particular raw file: prevent inserting several
                            'peptides that are essentially the same, except for
                            'small differences in mass.

                            'Changed PM_BAD_INSERT 2003-10-29
                            ''Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
                            ''dim massToUse as double = pepToClone.measuredMW
                            'Dim massToUse As Double = pepToCloneMeasMass_Wild
                            Dim massToUse As Double = _
                              pepToCloneMeasMass_Wild - pepToCloneBaseMass + _
                              keyHalfWindowSize

                            '1. cint, round.
                            '2. Do NOT use absolute mass, use mass diff from some
                            '   close by peptide mass, e.g. theoretical mass.
                            '3. Use calibrated measured value.
                            '4. Use unmodified mass.

                            'Changed PM_BAD_INSERT 2003-10-29. CInt round!!!! and the
                            'mass we use should be centered on the expected peptide
                            'mass at our integer mass.
                            'Dim massStr As String = _
                            '  CInt(5.0 * massToUse + 0.5).ToString '5.0 is to make
                            ''  all masses within a 0.2 Da window appear the same.
                            ''  Future: perhaps we should divide the mass
                            ''          by 1.002323 (or similar).
                            Dim massInt As Integer = _
                              CInt(massToUse / keyWindowSize2 - 0.5) + 100
                            '-0.5 to get truncation, not rounding. 100 to avoid
                            'negative values.
                            Trace.Assert(massInt > 0, _
                              "PIL ASSERT. Bad mass values, base mass: " & _
                              pepToCloneBaseMass & " Da, measured (wild): " & _
                              pepToCloneMeasMass_Wild & " Da.")

                            '  All masses within a 1.0 Da window appear the same.

                            Dim keySB As StringBuilder = New StringBuilder(32)
                            keySB.Append(pepToClone.AASequence)
                            keySB.Append("_")
                            keySB.Append(rawFileIDforNewPeptide)
                            keySB.Append("_")
                            keySB.Append(pepToClone.charge)
                            keySB.Append("_")
                            keySB.Append(massInt)

                            If anInOutAlreadyInsertedHash2.ContainsKey( _
                              keySB.ToString) Then
                                insertNewPeptide = False
                                mStats.duplicatesNotInserted += 1
                            Else
                                anInOutAlreadyInsertedHash2.Add( _
                                  keySB.ToString, 1)
                            End If
                        End If

                        If insertNewPeptide Then
                            Dim newPep As peptideToBeAddedInfoStruct
                            newPep.badPeakReason = Nothing 'Keep compiler happy.
                            newPep.modHits4 = Nothing 'Keep compiler happy.
                            newPep.uncalibratedSILACmasses4 = Nothing 'Keep compiler happy.
                            newPep.pepSequence = Nothing 'Keep compiler happy.

                            newPep.proteinIndex = aProteinIndex

                            'Changed PM_RETCORR_LCCENTROID 2006-10-26
                            'Dim retentionTimeForDefinedPeptide As Double = _
                            '  60.0 * pepToClone.MSMSretentionTimeMinutes
                            Dim retentionTimeForDefinedPeptide As Double = _
                              getEffectiveRetentionTime_Secs(pepToClone)

                            Dim definedPeptideCalib As retentionTimeCalibrationStruct = _
                              anInRetentionTimeCalibrations2(aFileID)

                            Dim curCalib As retentionTimeCalibrationStruct = _
                              anInRetentionTimeCalibrations2(fileID2)

                            'First transform to reference raw files retention
                            'time scale and then to the new peptide's raw file
                            'retention time scale.

                            Dim refRetTime As Double = _
                              (retentionTimeForDefinedPeptide - definedPeptideCalib.B) / _
                                definedPeptideCalib.A

                            Dim retTimeForNewPeptide As Double = _
                              curCalib.A * refRetTime + curCalib.B

                            'Changed PM_NEGATIVE_RETENTION_TIMES 2003-10-16
                            If retTimeForNewPeptide < 0.5 Then
                                'If predicted retention time is negative (or low
                                'positive) set it to some safe value that will
                                'also flag for later quantitation that the
                                'quantitation result should be set to zero.
                                retTimeForNewPeptide = 9.9
                                mStats.negativeRetentionTimes += 1
                            End If

                            newPep.calibratedRetentionTimeSecs = _
                              retTimeForNewPeptide

                            newPep.PDBA_rawFileID = rawFileIDforNewPeptide

                            'Note: when new fields are added: also add
                            '  in Parse()/MascotResultParser.vb.
                            If True Then 'Copy fields from the identified peptide.
                                newPep.pepSequence = pepToClone.AASequence

                                'Changed PM_SHORT_EXPORT 2005-12-01
                                newPep.PDBA_leftFlankAA = pepToClone.leftFlankAA
                                newPep.PDBA_rightFlankAA = pepToClone.rightFlankAA

                                newPep.measuredMCR = pepToClone.measuredMCR

                                'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
                                'newPep.measuredMass = pepToClone.measuredMW
                                newPep.measuredMass = pepToCloneMeasMass_Wild

                                newPep.calculatedMass = _
                                  pepToClone.MascotCalculatedMass
                                newPep.charge = pepToClone.charge

                                'Changed PM_BADPEAKREASON_BUG 2003-09-26
                                newPep.goodMSpeak = pepToClone.goodMSpeak
                                newPep.badPeakReason = pepToClone.badPeakReason

                                'Changed PM_GENERALISED_QUANT_MODE 2003-12-08
                                ''Changed PM_CORR_MODS_FOR_INSERTED 2003-10-03
                                'newPep.modHit = pepToClone.modHit
                                newPep.modHits4 = pepToClone.modHits2 'Should we
                                '  deep copy?

                                If wildTypeReset Then
                                    'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
                                    mStats.addedWithAResetMass += 1
                                    MascotResultParser.resetModFields( _
                                      newPep.modHits4)
                                Else
                                    Dim peter2 As Integer = 2
                                End If

                                'Changed PM_CORR_CHECKEDINHERIT_FOR_INSERTED 2003-10-08
                                newPep.peptVerified = pepToClone.verified

                                'Changed PM_FAST_SERIALISATION_BUG_SAVE_INSERTED_PEPTIDES 2007-01-08
                                newPep.uncalibratedSILACmasses4 = pepToClone.uncalibratedSILACmasses2

                                'Changed PM_ELIMINATE_PEPTIDEFIELD 2008-11-25. Delete at any time.
                                ''Changed PM_FAST_SERIALISATION_BUG_LOAD_INSERTED_PEPTIDES 2007-01-08
                                'newPep.version2 = pepToClone.version
                            End If

                            Trace.Assert(newPep.PDBA_leftFlankAA <> 0, _
                              "PIL ASSERT. PDBA_leftFlankAA is 0 for peptide " & _
                              newPep.pepSequence & _
                              ". Excel does not like that.")
                            Trace.Assert(newPep.PDBA_rightFlankAA <> 0, _
                              "PIL ASSERT. PDBA_rightFlankAA is 0 for peptide " & _
                              newPep.pepSequence & _
                              ". Excel does not like that.")

                            anInOutPeptidesToBeAdded2.Add(newPep)
                        End If
                    End If 'No self compare
                Next 'Inner loop through raw files.
            End If
        End Sub 'insertNewPeptides


        'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
        '****************************************************************************
        '*  SUBROUTINE NAME:   correlationFailure                                   *
        'd$ <summary>
        'd$   Purpose: helper function to do reporting, etc. in case of a
        'd$            retention time correlation failure for a raw file pair.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
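        'd$ <param name="anInFileID">
        'd$   Index into mCorrFailureReasonArray at which the failure
        'd$   reason is stored.
        'd$ </param>
        'd$ <param name="aInxVals">
        'd$   X values that are dumped to the correlation report.
        'd$ </param>
        'd$ <param name="aInyVals">
        'd$   Y values that are dumped to the correlation report.
        'd$ </param>
        'd$ <param name="aInFailureReasonString">
        'd$   Text describing why the correlation failed. It is stored
        'd$   and also included in the header of the dump.
        'd$ </param>
        'd$ <param name="anOutUsableCorrelationForPair">
        'd$   Output. Always set to False by this subroutine.
        'd$ </param>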
        'd$ <remarks>
        'd$   <para>
        'd$   </para>
        'd$   <para>
        'd$   </para>
        'd$   <seealso cref="E:EnvDTE.BuildEvents.OnBuildDone" /> event. This
        'd$ </remarks>
        Private Sub correlationFailure( _
          ByVal anInFileID As Integer, _
          ByRef aInxVals As Double(), _
          ByRef aInyVals As Double(), _
          ByVal aInFailureReasonString As String, _
          ByRef anOutUsableCorrelationForPair As Boolean)

            'Purpose: record and report a failed retention time correlation.
            '
            '  anInFileID:                     index into
            '                                  mCorrFailureReasonArray.
            '  aInxVals, aInyVals:             data points that are dumped
            '                                  to the report.
            '  aInFailureReasonString:         reason text; stored and
            '                                  included in the dump header.
            '  anOutUsableCorrelationForPair:  output; always set to False.

            'Flag the failure to the caller first, before anything else
            'can go wrong (e.g. an out-of-range file index below).
            anOutUsableCorrelationForPair = False

            'Remember the reason for this file.
            mCorrFailureReasonArray(anInFileID) = aInFailureReasonString

            Dim reportHeader As String = String.Concat( _
              "Correlation failed (", aInFailureReasonString, _
              "). Original data (ignore column ""yErrors""):", _
              ControlChars.NewLine)

            'Append the original data points to the correlation report.
            'NOTE(review): the meaning of -1.0 and 0.0 is defined by
            '  dumpXandYandYerrors() and is not visible here; Nothing is
            '  presumably the y-errors array (cf. the header text) - confirm.
            SDUPstatistics.dumpXandYandYerrors( _
              reportHeader, _
              aInxVals, -1.0, 0.0, _
              aInyVals, Nothing, _
              "[secs]", "[secs]", _
              mCorrReport)
        End Sub 'correlationFailure


        '****************************************************************************
        '*  FUNCTION NAME:     cloneCorrList                                        *
        '****************************************************************************
        Private Function cloneCorrList( _
          ByRef aInList As List(Of retCorrDataPointStruct) _
          ) _
          As List(Of retCorrDataPointStruct)

            'Purpose: return a new list that contains a copy of every
            '         element of aInList, in the same order. The input
            '         list is not modified.
            '
            'Uses the standard List(Of T) copy constructor instead of a
            'hand-written loop. Each element is copied by assignment.
            '
            'NOTE(review): this gives independent elements only as long as
            '  retCorrDataPointStruct is a Structure that contains nothing
            '  but value types (as the original comment stated) - confirm
            '  if reference type fields are ever added.
            '
            'Note: if aInList is Nothing the constructor throws an
            '      ArgumentNullException.
            Return New List(Of retCorrDataPointStruct)(aInList)
        End Function 'cloneCorrList


        '****************************************************************************
        '*  SUBROUTINE NAME:   ParsingCompleted                                     *
        'd$ <summary>
        'd$   Purpose: signal from the client that parsing is done for
        'd$            a Mascot result. We can do the retention time
        'd$            correlation etc. now or we can be lazy and wait
        'd$            until it is needed.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
        'd$ <param name="anInRawFilesMap">
        'd$   Array of massSpectrometryBase.fileSpecStructure. Used to look
        'd$   up full raw file paths (through
        'd$   rawDataFileHandling.getFullRawFilePath()) for the correlation
        'd$   report.
        'd$ </param>
        'd$ <remarks>
        'd$   <para>
        'd$   </para>
        'd$   <para>
        'd$   </para>
        'd$   <seealso cref="E:EnvDTE.BuildEvents.OnBuildDone" /> event. This
        'd$ </remarks>
        Public Sub ParsingCompleted( _
          ByRef anInRawFilesMap() As massSpectrometryBase.fileSpecStructure)

            mParsingCompletedCount += 1

            'Detect client error.
            Trace.Assert( _
              mParsingCompletedCount = 1, _
              "PIL ASSERT. ParsingCompleted() called more than once!. " & _
              "This indicates a client code error.")

            Dim totPeptides As Integer = mPeptideList2.Count

            mStats.peptides = totPeptides

            Dim rowsForMap As Integer = mRawFile2IndexHash2.Count
            'Dim columnsForMap As Integer = mPeptideList_mapCoordinates.Count
            Dim columnsForMap As Integer = mUniquePeptideIndexCounter

            mStats.uniquePeptides = columnsForMap
            mStats.rawFiles = rowsForMap

            Dim lastFileIDindex As Integer = rowsForMap - 1
            Dim lastPeptideindex As Integer = columnsForMap - 1

            'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
            ReDim mCorrFailureReasonArray(lastFileIDindex)

            If True Then 'Fill in our map.
                fillIn2Dmap( _
                  mPeptideList_mapCoordinates2, mPeptideList2, _
                  lastFileIDindex, lastPeptideindex, _
                  m2D_array)

                'Changed PM_CORR_ADD_EVEN_MORE_PEPTIDES 2003-09-25
                fillIn2Dmap( _
                  mExtraPeptidesList_mapCoordinates2, mExtraPeptidesList2, _
                  lastFileIDindex, mExtraPeptidesIndexCounter - 1, _
                  mExtra2D_array)
            End If 'Fill in our map.

            Dim outlierMap(lastFileIDindex, lastPeptideindex) As Boolean
            Dim fileID As Integer

            Dim peptIndex As Integer
            If True Then 'Find some properties (min/max values of score,
                '  retention time, ...).
                '  Also find reference raw file.

                Dim minScore2 As Double = 1000000000.0
                Dim maxScore2 As Double = -1000000000.0

                'Changed PM_TYPESAFE 2008-11-27
                'Dim peptidesForCorr As ArrayList = New ArrayList 'Type is Integer
                Dim peptidesForCorr2 As List(Of Integer) = New List(Of Integer)

                For fileID = 0 To lastFileIDindex
                    peptidesForCorr2.Add(0)
                Next

                For peptIndex = 0 To lastPeptideindex

                    Dim peptidesForCorrelation As Integer = 0

                    For fileID = 0 To lastFileIDindex
                        Dim curIndex2 As Integer = m2D_array(fileID, peptIndex)

                        'What about the negative values, for high scoring
                        'peptides for correlation????

                        'Meaning of test: is it a high scoring peptide that can be
                        '  used for retention time correlation (because we are sure
                        '  of the peptides identity) ? There are two other
                        '  possibilities: low negative values are peptides with too
                        '  low scores. Some large negative value means that there is
                        '  no identified peptide - it is going to be inserted at a
                        '  predicted retention time.
                        If curIndex2 >= 0 Then

                            peptidesForCorr2(fileID) = _
                              peptidesForCorr2(fileID) + 1

                            Dim pept As PeptideHitStructure = _
                              mPeptideList2(curIndex2)

                            'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
                            'longer implicit conversion from integer to
                            'double...
                            Dim score As Double = pept.MascotScore2

                            If score < minScore2 Then
                                minScore2 = score
                            End If
                            If score > maxScore2 Then
                                maxScore2 = score
                            End If

                            peptidesForCorrelation += 1
                        Else
                            Dim peter7 As Integer = 7 'Low scoring or to be inserted.
                        End If
                    Next 'Through raw file, rows.

                    Dim forCorrelation As Boolean = False
                    If peptidesForCorrelation >= 2 Then
                        forCorrelation = True
                    End If
                    Dim curColumnDescriptor As coorMapcolumnDescriptorStruct = _
                      mProteinIndex_array2(peptIndex)
                    curColumnDescriptor.peptideUsedForCorrelation = forCorrelation

                    mProteinIndex_array2(peptIndex) = curColumnDescriptor 'Write-back.
                Next 'Through peptides

                If True Then 'Selects reference raw file to use as a base.
                    Dim maxVal As Integer = -1
                    For fileID = 0 To lastFileIDindex
                        Dim curVal As Integer = peptidesForCorr2(fileID)
                        If curVal > maxVal Then
                            maxVal = curVal
                            mReferenceRawFileIndex = fileID
                        End If
                    Next
                End If
            End If

            Dim usableCorrelation As Boolean = True
            If True Then 'Finally, find the correlations: mapping between
                '  the retention times.

                'Changed PM_REFACTOR_TROUBLE 2008-12-01. Adapt to old
                '  behavior. Perhaps it would be better to explicitly
                '  test mReferenceRawFileIndex for being negative and
                '  skip most of the rest of this function. E.g. there
                '  is no point in generating a correlation report for 
                '  single raw file searches.
                '
                'Why is HashTable apparently different from Dictionary?
                'Answer: HashTable accepts keys that do not exist
                '        whereas Dictionary requires the key to
                '        exist, otherwise an exception is thrown.
                'Trace.Assert(mReferenceRawFileIndex >= 0, _
                '  "PIL ASSERT. mReferenceRawFileIndex is undefined.")
                'refRealFileID = _
                '  mRawFile2IndexHash_Reverse2(mReferenceRawFileIndex)
                Dim refRealFileID As Integer = 0
                If mRawFile2IndexHash_Reverse2.TryGetValue( _
                  mReferenceRawFileIndex, refRealFileID) Then
                    Dim peter2 As Integer = 2
                Else
                    Dim peter3 As Integer = 3 'Does not exist. Single file.
                End If

                Dim refFullRawFilePath As String = _
                  rawDataFileHandling.getFullRawFilePath( _
                    anInRawFilesMap, refRealFileID)

                Dim xValsOrig(columnsForMap) As Double 'Max value, will be
                '  redimmed later.
                Dim yValsOrig(columnsForMap) As Double 'Max value, will be
                '  redimmed later.

                For fileID = 0 To lastFileIDindex
                    'We must repeat increasing it to columnsForMap because it is
                    'reduced at the end of the loop...
                    ReDim xValsOrig(columnsForMap)
                    ReDim yValsOrig(columnsForMap)

                    'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
                    'Now reset for each file to correlate - to let us continue
                    'with the correlation even if one fails.
                    Dim usableCorrelationForPair As Boolean = True
                    Dim pointsToCorrelate As Integer = 0

                    Dim xySet1 As List(Of retCorrDataPointStruct) = _
                      New List(Of retCorrDataPointStruct)
                    Dim xySet2 As List(Of retCorrDataPointStruct) = _
                      New List(Of retCorrDataPointStruct) 'Result of
                    '  the first attempt at fitting, includes first wave of
                    '  outliers.

                    Dim currentRealFileID As Integer = _
                      mRawFile2IndexHash_Reverse2(fileID)

                    Dim currentFullRawFilePath As String = _
                      rawDataFileHandling.getFullRawFilePath( _
                      anInRawFilesMap, currentRealFileID)

                    mCorrReport.Append( _
                      "Current raw data file: " & _
                      vbTab & currentFullRawFilePath & PILInputOutput.LINEEND & _
                      "  Reference raw data file: " & _
                      vbTab & refFullRawFilePath & PILInputOutput.LINEEND)

                    For peptIndex = 0 To lastPeptideindex
                        Dim curIndex As Integer = m2D_array(fileID, peptIndex)

                        'Meaning of test: is it a high scoring peptide that can be
                        '  used for retention time correlation (because we are sure
                        '  of the peptides identity) ? There are two other
                        '  possibilities: low negative values are peptides with too
                        '  low scores. Some large negative value means that there is
                        '  no identified peptide - it is going to be inserted at a
                        '  predicted retention time.
                        If curIndex >= 0 Then
                            Dim curPept As PeptideHitStructure = _
                              mPeptideList2(curIndex)

                            'Changed PM_RETCORR_LCCENTROID 2006-10-26
                            'Dim curRetentionTimeSecs As Double = _
                            '  60.0 * curPept.MSMSretentionTimeMinutes
                            Dim curRetentionTimeSecs As Double = _
                              getEffectiveRetentionTime_Secs(curPept)

                            Dim refIndex As Integer = _
                              m2D_array(mReferenceRawFileIndex, peptIndex)

                            'Meaning of test: is it a high scoring peptide that
                            '  can be used for retention time correlation (because
                            '  we are sure of the peptides identity) ? There are
                            '  two other possibilities: low negative values are
                            '  peptides with too low scores. Some large negative
                            '  value means that there is no identified
                            '  peptide - it is going to be inserted at a
                            '  predicted retention time.
                            If refIndex >= 0 Then
                                'For Mascot identified peptides.

                                Dim refPept As PeptideHitStructure = _
                                    mPeptideList2(refIndex)

                                'Changed PM_RETCORR_LCCENTROID 2006-10-26
                                'Dim refRetentionTimeSecs As Double = _
                                '  60.0 * refPept.MSMSretentionTimeMinutes
                                Dim refRetentionTimeSecs As Double = _
                                  getEffectiveRetentionTime_Secs(refPept)

                                Dim somePoint As retCorrDataPointStruct
                                somePoint.xRetentionTimeSecs = _
                                  refRetentionTimeSecs
                                somePoint.yRetentionTimeSecs = _
                                  curRetentionTimeSecs
                                somePoint.isOutLier = False
                                xySet1.Add(somePoint)

                                pointsToCorrelate += 1
                            Else
                                'Low scoring or to be inserted.
                                Dim peter8 As Integer = 8
                            End If
                        Else
                            'Low scoring or to be inserted.
                            Dim peter7 As Integer = 7
                        End If
                    Next 'Through peptides

                    'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
                    extractXYarrays(xySet1, xValsOrig, yValsOrig) 'Moved up here
                    Dim forCorr As Integer = xySet1.Count
                    If forCorr < 2 Then
                        'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
                        Me.correlationFailure( _
                          fileID, xValsOrig, yValsOrig, _
                          "Too few initial points for correlation. Count: " & _
                            forCorr, _
                          usableCorrelationForPair _
                          )
                    End If

                    If usableCorrelationForPair Then
                        Dim yErrors() As Double = Nothing 'Keep compiler happy.
                        Dim minErr2 As Double
                        Dim maxErr2 As Double
                        Dim medianErr2 As Double

                        Dim A1 As Double
                        Dim B1 As Double
                        Dim minXUsed As Double
                        Dim maxXUsed As Double
                        If True Then 'First attempt at fitting
                            SDUPstatistics.linearRegression( _
                              xValsOrig, yValsOrig, A1, B1, _
                              yErrors, minErr2, maxErr2, medianErr2, _
                              Nothing, _
                              minXUsed, maxXUsed) 'Perhaps use the exclude feature and the
                            '  algorithm from SDUPrecalibrator.vb?

                            If fileID = FILEID_FOR_SAMPLECORR Then 'Now second,
                                '  because the reference ID is now 0 in the
                                '  standard example...
                                mStats.pointsUsedForCorreltation_SomeRawFile_Initial = _
                                  xValsOrig.Length
                            End If
                        End If

                        Dim A2 As Double
                        Dim B2 As Double
                        Dim pointsToCorrelate2 As Integer = 0

                        Dim xVals2() As Double = Nothing 'Keep compiler happy.
                        Dim yVals2() As Double = Nothing 'Keep compiler happy.

                        'Second attempt - loop through to find outliers,
                        '  build new list for final regression.

                        If True Then
                            markOutLiers(yErrors, xySet1, 40.0) '+/- 40 seconds.

                            'Changed PM_TYPESAFE 2007-11-20
                            'xySet2 = DirectCast(xySet1.Clone(), ArrayList) 'Need a
                            ''  copy here because we don't want to exclude peptides
                            ''  that are in the other half.
                            xySet2 = Me.cloneCorrList(xySet1) 'For now. Isn't there
                            '  an easier way to clone a list of structures???

                            extractXYarrays(xySet1, xVals2, yVals2)

                            Dim forCorr2 As Integer = xVals2.Length
                            If forCorr2 < 2 Then
                                'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
                                Me.correlationFailure( _
                                  fileID, xValsOrig, yValsOrig, _
                                  "Too few points after removing outliers. Count:  " & _
                                    forCorr2, _
                                  usableCorrelationForPair _
                                  )
                            End If

                            If usableCorrelationForPair Then
                                SDUPstatistics.linearRegression( _
                                  xVals2, yVals2, A2, B2, _
                                  yErrors, minErr2, maxErr2, medianErr2, _
                                  Nothing, _
                                  minXUsed, maxXUsed) 'Perhaps use the exclude feature and the
                                '  algorithm from SDUPrecalibrator.vb?

                                If fileID = FILEID_FOR_SAMPLECORR Then
                                    mStats.pointsUsedForCorreltation_SomeRawFile_AfterOutLierRemoval = _
                                      xVals2.Length
                                End If
                            End If 'usableCorrelationForPair
                        End If

                        If usableCorrelationForPair Then
                            Dim A3 As Double
                            Dim B3 As Double
                            If True Then 'Third attempt: exclude half of the
                                '  datapoints, those with the worst errors.

                                Dim medianErrorToUse As Double = _
                                  1.001 * medianErr2 + 0.001 'To avoid empty sets
                                '  when e.g. medianErr is 0.0 for the identical
                                '  correlation.
                                markOutLiers(yErrors, xySet1, medianErrorToUse)
                                extractXYarrays(xySet1, xVals2, yVals2)

                                Dim forCorr3 As Integer = xVals2.Length
                                If forCorr3 < 2 Then
                                    'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
                                    Me.correlationFailure( _
                                      fileID, xValsOrig, yValsOrig, _
                                      "Too few points after retaining " & _
                                        "the best xx %. Count:  " & _
                                        forCorr3, _
                                      usableCorrelationForPair)
                                End If

                                If usableCorrelationForPair Then
                                    SDUPstatistics.linearRegression( _
                                      xVals2, yVals2, A3, B3, _
                                      yErrors, minErr2, maxErr2, medianErr2, _
                                      Nothing, _
                                      minXUsed, maxXUsed) 'Perhaps use the exclude feature and the
                                    '  algorithm from SDUPrecalibrator.vb?)

                                End If 'usableCorrelationForPair
                            End If

                            If usableCorrelationForPair Then
                                Dim calib As retentionTimeCalibrationStruct
                                calib.A = A3
                                calib.B = B3
                                mRetentionTimeCalibrations2.Add(calib)

                                mCorrReport.Append("Final calibration constants:" & _
                                 "A: " & vbTab & calib.A & vbTab & _
                                 "B: " & vbTab & calib.B & vbTab & _
                                  PILInputOutput.LINEEND)

                                SDUPstatistics.dumpXandYandYerrors( _
                                  "Values for final fitting", _
                                  xVals2, A3, B3, yVals2, Nothing, _
                                  "[secs]", "[secs]", mCorrReport)

                                SDUPstatistics.dumpXandYandYerrors( _
                                  "Final fitting applied to original data", _
                                  xValsOrig, A3, B3, yValsOrig, Nothing, _
                                  "[secs]", "[secs]", _
                                  mCorrReport)

                                'Test only.....
                                If fileID = FILEID_FOR_SAMPLECORR Then
                                    mStats.pointsUsedForCorreltation_SomeRawFile_Final = _
                                      xVals2.Length
                                End If

                                Dim xyLen As Integer = xySet1.Count
                                Dim ind As Integer = 0
                                Dim outLierCount As Integer = 0 'Only for debugging
                                Dim pi As Integer = 0
                                For pi = 0 To lastPeptideindex
                                    Dim curIndex As Integer = m2D_array(fileID, pi)
                                    If curIndex >= 0 Then
                                        Dim refIndex As Integer = _
                                          m2D_array(mReferenceRawFileIndex, pi)
                                        If refIndex >= 0 Then
                                            Dim somePoint2 As _
                                              retCorrDataPointStruct = _
                                                DirectCast(xySet2(ind), _
                                                  retCorrDataPointStruct)
                                            Dim outLierValue As Boolean = _
                                              somePoint2.isOutLier
                                            If outLierValue = True Then
                                                outlierMap(fileID, pi) = True
                                                outLierCount += 1
                                            End If
                                            ind += 1
                                        End If
                                    End If
                                Next
                                Trace.Assert(ind = xyLen, "PIL ASSERT. ind = xyLen.")
                            End If 'usableCorrelationForPair
                        End If 'usableCorrelationForPair
                    End If 'usableCorrelationForPair

                    'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
                    If Not usableCorrelationForPair Then
                        usableCorrelation = False
                    End If

                    'Changed PM_CORRREPORT_CHOKE_EQUALSIGNS 2008-09-19. Spreadsheet
                    '  applications may not like equal signs... E.g. "Fejl:510"
                    'mCorrReport.Append("===================" & _
                    '  PILInputOutput.LINEEND & PILInputOutput.LINEEND)
                    mCorrReport.Append("###################" & _
                      PILInputOutput.LINEEND & PILInputOutput.LINEEND)
                Next 'Through raw files

                If usableCorrelation Then 'Find blank entries for peptides
                    '  and collect information for peptides to be inserted
                    '  into protein list on the client side.

                    'Changed PM_TYPESAFE 2008-11-27
                    'Dim alreadyInsertedHash As Hashtable = New Hashtable
                    Dim alreadyInsertedHash2 As Dictionary(Of String, Integer) = _
                      New Dictionary(Of String, Integer)

                    For peptIndex = 0 To lastPeptideindex
                        Dim oneOrMoreOutLiers As Integer = 0
                        For fileID = 0 To lastFileIDindex
                            Dim curOutLierValue As Boolean = _
                              outlierMap(fileID, peptIndex)
                            If curOutLierValue Then
                                oneOrMoreOutLiers += 1
                            End If
                        Next

                        'Changed PM_TOO_FEW_INSERTED_WAS_OUTLIER_COLUMN_REJECT 2004-02-10
                        oneOrMoreOutLiers = 0 'To disable reject of peptide
                        '  insertion if just one peptide is an outlier (wrt
                        '  retention time correlation).

                        If oneOrMoreOutLiers = 0 Then 'We are not going to add
                            'this peptide because there is at least one
                            'outlier - retention time can not be predicted
                            'reliably and therefore adding a peptide would
                            'just add 'noise.

                            For fileID = 0 To lastFileIDindex
                                Dim isRetentionTimeOutLier As Boolean = _
                                  outlierMap(fileID, peptIndex)

                                'Raw file independent.
                                Dim curDescr As coorMapcolumnDescriptorStruct = _
                                  mProteinIndex_array2(peptIndex)
                                Dim proteinIndex As Integer = _
                                  curDescr.proteinIndex

                                'Changed PM_TOO_FEW_INSERTED_WAS_OUTLIER_COLUMN_REJECT 2004-02-10
                                If Not isRetentionTimeOutLier Then 'If the current
                                    ' peptide is an outlier (in retention time
                                    ' correlation) then we don't insert new
                                    ' peptides based on that peptide...

                                    insertNewPeptides( _
                                      m2D_array, fileID, peptIndex, _
                                      mPeptideList2, lastFileIDindex, proteinIndex, _
                                      mRetentionTimeCalibrations2, _
                                      alreadyInsertedHash2, mPeptidesToBeAdded2)
                                End If
                            Next 'Outer loop through raw files.
                        Else
                            Dim peter51 As Integer = 51 'Outliers, breakpoint for.
                        End If 'No outliers for current peptide.
                    Next 'Through peptides (columns in our map)

                    For peptIndex = 0 To mExtraPeptidesIndexCounter - 1
                        For fileID = 0 To lastFileIDindex

                            Dim curDescr As coorMapcolumnDescriptorStruct = _
                              mProteinIndex_ExtraArray2(peptIndex)
                            Dim proteinIndex2 As Integer = curDescr.proteinIndex

                            'Extra peptides. We use the same output as for the
                            'the "normal" added peptides, mPeptidesToBeAdded.
                            insertNewPeptides(mExtra2D_array, fileID, peptIndex, _
                              mExtraPeptidesList2, lastFileIDindex, proteinIndex2, _
                              mRetentionTimeCalibrations2, alreadyInsertedHash2, _
                              mPeptidesToBeAdded2)
                        Next
                    Next
                End If 'Find blank entries
            End If 'End of block, find the correlations.

            mStats.peptidesToBeAdded = mPeptidesToBeAdded2.Count

            Dim corrPairStr As String = _
              "Correlation pair number " & FILEID_FOR_SAMPLECORR

            Dim statSummary As String = "Summary:" & PILInputOutput.LINEEND & _
              "Raw files:" & vbTab & mStats.rawFiles & PILInputOutput.LINEEND & _
              "New peptides added:" & vbTab & _
                mStats.peptidesToBeAdded & PILInputOutput.LINEEND & _
              "Duplicates not inserted: " & vbTab & _
                mStats.duplicatesNotInserted & PILInputOutput.LINEEND & _
              "Peptides used in correlation and insertion:" & vbTab & _
                mStats.peptides & PILInputOutput.LINEEND & _
              "Unique peptides used in correlation and insertion:" & vbTab & _
                mStats.uniquePeptides & PILInputOutput.LINEEND & _
              corrPairStr & ", initial points:" & vbTab & _
                mStats.pointsUsedForCorreltation_SomeRawFile_Initial & _
                PILInputOutput.LINEEND & _
              corrPairStr & ", points after outlier removal:" & vbTab & _
                mStats.pointsUsedForCorreltation_SomeRawFile_AfterOutLierRemoval & _
                PILInputOutput.LINEEND & _
              corrPairStr & ", points for final correlation:" & vbTab & _
                mStats.pointsUsedForCorreltation_SomeRawFile_Final & _
                PILInputOutput.LINEEND & _
              "Peptide masses reset to wild type mass:" & vbTab & _
                mStats.massesResetToWildType & PILInputOutput.LINEEND & _
              "Peptide added where masses and mods were reset:" & vbTab & _
                mStats.addedWithAResetMass & PILInputOutput.LINEEND & _
              "Negative predicted retention times reset to 9.9 secs:" & vbTab & _
                mStats.negativeRetentionTimes & PILInputOutput.LINEEND & _
              PILInputOutput.LINEEND
            'Not filled in yet:  "Unique peptides used for correlation:" & vbTab &
            'mStats.uniquePeptides_UsedForCorrelation & LINEEND & _

            mCorrReport.Insert(0, statSummary, 1)
        End Sub 'ParsingCompleted


        'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-16
        '****************************************************************************
        '*  SUBROUTINE NAME:   correlationResult                                    *
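        'd$ <summary> Summarises the recorded correlation failures: concatenated failure texts, number of failed pairs, number of entries inspected and an overall success flag. </summary>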
        Public Function correlationResult() As corrResultStruct

            'Builds a summary of the correlation failures recorded in
            '  mCorrFailureReasonArray. Every non-empty entry counts as
            '  a failure and is reported as
            '  " Pair(<entry position>,<reference raw file index>): <text>.".
            '
            'Fields filled in on the returned structure:
            '  reportStr            - all failure texts, concatenated.
            '  failureCount         - number of non-empty entries.
            '  totalCount           - number of entries inspected.
            '  correlationSuccesful - True only if the report is empty.
            '
            'Future:
            ' 1. Perhaps include some summary information
            '        about the parsing and inserting, maybe to show
            '        always.
            ' 2. More specific about file names, not only indexes
            '    specify filenames (without path).
            '    Use mRawFile2IndexHash  and/or mRawFile2IndexHash_Reverse.

            Dim report As String = ""
            Dim failedPairs As Integer = 0
            Dim pairNumber As Integer = 0

            For Each reason As String In mCorrFailureReasonArray
                'Blank entries (Nothing or zero length) mean that no
                '  failure was recorded at this position; they still
                '  count towards the total.
                If Not String.IsNullOrEmpty(reason) Then
                    report = report & _
                      " Pair(" & pairNumber & "," & _
                      mReferenceRawFileIndex & "): " & _
                      reason & "."
                    failedPairs += 1
                End If
                pairNumber += 1
            Next

            Dim result As corrResultStruct
            result.correlationSuccesful = (report.Length = 0)
            result.reportStr = report
            result.failureCount = failedPairs
            result.totalCount = pairNumber

            Return result
        End Function 'correlationResult


        '****************************************************************************
        '*  SUBROUTINE NAME:   peptidesToBeAdded                                    *
        'd$ <summary> Returns the list of peptides collected for insertion (the list itself, not a copy). </summary>
        Public Function peptidesToBeAdded() As _
          List(Of peptideToBeAddedInfoStruct)

            'Accessor for the peptides collected for insertion. Hands
            '  out the mPeptidesToBeAdded2 list itself, not a copy.
            '  (The return type used to be ArrayList.)
            Dim pendingPeptides As List(Of peptideToBeAddedInfoStruct) = _
              mPeptidesToBeAdded2

            Return pendingPeptides
        End Function 'peptidesToBeAdded


        '****************************************************************************
        '*  SUBROUTINE NAME:   updateWithPeptidesFromOneProtein                     *
        'd$ <summary> N/A </summary>
        Public Sub updateWithPeptidesFromOneProtein( _
          ByRef aProtHitStru2 As ProteinHitStructure, _
          ByVal aProteinIndex As Integer, _
          ByVal aPreselectedPeptidesScoreThreshold As Double, _
          ByVal aIsNoIsotopeMode As Boolean _
          )

            Dim plst As PILpeptides = aProtHitStru2.peptides 'For notational
            '  convenience.

            Dim CORR_PEPTIDES_THRESHOLD As Integer = 2 'Better name??
            Dim PEPTIDE_SCORE_THRESHOLD_FOR_CORR As Double = 30.0
            'Dim PEPTIDE_SCORE_THRESHOLD_FOR_CORR As Double = 0.0

            'Update information for auto-corellating retention times between
            'different raw files. Done if there are the same
            'peptide (identified) in several raw files.

            Dim peptidesAdded As Integer = 0 'Only for debugging purposes
            Dim minScore2 As Double = 1000000000.0

            'Changed PM_TYPESAFE 2006-11-09
            'Dim protPeptideList As ArrayList = New ArrayList
            Dim protPeptideList2 As List(Of peptideCorrStruct) = _
              New List(Of peptideCorrStruct)

            'Changed PM_REFACTOR 2006-03-15
            Dim pept3 As PeptideHitStructure = _
              PeptideHitStructure.blankPeptide() 'Keep compiler happy.

            Dim pepIter As peptideListIterator = _
              New peptideListIterator(plst)
            Dim peptideToken As Integer
            'For j = 0 To aProtHitStru.pepts.Count - 1
            While Not pepIter.nextPeptide(pept3, peptideToken)

                'Changed PM_REFACTOR 2004-06-30
                Dim usePeptide As Boolean = True

                'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
                'longer implicit conversion from integer to
                'double...
                ''Changed PM_SCORETHRESHOLD_BOUNDARY_BUG 2006-10-26
                ''If Not pept3.MascotScore > aPreselectedPeptidesScoreThreshold Then
                If pept3.MascotScore2 < aPreselectedPeptidesScoreThreshold Then
                    usePeptide = False
                End If

                If pept3.AASequence = _
                  peptideConstants.SEQUENCE_BADPEPTIDE Then

                    usePeptide = False
                End If

                'Changed PM_ONLYINSERT_FOR_QUANTIFIABLE 2004-11-23
                If Not pept3.someAAsMatchingTheFilter AndAlso _
                   aIsNoIsotopeMode = False Then

                    usePeptide = False
                Else
                    Dim peter2 As Integer = 2 'For breakpoints.
                End If

                'Changed PM_RETCORR_ANYTIME 2006-10-25
                'Do not use peptides that were inserted for correlation, etc.
                If pept3.queryNumber < 0 Then
                    usePeptide = False
                End If

                'Changed PM_RETCORR_LCCENTROID 2006-10-26
                Dim retSecs As Double = _
                  getEffectiveRetentionTime_Secs(pept3)
                If retSecs < 0.01 Then
                    usePeptide = False
                End If

                If usePeptide Then
                    'Later: check for uniqueness of retention time to avoid
                    'using duplicate points in the correlation - there is no reason
                    'some retention times should have more weight than others.
                    'It will also prevent division by zero/undefined fitting
                    'if only identical pairs are left, e.g.:
                    '  X      Y      X     yErrors
                    ' 12446  12380  12446  NaN
                    ' 12446  12380  12446  NaN

                    Dim pc As peptideCorrStruct
                    pc.AAsequence4 = pept3.AASequence

                    'Changed PM_REFACTOR 2006-03-15
                    'pc.peptIndex = j
                    pc.peptideRef = peptideToken

                    pc.corr_rawFileId = pept3.rawFileID

                    'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
                    'longer implicit conversion from integer to
                    'double...
                    pc.useForCorrelation = _
                      pept3.MascotScore2 > PEPTIDE_SCORE_THRESHOLD_FOR_CORR

                    'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
                    pc.charge3 = pept3.charge
                    pc.mass3 = pept3.measuredMass

                    'Changed PM_CORR_NONINCLUDEDPEPTIDES_BUG 2006-10-20
                    pc.query3 = pept3.queryNumber

                    pc.modsHashValue = _
                      PILpeptides.hashValueForModification( _
                        pept3.modHits2, Nothing, True)

                    protPeptideList2.Add(pc)
                Else
                    Dim peter8 As Integer = 8
                End If
            End While  'Through peptides.

            'Insert pseudo sequence to make the following easier
            Dim pc2 As peptideCorrStruct
            pc2.AAsequence4 = "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ"
            pc2.peptideRef = -10000
            pc2.corr_rawFileId = -29000
            protPeptideList2.Add(pc2)

            protPeptideList2.Sort(New SortBySequenceComparer)

            'Dim lastSequence As String = ""
            Dim lastPept As peptideCorrStruct
            lastPept.AAsequence4 = ""

            Dim startIndex As Integer = 0 'For a particular peptide, index
            '  for first. Inclusive.
            Dim endIndex As Integer = 0 'For a particular peptide, index
            '  for last. Exclusive.

            Dim prevQnum As Integer = -1

            'For debugging only
            Dim pept2 As PeptideHitStructure = _
              PeptideHitStructure.blankPeptide() 'Keep compiler happy.

            'Changed PM_MARKER 2006-10-22

            Dim k As Integer
            Dim lastIndex As Integer = protPeptideList2.Count - 1
            For k = 0 To lastIndex
                Dim curPep As peptideCorrStruct = protPeptideList2(k)

                Dim massDiff As Double = curPep.mass3 - lastPept.mass3
                Dim absMassDiff As Double = Math.Abs(massDiff)

                'Changed PM_MASS_UNIQUENESS 2006-10-23. Why was it 10.0??
                '  To account for SILAC modifications with the same retention time??
                '
                '  We set it to 0.2 in order to account for modifications not
                '  recognised by the program (either not set up by the user or
                '  of a type that the program can not handle - e.g. terminal
                '  modifications).
                '
                ''Changed PM_RETT_TROUBLE 2006-08-10
                '' ''Dim lowMassDifference As Boolean = absMassDiff < 0.2
                ' ''Dim lowMassDifference As Boolean = absMassDiff < 10.0
                ''Dim lowMassDifference As Boolean = absMassDiff < 0.2
                'Dim lowMassDifference As Boolean = absMassDiff < 10.0
                Dim lowMassDifference As Boolean = absMassDiff < 0.2

                Dim sameSequence As Boolean = _
                  curPep.AAsequence4 = lastPept.AAsequence4

                Dim sameCharge As Boolean = _
                  curPep.charge3 = lastPept.charge3

                Dim sameMods As Boolean = _
                  curPep.modsHashValue = lastPept.modsHashValue

                Dim identicalPeptide As Boolean = _
                  sameSequence AndAlso _
                  lowMassDifference AndAlso _
                  sameCharge AndAlso _
                  sameMods AndAlso _
                  True

                'Changed PM_MARKER 2006-10-18
                plst.peptideByToken(curPep.peptideRef, pept2)
                Dim qNum As Integer = pept2.queryNumber

                'For debugging.
                If aProteinIndex = 7 Then
                    If k > 70 AndAlso k < 80 Then
                        Dim peter2 As Integer = 2
                    End If
                End If

                If sameSequence AndAlso sameCharge Then
                    If Not lowMassDifference Then
                        Dim peter2 As Integer = 2
                    End If

                    If absMassDiff > 17.0 Then
                        Dim peter7 As Integer = 7
                    End If

                    If absMassDiff < 10.0 Then
                        Dim peter10 As Integer = 10
                    End If '10.0

                    If absMassDiff > 0.0001 Then
                        Dim peter3 As Integer = 3
                    End If

                    If absMassDiff > 0.01 Then
                        Dim peter4 As Integer = 4
                    End If

                    If absMassDiff > 0.1 Then
                        Dim peter5 As Integer = 5
                    End If

                    If absMassDiff > 0.6 Then
                        Dim hit As Boolean = False

                        If absMassDiff > 0.9 AndAlso _
                           absMassDiff < 1.1 Then

                            Dim peter1 As Integer = 1
                            hit = True
                        End If

                        If absMassDiff > 2.9 AndAlso _
                           absMassDiff < 3.1 Then

                            Dim peter3 As Integer = 3
                            hit = True
                        End If

                        If absMassDiff > 3.9 AndAlso _
                           absMassDiff < 4.1 Then

                            Dim peter4 As Integer = 4
                            hit = True
                        End If

                        If absMassDiff > 4.9 AndAlso _
                           absMassDiff < 5.1 Then

                            Dim peter5 As Integer = 5
                            hit = True
                        End If

                        If absMassDiff > 5.9 AndAlso _
                           absMassDiff < 6.1 Then

                            Dim peter6 As Integer = 6
                            hit = True
                        End If

                        If absMassDiff > 6.9 AndAlso _
                           absMassDiff < 7.1 Then

                            Dim peter7 As Integer = 7
                            hit = True
                        End If

                        If absMassDiff > 7.9 AndAlso _
                           absMassDiff < 8.1 Then

                            Dim peter8 As Integer = 8
                            hit = True
                        End If

                        If absMassDiff > 8.9 AndAlso _
                           absMassDiff < 9.1 Then

                            Dim peter9 As Integer = 9
                            hit = True
                        End If

                        If absMassDiff > 9.9 AndAlso _
                           absMassDiff < 10.1 Then

                            Dim peter10 As Integer = 10
                            hit = True
                        End If

                        If absMassDiff > 10.9 AndAlso _
                           absMassDiff < 11.1 Then

                            Dim peter11 As Integer = 11
                            hit = True
                        End If

                        If absMassDiff > 11.9 AndAlso _
                           absMassDiff < 12.1 Then

                            Dim peter12 As Integer = 12
                            hit = True
                        End If

                        If absMassDiff > 12.9 AndAlso _
                           absMassDiff < 13.1 Then

                            Dim peter13 As Integer = 13
                            hit = True
                        End If

                        If absMassDiff > 14.9 AndAlso _
                           absMassDiff < 15.1 Then

                            Dim peter15 As Integer = 15
                            hit = True
                        End If

                        If absMassDiff > 15.9 AndAlso _
                           absMassDiff < 16.1 Then

                            Dim peter16 As Integer = 16
                            hit = True
                        End If

                        If absMassDiff > 16.9 AndAlso _
                           absMassDiff < 17.1 Then

                            Dim peter17 As Integer = 17
                            hit = True
                        End If

                        If absMassDiff > 17.9 AndAlso _
                           absMassDiff < 18.1 Then

                            Dim peter18 As Integer = 18
                            hit = True
                        End If

                        If absMassDiff > 19.9 AndAlso _
                           absMassDiff < 20.1 Then

                            Dim peter20 As Integer = 20
                            hit = True
                        End If

                        If absMassDiff > 20.9 AndAlso _
                           absMassDiff < 21.1 Then

                            Dim peter21 As Integer = 21
                            hit = True
                        End If

                        If absMassDiff > 22.9 AndAlso _
                           absMassDiff < 23.1 Then

                            Dim peter23 As Integer = 23
                            hit = True
                        End If

                        If absMassDiff > 31.9 AndAlso _
                           absMassDiff < 32.1 Then

                            Dim peter32 As Integer = 32
                            hit = True
                        End If

                        If absMassDiff > 32.9 AndAlso _
                           absMassDiff < 33.1 Then

                            Dim peter33 As Integer = 33
                            hit = True
                        End If

                        If absMassDiff > 45.9 AndAlso _
                           absMassDiff < 46.1 Then

                            Dim peter46 As Integer = 46
                            hit = True
                        End If

                        If Not hit Then
                            Dim peter81 As Integer = 81
                        End If
                    End If

                    If absMassDiff > 7 Then
                        Dim peter69 As Integer = 69
                    End If
                End If 'Same sequence. For debugging only.
                prevQnum = qNum

                'Note: for retention time correlation charge and precise mass
                '      are NOT important.

                'Changed PM_CORR_DISTINCTCHARGE 2006-10-20
                'If sameSequence AndAlso lowMassDifference Then
                If identicalPeptide Then
                    endIndex += 1
                Else
                    If True Then 'Processing for particular peptide.

                        'Changed PM_TYPESAFE 2008-11-27
                        'Dim bestScoreHash As New Hashtable 'List (hash) of
                        ''  best peptides; one for each raw file.
                        Dim bestScoreHash2 As _
                          Dictionary(Of Integer, peptideCorrStruct) = _
                            New Dictionary(Of Integer, peptideCorrStruct)

                        'Changed PM_TYPESAFE 2008-11-27
                        'Dim nextBestScoreHash As New Hashtable 'List (hash) of
                        ''  next best peptides; with distinct charge from the
                        ''  corresponding in bestScoreHash.
                        ''  one for each raw file.
                        Dim nextBestScoreHash2 As _
                          Dictionary(Of String, peptideCorrStruct) = _
                            New Dictionary(Of String, peptideCorrStruct)

                        'Changed PM_TYPESAFE 2008-11-27
                        'Dim notBestScore As ArrayList = New ArrayList 'Type
                        '  is peptideCorrStruct.
                        Dim notBestScore2 As List(Of peptideCorrStruct) = _
                          New List(Of peptideCorrStruct)

                        Dim m As Integer
                        Dim lastIndex2 As Integer = endIndex - 1
                        For m = startIndex To lastIndex2
                            Dim curPep2 As peptideCorrStruct = _
                              protPeptideList2(m)

                            Dim curRawFileID2 As Integer = curPep2.corr_rawFileId

                            If True Then
                                '  Build our map of rawfileID to arbitrary zero
                                '  based index. This may take several peptides
                                '  to complete. So it may not be completely
                                '  built in one run of this loop.
                                Dim key As Integer = curRawFileID2

                                If Not mRawFile2IndexHash2.ContainsKey(key) Then

                                    mRawFile2IndexHash2.Add( _
                                      key, mCurrentRawFileIndex)

                                    mRawFile2IndexHash_Reverse2.Add( _
                                      mCurrentRawFileIndex, key)

                                    mCurrentRawFileIndex += 1
                                End If
                            End If

                            If bestScoreHash2.ContainsKey(curRawFileID2) Then
                                Dim oldStruct As peptideCorrStruct = _
                                  bestScoreHash2(curRawFileID2)

                                Dim oldScore As Double = _
                                  plst.peptideScoreByToken( _
                                    oldStruct.peptideRef)

                                Dim newScore As Double = _
                                  plst.peptideScoreByToken( _
                                    curPep2.peptideRef)

                                If newScore > oldScore Then
                                    'Better score, replace.

                                    'Note: old value in oldStruct.

                                    notBestScore2.Add(oldStruct)

                                    bestScoreHash2(curRawFileID2) = curPep2
                                Else
                                    notBestScore2.Add(curPep2)
                                End If
                            Else
                                bestScoreHash2.Add(curRawFileID2, curPep2)
                            End If
                        Next

                        If bestScoreHash2.Count = 1 Then
                            Dim peter9 As Integer = 9
                        End If

                        If True Then
                            'Add to our linear list of peptides and insert
                            'indexes into that in other datastructures.
                            Dim bestPeptides As Integer = bestScoreHash2.Count

                            'Changed PM_TYPESAFE 2008-11-27
                            'Dim hashEnumerator As IDictionaryEnumerator = _
                            '  bestScoreHash2.GetEnumerator()
                            Dim hashEnumerator2 As Dictionary( _
                              Of Integer, peptideCorrStruct).Enumerator = _
                                bestScoreHash2.GetEnumerator()

                            While hashEnumerator2.MoveNext()

                                Dim curPep2 As peptideCorrStruct = _
                                  hashEnumerator2.Current.Value

                                If True Then 'Next best score handling
                                    'Only now can we decide which of the
                                    'not-best-scores we want to use - because
                                    'we must know the charge for the best
                                    'scoring peptide.

                                    Dim bestScoreCharge As Integer = curPep2.charge3

                                    Dim bestScoreRawFileID As Integer = _
                                      curPep2.corr_rawFileId

                                    'Same as:
                                    Dim rawID As Integer = _
                                      hashEnumerator2.Current.Key

                                    Dim curNextBest As peptideCorrStruct
                                    For Each curNextBest In notBestScore2
                                        If curNextBest.corr_rawFileId = _
                                             bestScoreRawFileID AndAlso _
                                           curNextBest.charge3 <> bestScoreCharge Then

                                            'To correct: add or replace
                                            'in nextBestScoreHash.

                                            Dim nextBestScoreKey As String = _
                                              curNextBest.corr_rawFileId & "_" & _
                                              curNextBest.charge3
                                            If nextBestScoreHash2.ContainsKey( _
                                                 nextBestScoreKey) Then

                                                Dim oldStruct2 As peptideCorrStruct = _
                                                  nextBestScoreHash2( _
                                                    nextBestScoreKey)

                                                Dim oldScore2 As Double = _
                                                  plst.peptideScoreByToken( _
                                                    oldStruct2.peptideRef)
                                                Dim score As Double = _
                                                  plst.peptideScoreByToken( _
                                                    curNextBest.peptideRef)

                                                If score > oldScore2 Then
                                                    'Better score, replace.
                                                    nextBestScoreHash2( _
                                                      nextBestScoreKey) = _
                                                        curNextBest
                                                Else
                                                    'Then we finally forget about
                                                    'that peptide!
                                                    Dim peter3 As Integer = 3
                                                End If
                                            Else
                                                nextBestScoreHash2.Add( _
                                                  nextBestScoreKey, curNextBest)
                                            End If
                                        End If
                                    Next 'Iterating best score peptides
                                    '     for each raw file.
                                End If 'Next best score handling

                                Dim peptRef As Integer = curPep2.peptideRef

                                Dim peptideToAdd As PeptideHitStructure = _
                                  PeptideHitStructure.blankPeptide() 'Keep compiler happy.

                                plst.peptideByToken( _
                                  peptRef, peptideToAdd)

                                'Changed PM_INSERTPEPTIDES_ASSERT 2005-07-29
                                Dim lowerMass As Double = _
                                  PeptideHitStructure.getLowerMCRuncalib(peptideToAdd)
                                Trace.Assert( _
                                  lowerMass > 20.0, _
                                  "PIL ASSERT. Unreasonable mass for lowerMass: " & _
                                  lowerMass)

                                mPeptideList2.Add(peptideToAdd)

                                Dim coor As mapCoordinatesStruct
                                coor.uniquePeptideIndex = mUniquePeptideIndexCounter
                                coor.rawFileIDindex = _
                                  mRawFile2IndexHash2(curPep2.corr_rawFileId)
                                coor.useForCorrelation = curPep2.useForCorrelation
                                mPeptideList_mapCoordinates2.Add(coor)

                                peptidesAdded += 1

                                'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
                                'longer implicit conversion from integer to
                                'double...
                                If peptideToAdd.MascotScore2 < minScore2 Then
                                    minScore2 = peptideToAdd.MascotScore2
                                End If
                            End While

                            If peptidesAdded > 0 Then 'Will only be false for the
                                '  very first iteration of the loop...

                                Dim columnDescriptor As coorMapcolumnDescriptorStruct
                                columnDescriptor.proteinIndex = aProteinIndex
                                'Note: field peptideUsedForCorrelation will be filled
                                '      in later, in ParsingCompleted().

                                mProteinIndex_array2.Add(columnDescriptor)
                                mUniquePeptideIndexCounter += 1
                            End If

                            If True Then 'Next best score handling.

                                'Dim hashEnumerator3 As IDictionaryEnumerator = _
                                '  nextBestScoreHash.GetEnumerator()
                                Dim hashEnumerator4 As Dictionary( _
                                  Of String, peptideCorrStruct).Enumerator = _
                                    nextBestScoreHash2.GetEnumerator()

                                While hashEnumerator4.MoveNext()
                                    Dim curPep3 As peptideCorrStruct = _
                                      hashEnumerator4.Current.Value
                                    Dim peptRef3 As Integer = curPep3.peptideRef

                                    Dim peptideToAdd3 As PeptideHitStructure = _
                                      PeptideHitStructure.blankPeptide() 'Keep compiler happy.

                                    plst.peptideByToken( _
                                      peptRef3, peptideToAdd3)

                                    mExtraPeptidesList2.Add(peptideToAdd3)

                                    Dim coor3 As mapCoordinatesStruct
                                    coor3.uniquePeptideIndex = _
                                      mExtraPeptidesIndexCounter
                                    coor3.rawFileIDindex = _
                                      mRawFile2IndexHash2(curPep3.corr_rawFileId)
                                    coor3.useForCorrelation = False 'Note: not
                                    '  curPep3.useForCorrelation because it may
                                    '  be true just because of a high (Mascot) score.

                                    Trace.Assert(coor3.useForCorrelation = False, _
                                      "PIL ASSERT. " & _
                                      "Extra peptide was unexpectedly used for correlation....")

                                    mExtraPeptidesList_mapCoordinates2.Add(coor3)

                                    Dim columnDescriptor As _
                                      coorMapcolumnDescriptorStruct
                                    columnDescriptor.proteinIndex = aProteinIndex
                                    mProteinIndex_ExtraArray2.Add(columnDescriptor)

                                    mExtraPeptidesIndexCounter += 1 'Note: increased
                                    '  every time - in mExtra2D_array there will only
                                    '  be one value different from NOPEPTIDE_CODE in
                                    '  each row.
                                End While
                            End If 'Next best score handling
                        End If 'Add to our linear list of peptides, etc.
                    End If 'True. Processing for one peptide.

                    startIndex = k
                    endIndex = startIndex + 1 '+1: because we already have one
                    '  peptide, the current.

                    'Changed PM_CORR_NONINCLUDEDPEPTIDES_BUG 2006-10-20
                    'lastPept = curPep
                End If

                'Changed PM_CORR_NONINCLUDEDPEPTIDES_BUG 2006-10-20
                lastPept = curPep 'Moved to here...
            Next
            Dim totPeptides As Integer = mPeptideList2.Count

            'Assert equal length, mPeptideList, mPeptideListCoordinates
            '(or mPeptideList_mapCoordinates ??)
            Dim len2 As Integer = mPeptideList2.Count
            Trace.Assert(len2 = mPeptideList_mapCoordinates2.Count, _
              "PIL ASSERT. mPeptideList2 is of length " & len2 & _
              ". This is not the expected length...")
        End Sub 'updateWithPeptidesFromOneProtein


        '****************************************************************************
        '*  SUBROUTINE NAME:   PeptidesNotActuallyInserted                          *
        'd$ <summary>
        'd$   Purpose: Signal from client so we can adjust our report.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
        Public Sub PeptidesNotActuallyInserted()
            'The note is inserted at index 0 of mCorrReport, i.e. in front of
            'whatever report text has been accumulated so far.
            '
            'Each sentence fragment ends with a space so the concatenated
            'message does not run sentences together (previously
            '"NOT inserted." was immediately followed by "Possible reasons:").
            mCorrReport.Insert( _
              0, _
              "Note: peptides were actually NOT inserted. Repeat: NOT inserted. " & _
              "Possible reasons: The checkbox ""Do not insert new peptides"" in " & _
              "the correlation settings dialog is checked. Uncheck it and try again. " & _
              vbCrLf & "See above." & vbCrLf & vbCrLf & vbCrLf)
        End Sub 'PeptidesNotActuallyInserted


        '****************************************************************************
        '*  SUBROUTINE NAME:   getCorrelationReport                                 *
        'd$ <summary>
        'd$   Purpose: Return the current content of the correlation report as a string.
        'd$
        'd$   <see cref="T:VBXMLDoc.CVBXMLDoc" />.
        'd$ </summary>
        Public Function getCorrelationReport() As String
            'Hand out the current content of mCorrReport as a plain string.
            Dim reportText As String = mCorrReport.ToString()
            Return reportText
        End Function 'getCorrelationReport


    End Class 'retentionTimeCorrelator

End Namespace 'massSpectrometryBase

    

    

Generated by script codePublish.pl at 2009-01-05T15:20:59.