'****************************************************************************
'* Copyright (C) 2004 Peter Mortensen and Matthias Mann *
'* This file is part of MSQuant. *
'* *
'* MSQuant is distributed under the terms of *
'* the GNU General Public License. See src/COPYING.TXT or *
'* <http://www.gnu.org/licenses/gpl.txt> for details. *
'* *
'* MSQuant is free software; you can redistribute it *
'* and/or modify it under the terms of the GNU *
'* General Public License as published by the Free *
'* Software Foundation; either version 2 of the *
'* License, or (at your option) any later version. *
'* *
'* MSQuant is distributed in the hope that it will be *
'* useful, but WITHOUT ANY WARRANTY; without even the *
'* implied warranty of MERCHANTABILITY or FITNESS FOR *
'* A PARTICULAR PURPOSE. See the GNU General Public *
'* License for more details. *
'* *
'* You should have received a copy of the GNU General *
'* Public License along with MSQuant; if not, write to *
'* the Free Software Foundation, Inc., 59 Temple *
'* Place, Suite 330, Boston, MA 02111-1307 USA *
'* *
'* Purpose: has detailed knowledge of how to correlate retention time *
'* between two or more raw files. *
'* Holds Class retentionTimeCorrelator, see below for *
'* documentation. *
'* *
'****************************************************************************
'****************************************************************************
'* CEBI *
'* Software Development Group *
'* Peter Mortensen *
'* E-mail: NUKESPAMMERSdrmortensen@get2netZZZZZZ.dk *
'* WWW: http://www.cebi.sdu.dk/ *
'* *
'* Program for post-processing of result from search in mass *
'* spectrometric data. *
'* *
'* FILENAME: retentionTimeCorrelator.vb *
'* TYPE: VISUAL_BASIC *
'* *
'* CREATED: PM 2003-09-11 Vrs 1.0. *
'* UPDATED: PM 2003-xx-xx *
'* *
'****************************************************************************
Option Strict On
Option Explicit On
Imports System.Text 'For StringBuilder
Imports System.Collections.Generic 'For Dictionary and List.
Imports MolecularSharedStructures
'Changed PM_SUPERCHARGE 2003-11-03
'Imports Mascot_Parser.SDUPutility
Imports SDUPutility
'Imports System.xyz
'****************************************************************************
'd$ <summary>
'd$ Purpose: Namespace for lower layers of mass spectrometric
'd$ applications: raw data file handling, descriptive statistics,
'd$ fragment masses, digestion, file associations, etc.
'd$ <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$ <isUnitTest></isUnitTest>
'd$ <applicationname>test_rawDataFileHandling</applicationname>
'd$ <author>Peter Mortensen</author>
'd$ <seealso>http://www.cebi.sdu.dk/</seealso>
'd$ <codetype>PLATFORM independent</codetype>
'd$ </summary>
Namespace massSpectrometryBase
'Changed PM_RETCORR_ANYTIME 2006-10-26
Public Enum retentionTimeTypeEnum
    'Selects which time stamp of a peptide is used as its retention time
    '(see getEffectiveRetentionTime_Secs). The numeric values are spelled
    'out explicitly; they equal the implicit numbering that follows from
    'starting the enumeration at 331.
    enumMSMSevent = 331 'Time of the MS/MS event.
    enumLCprofileCentroid = 332 'Centroid of the LC profile.
    enumLCprofileCentroidWithFallBackToMSMS = 333 'LC centroid when it is
    ' set, otherwise the MS/MS event time.
End Enum 'retentionTimeTypeEnum
Public Structure peptideCorrStruct
    'One peptide entry reduced to the keys that SortBySequenceComparer
    'orders on (sequence, modification hash, charge, mass) plus
    'book-keeping fields. Members are declared Public explicitly; that is
    'the same access level "Dim" gives inside a Structure.
    Public AAsequence4 As String
    Public corr_rawFileId As Integer
    'Changed PM_REFACTOR 2006-03-15 (replaced the former peptIndex field).
    Public peptideRef As Integer 'ID into some other data structure
    ' to find out more about a peptide.
    Public useForCorrelation As Boolean
    'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
    Public charge3 As Integer
    Public mass3 As Double
    'Changed PM_UNIQUEMODS_FOR_RETCORR 2006-10-22
    Public modsHashValue As Integer 'Makes it easier to sort on the
    ' modification set.
    'Mostly for diagnostics/debugging.
    Public query3 As Integer
End Structure 'peptideCorrStruct
Public Structure mapCoordinatesStruct
    'Position of one peptide in the 2D correlation map, as consumed by
    'fillIn2Dmap: first array index (raw file) and second array index
    '(unique peptide).
    Public rawFileIDindex As Integer
    Public uniquePeptideIndex As Integer
    Public useForCorrelation As Boolean 'False: fillIn2Dmap stores the
    ' peptide's list index negated.
End Structure 'mapCoordinatesStruct
Public Structure coorMapcolumnDescriptorStruct
    'Per-column descriptor for the 2D correlation map.
    Public proteinIndex As Integer
    Public peptideUsedForCorrelation As Boolean
End Structure 'coorMapcolumnDescriptorStruct
'Needed?
'Public Structure rawFilePropertiesStruct
' Dim peptides As Integer 'How many of the union of peptides are
' ' in this raw file.
' Dim someOTher As Integer
'End Structure
Public Structure retCorrDataPointStruct
    'One (x, y) pair of retention times, in seconds, for the regression,
    'with a flag that excludes it once it has been marked as an outlier
    '(see markOutLiers / extractXYarrays).
    Public xRetentionTimeSecs As Double
    Public yRetentionTimeSecs As Double
    Public isOutLier As Boolean
End Structure 'retCorrDataPointStruct
Public Structure retentionTimeCalibrationStruct
    'Linear retention time calibration of one raw file. insertNewPeptides
    'applies it as: time = A * referenceTime + B.
    Public A As Double 'Slope
    Public B As Double 'Offset
End Structure 'retentionTimeCalibrationStruct
Public Structure peptideToBeAddedInfoStruct
    'Description of a peptide that insertNewPeptides queues for insertion
    'into a raw file in which it was not identified. Most fields are
    'copied from the identified peptide it is cloned from.
    Public proteinIndex As Integer 'Index into the protein list at
    ' the client side.
    Public calibratedRetentionTimeSecs As Double 'Computed from a retention
    ' time from a peptide from another raw file.
    Public pepSequence As String
    'Changed PM_SHORT_EXPORT 2005-12-01
    'Open question from the original author: what does PDBA mean?
    Public PDBA_leftFlankAA As Int16
    Public PDBA_rightFlankAA As Int16
    Public measuredMCR As Double
    Public measuredMass As Double
    Public calculatedMass As Double
    Public charge As Integer
    'Changed PM_BADPEAKREASON_BUG 2003-09-26
    Public goodMSpeak As Boolean
    Public badPeakReason As String
    Public PDBA_rawFileID As Integer 'Raw file the peptide is to be
    ' inserted into.
    Public modHits4 As List(Of modificationCountStruct)
    'Changed PM_CORR_CHECKEDINHERIT_FOR_INSERTED 2003-10-08
    Public peptVerified As Boolean
    'Changed PM_FAST_SERIALISATION_BUG_SAVE_INSERTED_PEPTIDES 2007-01-08
    Public uncalibratedSILACmasses4 As List(Of Double)
    'Changed PM_FAST_SERIALISATION_BUG_LOAD_INSERTED_PEPTIDES 2007-01-08
    Public version2 As Integer 'Same as in PeptideHitStructure.
End Structure 'peptideToBeAddedInfoStruct
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-11
Public Structure corrResultStruct
    'Outcome of a retention time correlation run.
    Public correlationSuccesful As Boolean
    Public reportStr As String
    Public failureCount As Integer
    Public totalCount As Integer 'Same as number of raw files
End Structure 'corrResultStruct
Public Structure statsStruct
    'Collects information/statistics about the operations in the class:
    'how many peptides were used for correlation, etc.
    'Future for these: median error value.
    'Note: "SomeRawFile" may no longer be the first raw file; it is
    'determined by FILEID_FOR_SAMPLECORR.
    Public pointsUsedForCorreltation_SomeRawFile_Initial As Integer 'E.g. 94
    Public pointsUsedForCorreltation_SomeRawFile_AfterOutLierRemoval As Integer 'E.g. 73
    Public pointsUsedForCorreltation_SomeRawFile_Final As Integer 'E.g. 36
    Public peptidesToBeAdded As Integer
    Public peptides As Integer 'Number of non NOPEPTIDE_CODE in our 2D map. E.g. 336
    Public uniquePeptides As Integer 'Number of columns in our 2D map
    Public uniquePeptides_UsedForCorrelation As Integer 'Number of columns in
    ' our 2D map with 2 or more non NOPEPTIDE_CODE.
    Public rawFiles As Integer 'E.g. 3
    Public duplicatesNotInserted As Integer
    'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
    Public massesResetToWildType As Integer
    Public addedWithAResetMass As Integer
    'Changed PM_NEGATIVE_RETENTION_TIMES 2003-10-16
    Public negativeRetentionTimes As Integer
End Structure 'statsStruct
'Changed PM_CORRELATION_DIALOG 2003-10-21
<Serializable()> _
Public Structure correlationSettingsStructure
    'User-selectable settings for the correlation step. Field names and
    'order are unchanged.
    Public doNotInsertNewPeptides As Boolean
    Public insertPeptidesInLCTimeRange As Boolean
    Public startLCtimeForInsertion_Secs As Double
    Public endLCtimeForInsertion_Secs As Double
    'Changed PM_LCPEAK_WINDOW_USERDEFINED 2003-12-18
    Public leftLCwindowSecs As Double
    Public rightLCwindowSecs As Double
    'Changed PM_LCPEAKDETECTION_OPTIONS 2006-06-16
    Public SILACdishForLCpeakDetection As Integer
    Public useHighestLCprofileForLCpeakDetection As Boolean 'If False,
    ' field SILACdishForLCpeakDetection decides which LC profile
    ' to use - the "fixed" option in the user interface.
End Structure 'correlationSettingsStructure
'****************************************************************************
'* SUBROUTINE NAME: SortBySequenceComparer *
'd$ <summary>
'd$ Purpose: comparer for sorting peptideCorrStruct items ascending.
'd$ Keys, in order of precedence: AAsequence4, modsHashValue, charge3
'd$ and mass3.
'd$ </summary>
Class SortBySequenceComparer
    Implements System.Collections.Generic.IComparer(Of peptideCorrStruct)

    'd$ <summary>
    'd$ Returns -1 if aItem1 sorts before aItem2, 1 if it sorts after and
    'd$ 0 if all four keys compare equal.
    'd$ </summary>
    Function Compare( _
        ByVal aItem1 As peptideCorrStruct, ByVal aItem2 As peptideCorrStruct) _
        As Integer _
        Implements _
        System.Collections.Generic.IComparer(Of peptideCorrStruct).Compare

        'First key: amino acid sequence (VB string comparison operators,
        'so the project's Option Compare setting applies).
        If aItem1.AAsequence4 < aItem2.AAsequence4 Then Return -1
        If aItem1.AAsequence4 > aItem2.AAsequence4 Then Return 1

        'Same sequence, second key: modification set (by hash value).
        If aItem1.modsHashValue < aItem2.modsHashValue Then Return -1
        If aItem1.modsHashValue > aItem2.modsHashValue Then Return 1

        'Same modifications, third key: charge.
        If aItem1.charge3 < aItem2.charge3 Then Return -1
        If aItem1.charge3 > aItem2.charge3 Then Return 1

        'Also same charge, fourth key: mass.
        If aItem1.mass3 < aItem2.mass3 Then Return -1
        If aItem1.mass3 > aItem2.mass3 Then Return 1

        'All keys equal (or the masses are unordered, e.g. NaN).
        Return 0
    End Function 'Compare
End Class 'SortBySequenceComparer
'****************************************************************************
'd$ <summary>
'd$ Purpose: correlates retention time between two or more raw files.
'd$ <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$ <applicationname>test_rawDataFileHandling</applicationname>
'd$ <author>Peter Mortensen</author>
'd$ <seealso>http://www.cebi.sdu.dk/</seealso>
'd$ <codetype>PLATFORM independent / GUI</codetype>
'd$ </summary>
Public NotInheritable Class retentionTimeCorrelator
Private PEPTIDE_UNIT_DISTANCE As Double = 1663.75 / 1663 'About 1.000451
'Used by insertNewPeptides as the width of the mass bin in its
'duplicate key. To do: move somewhere more general.
Private mPeptideList2 As List(Of PeptideHitStructure) 'Long list
' of peptides from many proteins for retention time correlation.
' Indexes into this list are used in other data structures
' (e.g. as cell values in m2D_array).
'Changed PM_TYPESAFE 2008-11-27
'Private mPeptideList_mapCoordinates As ArrayList 'Type: mapCoordinatesStruct
'Parallel to mPeptideList2: element j holds the map coordinates of
'peptide j.
Private mPeptideList_mapCoordinates2 As List(Of mapCoordinatesStruct)
Private mUniquePeptideIndexCounter As Integer
Private m2D_array As Integer(,) 'Better name wanted! Each row is for a
' raw file. Each column is for a peptide identified in 2 or more
' raw files. Sample: 3 rows and 1225 columns.
' As filled in by fillIn2Dmap a cell holds an index into the peptide
' list (negated if the peptide is not used for correlation) or
' NOPEPTIDE_CODE for a blank.
'Changed PM_TYPESAFE 2008-11-27
'Private mProteinIndex_array As ArrayList 'Type: coorMapcolumnDescriptorStruct
Private mProteinIndex_array2 As List(Of coorMapcolumnDescriptorStruct)
'Changed PM_CORR_ADD_EVEN_MORE_PEPTIDES 2003-09-25
'The "extra" members below mirror the corresponding members above.
'Changed PM_TYPESAFE 2006-10-25
'Dim mExtraPeptidesList As ArrayList
Private mExtraPeptidesList2 As List(Of PeptideHitStructure) 'Long
' list of peptides from many proteins. For inserting extra peptides
' from next best score peptides, those with different charge and a
' lower score than the best score peptide (for a particular raw
' file). Indexes into this list are used in other data structures.
'Changed PM_TYPESAFE 2008-11-27
'Private mExtraPeptidesList_mapCoordinates As ArrayList 'Type: mapCoordinatesStruct
'Parallel to mExtraPeptidesList2.
Private mExtraPeptidesList_mapCoordinates2 As List(Of mapCoordinatesStruct)
Private mExtraPeptidesIndexCounter As Integer
Private mExtra2D_array As Integer(,) 'Better name wanted!
'Changed PM_TYPESAFE 2008-11-27
'Private mProteinIndex_ExtraArray As ArrayList 'Type: coorMapcolumnDescriptorStruct
Private mProteinIndex_ExtraArray2 As List(Of coorMapcolumnDescriptorStruct)
'Changed PM_TYPESAFE 2008-11-27
'Private mRawFile2IndexHash As Hashtable
'Presumably raw file ID -> row index (the inverse of the map below);
'verify against the code that fills it.
Private mRawFile2IndexHash2 As Dictionary(Of Integer, Integer)
'Private mRawFile2IndexHash_Reverse As Hashtable
'Row index -> raw file ID; insertNewPeptides looks up the raw file ID
'of a map row here.
Private mRawFile2IndexHash_Reverse2 As Dictionary(Of Integer, Integer)
Private mCurrentRawFileIndex As Integer
Private mReferenceRawFileIndex As Integer '-1 (set by the constructor)
' means undefined.
'Changed PM_TYPESAFE 2008-11-27
'Private mRetentionTimeCalibrations As ArrayList 'Type is retentionTimeCalibrationStruct.
Private mRetentionTimeCalibrations2 As List(Of retentionTimeCalibrationStruct)
'Changed PM_TYPESAFE 2006-10-25
'Dim mPeptidesToBeAdded As ArrayList 'Type is peptideToBeAddedInfoStruct.
Private mPeptidesToBeAdded2 As List(Of peptideToBeAddedInfoStruct)
Private mStats As statsStruct
Private mCorrReport As System.Text.StringBuilder 'Text report; e.g.
' correlationFailure dumps the raw data points into it.
Private NOPEPTIDE_CODE As Integer = -2300000 'Cell value in the 2D maps
' that marks "no peptide here".
Private FILEID_FOR_SAMPLECORR As Integer = 1 'Now second file, because the reference ID is
' now 0 in the standard example...
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
'One failure reason per raw file index; written by correlationFailure.
Private mCorrFailureReasonArray() As String
'Changed PM_INSERTED_FALSENEGATIVE 2004-11-25
Private mMaxDiffIsoAbs As Double = -1.0E+20 'Only for diagnostics, can be removed later.
'Changed PM_RETCORR_ANYTIME 2006-10-26
Private mParsingCompletedCount As Integer 'Incremented on every call of
' ParsingCompleted.
'Changed PM_RETCORR_LCCENTROID 2006-10-26
Private mRetentionTimeType As retentionTimeTypeEnum 'Fixed at
' construction; see getEffectiveRetentionTime_Secs.
'****************************************************************************
'* SUBROUTINE NAME: New *
'd$ <summary>
'd$ Constructor. Stores the retention time type and creates all
'd$ (empty) collections, counters and statistics.
'd$ </summary>
'd$ <param name="aRetentionTimeType">
'd$ Which retention time getEffectiveRetentionTime_Secs() reports.
'd$ </param>
Public Sub New(ByVal aRetentionTimeType As retentionTimeTypeEnum)
    MyBase.New()

    'Changed PM_RETCORR_LCCENTROID 2006-10-26
    mRetentionTimeType = aRetentionTimeType

    'Peptides used for the correlation itself.
    'Changed PM_TYPESAFE 2006-10-25
    mPeptideList2 = New List(Of PeptideHitStructure)()
    mPeptideList_mapCoordinates2 = New List(Of mapCoordinatesStruct)()
    mProteinIndex_array2 = New List(Of coorMapcolumnDescriptorStruct)()
    mUniquePeptideIndexCounter = 0

    'The parallel set for the extra peptides.
    mExtraPeptidesList2 = New List(Of PeptideHitStructure)()
    mExtraPeptidesList_mapCoordinates2 = New List(Of mapCoordinatesStruct)()
    mProteinIndex_ExtraArray2 = New List(Of coorMapcolumnDescriptorStruct)()
    mExtraPeptidesIndexCounter = 0

    'Raw file book-keeping.
    mRawFile2IndexHash2 = New Dictionary(Of Integer, Integer)()
    mRawFile2IndexHash_Reverse2 = New Dictionary(Of Integer, Integer)()
    mCurrentRawFileIndex = 0
    mReferenceRawFileIndex = -1 'Meaning undefined. Useful for error detection.

    'Results.
    mRetentionTimeCalibrations2 = New List(Of retentionTimeCalibrationStruct)()
    mPeptidesToBeAdded2 = New List(Of peptideToBeAddedInfoStruct)()
    mCorrReport = New System.Text.StringBuilder(3000)

    'Statistics counters that are incremented later on.
    mStats.duplicatesNotInserted = 0
    'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
    mStats.massesResetToWildType = 0
    mStats.addedWithAResetMass = 0
    'Changed PM_NEGATIVE_RETENTION_TIMES 2003-10-16
    mStats.negativeRetentionTimes = 0

    'Changed PM_RETCORR_ANYTIME 2006-10-26
    mParsingCompletedCount = 0
End Sub 'New()
'****************************************************************************
'* SUBROUTINE NAME: insertedPeptides *
'd$ <summary>
'd$ Returns the current number of entries in mPeptideList2.
'd$ </summary>
'****************************************************************************
Public Function insertedPeptides() As Integer
    Dim peptideCount As Integer = mPeptideList2.Count
    Return peptideCount
End Function 'insertedPeptides
'****************************************************************************
'* SUBROUTINE NAME: extractXYarrays *
'd$ <summary>
'd$ Purpose: helper function for ParsingCompleted(), putting non-outliers
'd$ into arrays for subsequent linear regression.
'd$ </summary>
'd$ <param name="aInCoorPointList2">
'd$ Data points; those flagged isOutLier are skipped.
'd$ </param>
'd$ <param name="anOutXarray">
'd$ Replaced by a new array with the x values (seconds) of the
'd$ non-outliers, in list order.
'd$ </param>
'd$ <param name="anOutYarray">
'd$ Replaced by a new array with the matching y values (seconds).
'd$ </param>
'd$ <remarks>
'd$ Both output arrays get exactly one element per non-outlier; they are
'd$ empty if there are none.
'd$ </remarks>
Private Shared Sub extractXYarrays( _
    ByRef aInCoorPointList2 As List(Of retCorrDataPointStruct), _
    ByRef anOutXarray() As Double, ByRef anOutYarray() As Double)

    Dim pointCount As Integer = aInCoorPointList2.Count
    Dim keptX As New List(Of Double)(pointCount)
    Dim keptY As New List(Of Double)(pointCount)

    For Each dataPoint As retCorrDataPointStruct In aInCoorPointList2
        If dataPoint.isOutLier Then
            Continue For
        End If
        keptX.Add(dataPoint.xRetentionTimeSecs)
        keptY.Add(dataPoint.yRetentionTimeSecs)
    Next 'Through data points.

    anOutXarray = keptX.ToArray()
    anOutYarray = keptY.ToArray()
End Sub 'extractXYarrays
'****************************************************************************
'* SUBROUTINE NAME: markOutLiers *
'd$ <summary>
'd$ Purpose: flags data points as outliers when the absolute value of
'd$ their y error is at or above a limit.
'd$ </summary>
'd$ <param name="anInYErrors">
'd$ One error value per data point that is NOT already an outlier, in
'd$ list order (i.e. parallel to the arrays from extractXYarrays()).
'd$ </param>
'd$ <param name="aInOutCoorPointList2">
'd$ Data points; isOutLier is set for the points that exceed the limit.
'd$ Points already flagged are left alone.
'd$ </param>
'd$ <param name="anOutLierLimit">
'd$ Limit for the absolute error, e.g. 40 for +/- 40 seconds.
'd$ </param>
'd$ <remarks>
'd$ Asserts that every element of anInYErrors was consumed.
'd$ </remarks>
Private Shared Sub markOutLiers( _
    ByRef anInYErrors() As Double, _
    ByRef aInOutCoorPointList2 As List(Of retCorrDataPointStruct), _
    ByVal anOutLierLimit As Double)

    Dim errorsConsumed As Integer = 0 'Next unread position in anInYErrors.

    For pointIndex As Integer = 0 To aInOutCoorPointList2.Count - 1
        Dim dataPoint As retCorrDataPointStruct = _
            aInOutCoorPointList2(pointIndex)

        'Earlier outliers have no entry in the error array.
        If dataPoint.isOutLier Then
            Continue For
        End If

        If Math.Abs(anInYErrors(errorsConsumed)) >= anOutLierLimit Then
            'The list holds structures (copies), so write the change back.
            dataPoint.isOutLier = True
            aInOutCoorPointList2(pointIndex) = dataPoint
        End If
        errorsConsumed += 1
    Next 'Through data points.

    Trace.Assert(anInYErrors.Length = errorsConsumed, _
        "PIL ASSERT. arraySize different arrayIndex.")
End Sub 'markOutLiers
'Changed PM_REFACTOR 2003-09-25
'****************************************************************************
'* SUBROUTINE NAME: fillIn2Dmap *
'd$ <summary>
'd$ Purpose: builds the 2D map (raw file x unique peptide) from a
'd$ peptide list and its parallel list of map coordinates.
'd$ </summary>
'd$ <param name="anInPeptideList_mapCoordinates2">
'd$ Map coordinates; element j belongs to peptide j of anInPeptideList2.
'd$ </param>
'd$ <param name="anInPeptideList2">
'd$ The peptides. Only its length is used here.
'd$ </param>
'd$ <param name="aLastFileIDindex">Last valid first (raw file) index.</param>
'd$ <param name="aLastPeptideindex">Last valid second (peptide) index.</param>
'd$ <param name="anOut_m2D_array">
'd$ Replaced by a new array. A cell holds NOPEPTIDE_CODE if no peptide
'd$ maps to it, otherwise the list index j of the peptide, negated if the
'd$ peptide is not to be used for correlation.
'd$ </param>
'd$ <remarks>
'd$ The array is first filled with NOPEPTIDE_CODE and the peptides are
'd$ written afterwards. Blanks are therefore never confused with list
'd$ index 0, also when the peptide list is empty.
'd$ Note: list index 0 cannot carry the "not used for correlation"
'd$ marker, as -0 = 0. If several peptides share a cell the last one wins.
'd$ </remarks>
Private Sub fillIn2Dmap( _
    ByRef anInPeptideList_mapCoordinates2 As List(Of mapCoordinatesStruct), _
    ByRef anInPeptideList2 As List(Of PeptideHitStructure), _
    ByVal aLastFileIDindex As Integer, ByVal aLastPeptideindex As Integer, _
    ByRef anOut_m2D_array As Integer(,))

    ReDim anOut_m2D_array(aLastFileIDindex, aLastPeptideindex) 'Note: for
    ' ReDim it is last index and not size.

    'Pass 1: mark every cell as blank.
    For fileIndex As Integer = 0 To aLastFileIDindex
        For peptideColumn As Integer = 0 To aLastPeptideindex
            anOut_m2D_array(fileIndex, peptideColumn) = NOPEPTIDE_CODE
        Next 'Through peptides, columns.
    Next 'Through raw files, rows.

    'Pass 2: write the list index of each peptide into its cell.
    Dim lastIndex_PeptideList As Integer = anInPeptideList2.Count - 1
    For j As Integer = 0 To lastIndex_PeptideList
        Dim curCoor As mapCoordinatesStruct = _
            anInPeptideList_mapCoordinates2(j)

        'Encode as negative index: peptides that do not have a
        'sufficiently high score to qualify for use in correlation.
        Dim encodedIndex As Integer = j
        If Not curCoor.useForCorrelation Then
            encodedIndex = -j
        End If

        anOut_m2D_array( _
            curCoor.rawFileIDindex, curCoor.uniquePeptideIndex) = encodedIndex
    Next 'Through peptide list.
End Sub 'fillIn2Dmap
'Changed PM_REFACTOR 2006-10-26
'****************************************************************************
'* SUBROUTINE NAME: getEffectiveRetentionTime_Secs *
'd$ <summary>
'd$ Purpose: returns the retention time, in seconds, of a peptide
'd$ according to the retention time type given to the constructor.
'd$ </summary>
'd$ <param name="anInPept">The peptide. It is only read.</param>
'd$ <returns>
'd$ MS/MS event time (minutes converted to seconds), LC profile centroid,
'd$ or centroid with fall-back to the MS/MS event time if the centroid
'd$ is not above 0.001. For an unknown type: asserts and
'd$ returns -7777.7777.
'd$ </returns>
'****************************************************************************
Private Function getEffectiveRetentionTime_Secs( _
    ByRef anInPept As PeptideHitStructure) _
    As Double

    'What about this field:
    ' LCpeakDetectionTimeCentroidSeconds

    If mRetentionTimeType = retentionTimeTypeEnum.enumMSMSevent Then
        Return 60.0 * anInPept.MSMSretentionTimeMinutes
    End If

    If mRetentionTimeType = retentionTimeTypeEnum.enumLCprofileCentroid Then
        Return anInPept.retentionTimeCentroid_secs
    End If

    If mRetentionTimeType = _
        retentionTimeTypeEnum.enumLCprofileCentroidWithFallBackToMSMS Then

        If anInPept.retentionTimeCentroid_secs > 0.001 Then
            Return anInPept.retentionTimeCentroid_secs
        End If
        'Fall-back: no usable centroid.
        Return 60.0 * anInPept.MSMSretentionTimeMinutes
    End If

    'Unknown retention time type.
    Trace.Assert(False, "PIL ASSERT. Select Case never fall-through")
    Return -7777.7777
End Function 'getEffectiveRetentionTime_Secs
'Changed PM_REFACTOR 2003-09-25
'****************************************************************************
'* SUBROUTINE NAME: insertNewPeptides *
'd$ <summary>
'd$ Purpose: for the peptide found in one cell of the 2D map, queue a
'd$ clone of it for every OTHER raw file (row) that does not already
'd$ hold a peptide with the same charge and nearly the same mass in
'd$ that column. The retention time of each clone is predicted through
'd$ the per-file linear calibrations.
'd$ </summary>
'd$ <param name="aIn2D_array">
'd$ The 2D map; cells are NOPEPTIDE_CODE or a (possibly negated) index
'd$ into anInPeptideList2.
'd$ </param>
'd$ <param name="aFileID">Row (raw file index) of the source cell.</param>
'd$ <param name="aPeptIndex">Column (peptide) of the source cell.</param>
'd$ <param name="anInPeptideList2">Peptides the map cells index into.</param>
'd$ <param name="aLastFileIDindex">Last row index of the map.</param>
'd$ <param name="aProteinIndex">Copied into each queued peptide.</param>
'd$ <param name="anInRetentionTimeCalibrations2">
'd$ Slope/offset per row; indexed with aFileID and with the target row.
'd$ </param>
'd$ <param name="anInOutAlreadyInsertedHash2">
'd$ Keys (sequence_rawFileID_charge_massBin) of peptides queued so far;
'd$ used to suppress near-duplicates and extended here.
'd$ </param>
'd$ <param name="anInOutPeptidesToBeAdded2">
'd$ Output: the queued peptides are appended to this list.
'd$ </param>
'd$ <remarks>
'd$ Also updates the counters in mStats and the diagnostic value
'd$ mMaxDiffIsoAbs. Does nothing if the source cell is NOPEPTIDE_CODE.
'd$ </remarks>
Private Sub insertNewPeptides( _
ByRef aIn2D_array As Integer(,), ByVal aFileID As Integer, _
ByVal aPeptIndex As Integer, _
ByRef anInPeptideList2 As List(Of PeptideHitStructure), _
ByVal aLastFileIDindex As Integer, ByVal aProteinIndex As Integer, _
ByRef anInRetentionTimeCalibrations2 As List(Of retentionTimeCalibrationStruct), _
ByRef anInOutAlreadyInsertedHash2 As Dictionary(Of String, Integer), _
ByRef anInOutPeptidesToBeAdded2 As List(Of peptideToBeAddedInfoStruct) _
)
'Old:
' ByRef anInPeptideList As ArrayList
' ByRef anInOutPeptidesToBeAdded As ArrayList
Dim curIndex As Integer = aIn2D_array(aFileID, aPeptIndex)
'Changed PM_BAD_INSERT 2003-11-07
'Changed PM_BAD_INSERT 2003-10-29. Window made wider
'to accommodate double inserts for some measured values
'that differed by approx. 0.1 Da.
'Dim keyWindowSize As Double = PEPTIDE_UNIT_DISTANCE / 5
Dim keyWindowSize2 As Double = PEPTIDE_UNIT_DISTANCE / 1
Dim keyHalfWindowSize As Double = 0.5 * keyWindowSize2
'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
'Note: even if the place in our map is occupied with a peptide
' we will still insert a new peptide if there is no other
' peptide in one of the other raw files with the same mass
' and charge.
If curIndex = NOPEPTIDE_CODE Then
'Nothing to do. Instead of looking for NOPEPTIDE_CODE we now
'look at each defined cell and loop through the other files.
Dim peter18 As Integer = 18
Else
'For all defined peptides we insert new peptides at all other
'raw files; at empty places and for non-identical peptides.
'A negative cell value is the negated list index of a peptide
'that is not used for correlation; undo the encoding.
If curIndex < 0 Then
curIndex = -curIndex
End If
Dim pepToClone As PeptideHitStructure = _
anInPeptideList2(curIndex)
'Changed PM_INSERTION_OF_Z_PEPTIDES 2004-06-30
Trace.Assert(pepToClone.AASequence <> _
peptideConstants.SEQUENCE_BADPEPTIDE, _
"PIL ASSERT. Bad peptide to be inserted: " & _
pepToClone.AASequence)
Dim fileID2 As Integer
For fileID2 = 0 To aLastFileIDindex
If fileID2 <> aFileID Then
'Changed PM_INSERTPEPTIDES_ASSERT 2005-07-29
'Dim pepToCloneMeasMass_Wild As Double = _
' (pepToClone.lowerMZuncalib - PROTON_MASS) * pepToClone.charge
Dim lowerMass_Clone As Double = _
PeptideHitStructure.getLowerMCRuncalib(pepToClone)
'Changed PM_REFACTOR 2008-05-20
'Dim pepToCloneMeasMass_Wild As Double = _
' (lowerMass_Clone - MSconstants.PROTON_MASS) * _
' pepToClone.charge
'Per the superseded formula above: the neutral mass that
'corresponds to the lower mass-to-charge value.
Dim pepToCloneMeasMass_Wild As Double = _
PILmassCalc.chargeTransform(lowerMass_Clone, pepToClone.charge, 0)
Trace.Assert( _
lowerMass_Clone > 20.0, _
"PIL ASSERT. Unreasonable mass for " & _
"pepToClone.lowerMZuncalib: " & _
lowerMass_Clone)
'Changed PM_BAD_INSERT 2003-10-29
Dim pepToCloneBaseMass As Double = _
pepToClone.calculatedMassNoMods 'Only for use in the
' key (to avoid inserting several peptides with nearly
' the same mass).
Dim wildTypeReset As Boolean = False
Dim insertNewPeptide As Boolean = True
Dim curIndex2 As Integer = _
aIn2D_array(fileID2, aPeptIndex)
If curIndex2 = NOPEPTIDE_CODE Then
'Empty slot in the target raw file: always a candidate
'for insertion.
Dim peter19 As Integer = 19
Else
'Occupied slot: only insert if the peptide already there
'differs in charge or mass.
If curIndex2 < 0 Then
curIndex2 = -curIndex2
End If
Dim curPept As PeptideHitStructure = _
anInPeptideList2(curIndex2)
Dim curLowerMCR As Double = _
PeptideHitStructure.getLowerMCRuncalib(curPept)
'Changed PM_REFACTOR 2008-05-20
'Dim curPeptMeasMass_Wild As Double = _
' (curLowerMCR - MSconstants.PROTON_MASS) * _
' pepToClone.charge
'Note: the charge of pepToClone (not curPept) is used. The
'resulting mass only influences the outcome when both
'charges are equal, see the comparison below.
Dim curPeptMeasMass_Wild2 As Double = _
PILmassCalc.chargeTransform( _
curLowerMCR, pepToClone.charge, 0)
'Test
If True Then
'Should not be equal to 0.0 and also should be
'close to pepToClone.measuredMW.
Trace.Assert(lowerMass_Clone > 10.0, _
"PIL ASSERT. pepToClone.lowerMZuncalib has " & _
"not been set.")
'NOTE(review): this test depends on pepToClone only, but
'it is only executed when the target slot is occupied.
'For an empty target slot wildTypeReset stays False.
'Confirm that this is intended.
Dim diffIso As Double = _
pepToClone.measuredMass - _
pepToCloneMeasMass_Wild
Dim diffIsoAbs As Double = Math.Abs(diffIso)
If diffIsoAbs > 0.2 Then
wildTypeReset = True
mStats.massesResetToWildType += 1
'Changed PM_INSERTED_FALSENEGATIVE 2004-11-25
If diffIsoAbs > mMaxDiffIsoAbs Then
mMaxDiffIsoAbs = diffIsoAbs
End If
End If
Trace.Assert(diffIsoAbs < 100.0, _
"PIL ASSERT. diffIsoAbs < 100.0.")
End If
Dim cloneCharge As Integer = pepToClone.charge
'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
'Dim cloneMass As Double = pepToClone.measuredMW
Dim cloneMass As Double = pepToCloneMeasMass_Wild
'Check if the peptide is identical, otherwise insert
'a new peptide
Dim curCharge As Integer = curPept.charge
'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
'Dim curMass As Double = curPept.measuredMW
Dim curMass As Double = curPeptMeasMass_Wild2
Dim massDiff As Double = curMass - cloneMass
Dim absMassDiff As Double = Math.Abs(massDiff)
Dim lowMassDifference As Boolean = absMassDiff < 0.2
If cloneCharge = curCharge AndAlso _
lowMassDifference Then
'Identical. Do not insert a new peptide.
insertNewPeptide = False
Else
Dim peter94 As Integer = 94
End If
End If 'curIndex2 = NOPEPTIDE_CODE
'Translate the map row into the raw file ID of the target file.
Dim rawFileIDforNewPeptide As Integer = _
mRawFile2IndexHash_Reverse2(fileID2)
If insertNewPeptide Then
'For a particular raw file: prevent inserting several
'peptides that are essentially the same, except for
'small differences in mass.
'Changed PM_BAD_INSERT 2003-10-29
''Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
''dim massToUse as double = pepToClone.measuredMW
'Dim massToUse As Double = pepToCloneMeasMass_Wild
'Mass offset relative to the unmodified calculated mass,
'shifted by half a window so that the bins are centered on
'multiples of the window size.
Dim massToUse As Double = _
pepToCloneMeasMass_Wild - pepToCloneBaseMass + _
keyHalfWindowSize
'1. cint, round.
'2. Do NOT use absolute mass, use mass diff from some
' close by peptide mass, e.g. theoretical mass.
'3. Use calibrated measured value.
'4. Use unmodified mass.
'Changed PM_BAD_INSERT 2003-10-29. CInt rounds! The
'mass we use should be centered on the expected peptide
'mass at our integer mass.
'Dim massStr As String = _
' CInt(5.0 * massToUse + 0.5).ToString '5.0 is to make
'' all masses within a 0.2 Da window appear the same.
'' Future: perhaps we should divide the mass
'' by 1.002323 (or similar).
Dim massInt As Integer = _
CInt(massToUse / keyWindowSize2 - 0.5) + 100
'-0.5 to have truncation, not rounding. 100 to avoid
'negative values.
'NOTE(review): CInt rounds halves to the nearest even number,
'so quotients that are exact integers do not all land in the
'same relative bin.
Trace.Assert(massInt > 0, _
"PIL ASSERT. Bad mass values, base mass: " & _
pepToCloneBaseMass & " Da, measured (wild): " & _
pepToCloneMeasMass_Wild & " Da.")
'All masses within one window (about 1.0 Da) appear the same.
'Key: sequence_rawFileID_charge_massBin.
Dim keySB As StringBuilder = New StringBuilder(32)
keySB.Append(pepToClone.AASequence)
keySB.Append("_")
keySB.Append(rawFileIDforNewPeptide)
keySB.Append("_")
keySB.Append(pepToClone.charge)
keySB.Append("_")
keySB.Append(massInt)
If anInOutAlreadyInsertedHash2.ContainsKey( _
keySB.ToString) Then
'An equivalent peptide is already queued for this raw file.
insertNewPeptide = False
mStats.duplicatesNotInserted += 1
Else
anInOutAlreadyInsertedHash2.Add( _
keySB.ToString, 1)
End If
End If
If insertNewPeptide Then
Dim newPep As peptideToBeAddedInfoStruct
newPep.badPeakReason = Nothing 'Keep compiler happy.
newPep.modHits4 = Nothing 'Keep compiler happy.
newPep.uncalibratedSILACmasses4 = Nothing 'Keep compiler happy.
newPep.pepSequence = Nothing 'Keep compiler happy.
newPep.proteinIndex = aProteinIndex
'Changed PM_RETCORR_LCCENTROID 2006-10-26
'Dim retentionTimeForDefinedPeptide As Double = _
' 60.0 * pepToClone.MSMSretentionTimeMinutes
Dim retentionTimeForDefinedPeptide As Double = _
getEffectiveRetentionTime_Secs(pepToClone)
Dim definedPeptideCalib As retentionTimeCalibrationStruct = _
anInRetentionTimeCalibrations2(aFileID)
Dim curCalib As retentionTimeCalibrationStruct = _
anInRetentionTimeCalibrations2(fileID2)
'First transform to the reference raw file's retention
'time scale (inverse of time = A * ref + B) and then to the
'new peptide's raw file retention time scale.
Dim refRetTime As Double = _
(retentionTimeForDefinedPeptide - definedPeptideCalib.B) / _
definedPeptideCalib.A
Dim retTimeForNewPeptide As Double = _
curCalib.A * refRetTime + curCalib.B
'Changed PM_NEGATIVE_RETENTION_TIMES 2003-10-16
If retTimeForNewPeptide < 0.5 Then
'If predicted retention time is negative (or low
'positive) set it to some safe value that will
'also flag for later quantitation that the
'quantitation result should be set to zero.
retTimeForNewPeptide = 9.9
mStats.negativeRetentionTimes += 1
End If
newPep.calibratedRetentionTimeSecs = _
retTimeForNewPeptide
newPep.PDBA_rawFileID = rawFileIDforNewPeptide
'Note: when new fields are added: also add
' in Parse()/MascotResultParser.vb.
If True Then 'Copy fields from the identified peptide.
newPep.pepSequence = pepToClone.AASequence
'Changed PM_SHORT_EXPORT 2005-12-01
newPep.PDBA_leftFlankAA = pepToClone.leftFlankAA
newPep.PDBA_rightFlankAA = pepToClone.rightFlankAA
newPep.measuredMCR = pepToClone.measuredMCR
'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
'newPep.measuredMass = pepToClone.measuredMW
newPep.measuredMass = pepToCloneMeasMass_Wild
newPep.calculatedMass = _
pepToClone.MascotCalculatedMass
newPep.charge = pepToClone.charge
'Changed PM_BADPEAKREASON_BUG 2003-09-26
newPep.goodMSpeak = pepToClone.goodMSpeak
newPep.badPeakReason = pepToClone.badPeakReason
'Changed PM_GENERALISED_QUANT_MODE 2003-12-08
''Changed PM_CORR_MODS_FOR_INSERTED 2003-10-03
'newPep.modHit = pepToClone.modHit
newPep.modHits4 = pepToClone.modHits2 'Should we
' deep copy?
'NOTE(review): if modHits2 is a List (reference type) the
'clone shares it with pepToClone, and resetModFields()
'below would then also change the identified peptide.
'Confirm.
If wildTypeReset Then
'Changed PM_REFACTOR_INSERT_NATIVEPEPTIDE 2003-10-15
mStats.addedWithAResetMass += 1
MascotResultParser.resetModFields( _
newPep.modHits4)
Else
Dim peter2 As Integer = 2
End If
'Changed PM_CORR_CHECKEDINHERIT_FOR_INSERTED 2003-10-08
newPep.peptVerified = pepToClone.verified
'Changed PM_FAST_SERIALISATION_BUG_SAVE_INSERTED_PEPTIDES 2007-01-08
newPep.uncalibratedSILACmasses4 = pepToClone.uncalibratedSILACmasses2
'Changed PM_ELIMINATE_PEPTIDEFIELD 2008-11-25. Delete at any time.
''Changed PM_FAST_SERIALISATION_BUG_LOAD_INSERTED_PEPTIDES 2007-01-08
'newPep.version2 = pepToClone.version
End If
Trace.Assert(newPep.PDBA_leftFlankAA <> 0, _
"PIL ASSERT. PDBA_leftFlankAA is 0 for peptide " & _
newPep.pepSequence & _
". Excel does not like that.")
Trace.Assert(newPep.PDBA_rightFlankAA <> 0, _
"PIL ASSERT. PDBA_rightFlankAA is 0 for peptide " & _
newPep.pepSequence & _
". Excel does not like that.")
anInOutPeptidesToBeAdded2.Add(newPep)
End If
End If 'No self compare
Next 'Inner loop through raw files.
End If
End Sub 'insertNewPeptides
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
'****************************************************************************
'* SUBROUTINE NAME: correlationFailure *
'd$ <summary>
'd$ Purpose: helper function to do reporting, etc. in case of a
'd$ retention time correlation failure for a raw file pair.
'd$ </summary>
'd$ <param name="anInFileID">
'd$ Index into mCorrFailureReasonArray where the reason is stored.
'd$ </param>
'd$ <param name="aInxVals">x values of the data points, for the dump.</param>
'd$ <param name="aInyVals">y values of the data points, for the dump.</param>
'd$ <param name="aInFailureReasonString">
'd$ Why the correlation failed; stored and included in the report.
'd$ </param>
'd$ <param name="anOutUsableCorrelationForPair">Always set to False.</param>
'd$ <remarks>
'd$ The original data points are dumped to mCorrReport through
'd$ SDUPstatistics.dumpXandYandYerrors().
'd$ </remarks>
Private Sub correlationFailure( _
    ByVal anInFileID As Integer, _
    ByRef aInxVals As Double(), _
    ByRef aInyVals As Double(), _
    ByVal aInFailureReasonString As String, _
    ByRef anOutUsableCorrelationForPair As Boolean)

    anOutUsableCorrelationForPair = False

    'Remember the reason for this raw file.
    mCorrFailureReasonArray(anInFileID) = aInFailureReasonString

    'Document the failure, with the data it was based on, in the report.
    Dim reportHeader As String = "Correlation failed ("
    reportHeader = reportHeader & aInFailureReasonString
    reportHeader = reportHeader & _
        "). Original data (ignore column ""yErrors""):"
    reportHeader = reportHeader & ControlChars.NewLine

    SDUPstatistics.dumpXandYandYerrors(reportHeader, _
        aInxVals, -1.0, 0.0, aInyVals, Nothing, _
        "[secs]", "[secs]", mCorrReport)
End Sub 'correlationFailure
'****************************************************************************
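'* FUNCTION NAME: cloneCorrList *
'd$ <summary>
'd$ Purpose: returns a new list holding a copy of every element of
'd$ the passed list of retention time correlation data points.
'd$ </summary>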
'****************************************************************************
Private Function cloneCorrList( _
    ByRef aInList As List(Of retCorrDataPointStruct) _
    ) _
    As List(Of retCorrDataPointStruct)
    'Returns a new list with the same elements, in the same order, as
    'aInList. The input list itself is not modified.
    '
    'Not tested (according to the original author's note).
    '
    'Copying element by element only amounts to an independent copy
    'if retCorrDataPointStruct contains nothing but value types - the
    'original author states that it does; the declaration is not
    'visible here, so confirm before adding reference-type fields.

    'Presize to the final element count, then copy everything in one go.
    Dim copy As List(Of retCorrDataPointStruct) = _
        New List(Of retCorrDataPointStruct)(aInList.Count)
    copy.AddRange(aInList)
    Return copy
End Function 'cloneCorrList
'****************************************************************************
'* SUBROUTINE NAME: ParsingCompleted *
'd$ <summary>
'd$ Purpose: signal from the client that parsing is done for
'd$ a Mascot result. We can do the retention time correlation
'd$ etc. right away or we can be lazy and wait until it is
'd$ needed.
'd$ </summary>
'd$ <param name="anInRawFilesMap">
'd$ Raw file specifications; used to look up the full raw file
'd$ paths that are written to the correlation report.
'd$ </param>
'd$ <remarks>
'd$ <para>
'd$ Must be called only once; a second call trips an assert.
'd$ </para>
'd$ </remarks>
Public Sub ParsingCompleted( _
ByRef anInRawFilesMap() As massSpectrometryBase.fileSpecStructure)
'Purpose: called by the client when parsing is finished. In order,
' this routine:
'  1. asserts that it is only called once,
'  2. fills in the two peptide maps (raw file x unique peptide),
'  3. selects the reference raw file: the one with the most map
'     entries >= 0,
'  4. for every raw file fits a straight line between the retention
'     times in the reference raw file (x) and in that raw file (y),
'     in three passes with increasingly strict outlier exclusion,
'  5. if every pair gave a usable correlation, collects the
'     peptides to be inserted (through insertNewPeptides), and
'  6. puts a statistics summary at the start of mCorrReport.
'
'anInRawFilesMap is only used for looking up full raw file paths
' for the report.
mParsingCompletedCount += 1
'Detect client error.
Trace.Assert( _
mParsingCompletedCount = 1, _
"PIL ASSERT. ParsingCompleted() called more than once!. " & _
"This indicates a client code error.")
Dim totPeptides As Integer = mPeptideList2.Count
mStats.peptides = totPeptides
'Map dimensions: one row per raw file, one column per unique peptide.
Dim rowsForMap As Integer = mRawFile2IndexHash2.Count
'Dim columnsForMap As Integer = mPeptideList_mapCoordinates.Count
Dim columnsForMap As Integer = mUniquePeptideIndexCounter
mStats.uniquePeptides = columnsForMap
mStats.rawFiles = rowsForMap
Dim lastFileIDindex As Integer = rowsForMap - 1
Dim lastPeptideindex As Integer = columnsForMap - 1
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
'One failure reason slot per raw file. Slots that are never written
' stay Nothing.
ReDim mCorrFailureReasonArray(lastFileIDindex)
If True Then 'Fill in our map.
fillIn2Dmap( _
mPeptideList_mapCoordinates2, mPeptideList2, _
lastFileIDindex, lastPeptideindex, _
m2D_array)
'Changed PM_CORR_ADD_EVEN_MORE_PEPTIDES 2003-09-25
fillIn2Dmap( _
mExtraPeptidesList_mapCoordinates2, mExtraPeptidesList2, _
lastFileIDindex, mExtraPeptidesIndexCounter - 1, _
mExtra2D_array)
End If 'Fill in our map.
'outlierMap(file, peptide) is set to True further down for points
' that were marked as outliers for that raw file.
Dim outlierMap(lastFileIDindex, lastPeptideindex) As Boolean
Dim fileID As Integer
Dim peptIndex As Integer
If True Then 'Find some properties (min/max values of score,
' retention time, ...).
' Also find reference raw file.
'Note: minScore2 and maxScore2 are updated below, but are not read
' again within this routine.
Dim minScore2 As Double = 1000000000.0
Dim maxScore2 As Double = -1000000000.0
'Changed PM_TYPESAFE 2008-11-27
'Dim peptidesForCorr As ArrayList = New ArrayList 'Type is Integer
'Per raw file: number of map entries that are >= 0.
Dim peptidesForCorr2 As List(Of Integer) = New List(Of Integer)
For fileID = 0 To lastFileIDindex
peptidesForCorr2.Add(0)
Next
For peptIndex = 0 To lastPeptideindex
'Number of raw files in which the current peptide has an entry >= 0.
Dim peptidesForCorrelation As Integer = 0
For fileID = 0 To lastFileIDindex
Dim curIndex2 As Integer = m2D_array(fileID, peptIndex)
'What about the negative values, for high scoring
'peptides for correlation????
'Meaning of test: is it a high scoring peptide that can be
' used for retention time correlation (because we are sure
' of the peptide's identity)? There are two other
' possibilities: low negative values are peptides with too
' low scores. Some large negative value means that there is
' no identified peptide - it is going to be inserted at a
' predicted retention time.
If curIndex2 >= 0 Then
peptidesForCorr2(fileID) = _
peptidesForCorr2(fileID) + 1
Dim pept As PeptideHitStructure = _
mPeptideList2(curIndex2)
'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
'longer implicit conversion from integer to
'double...
Dim score As Double = pept.MascotScore2
If score < minScore2 Then
minScore2 = score
End If
If score > maxScore2 Then
maxScore2 = score
End If
peptidesForCorrelation += 1
Else
Dim peter7 As Integer = 7 'Low scoring or to be inserted.
End If
Next 'Through raw file, rows.
'The peptide is flagged as used for correlation if it has an
' entry >= 0 in at least two raw files.
Dim forCorrelation As Boolean = False
If peptidesForCorrelation >= 2 Then
forCorrelation = True
End If
'The descriptor is copied out, changed and written back -
' presumably because coorMapcolumnDescriptorStruct is a
' structure (value type); confirm against its declaration.
Dim curColumnDescriptor As coorMapcolumnDescriptorStruct = _
mProteinIndex_array2(peptIndex)
curColumnDescriptor.peptideUsedForCorrelation = forCorrelation
mProteinIndex_array2(peptIndex) = curColumnDescriptor 'Write-back.
Next 'Through peptides
If True Then 'Selects reference raw file to use as a base.
'The raw file with the highest count wins. For ties the first one
' wins (strict comparison below). With no raw files the loop does
' not run and mReferenceRawFileIndex is left untouched.
Dim maxVal As Integer = -1
For fileID = 0 To lastFileIDindex
Dim curVal As Integer = peptidesForCorr2(fileID)
If curVal > maxVal Then
maxVal = curVal
mReferenceRawFileIndex = fileID
End If
Next
End If
End If
'Overall result: stays True only if every raw file pair gives a
' usable correlation.
Dim usableCorrelation As Boolean = True
If True Then 'Finally, find the correlations: mapping between
' the retention times.
'Changed PM_REFACTOR_TROUBLE 2008-12-01. Adapt to old
' behavior. Perhaps it would be better to explicitly
' test mReferenceRawFileIndex for being negative and
' skip most of the rest of this function. E.g. there
' is no point in generating a correlation report for
' single raw file searches.
'
'Why is HashTable apparently different from Dictionary?
'Answer: HashTable accepts keys that do not exist
' whereas Dictionary requires the key to
' exist, otherwise an exception is thrown.
'Trace.Assert(mReferenceRawFileIndex >= 0, _
' "PIL ASSERT. mReferenceRawFileIndex is undefined.")
'refRealFileID = _
' mRawFile2IndexHash_Reverse2(mReferenceRawFileIndex)
'If the lookup fails, refRealFileID keeps the value 0 and that
' value is used for the path lookup below.
Dim refRealFileID As Integer = 0
If mRawFile2IndexHash_Reverse2.TryGetValue( _
mReferenceRawFileIndex, refRealFileID) Then
Dim peter2 As Integer = 2
Else
Dim peter3 As Integer = 3 'Does not exist. Single file.
End If
Dim refFullRawFilePath As String = _
rawDataFileHandling.getFullRawFilePath( _
anInRawFilesMap, refRealFileID)
Dim xValsOrig(columnsForMap) As Double 'Max value, will be
' redimmed later.
Dim yValsOrig(columnsForMap) As Double 'Max value, will be
' redimmed later.
'One pass per raw file. The reference raw file is not skipped, so
' it is also correlated (with itself).
For fileID = 0 To lastFileIDindex
'We must repeat increasing it to columnsForMap because it is
'reduced at the end of the loop...
ReDim xValsOrig(columnsForMap)
ReDim yValsOrig(columnsForMap)
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
'Now reset for each file to correlate - to let us continue
'with the correlation even if one fails.
Dim usableCorrelationForPair As Boolean = True
Dim pointsToCorrelate As Integer = 0
'xySet1: all data points for the current raw file pair.
Dim xySet1 As List(Of retCorrDataPointStruct) = _
New List(Of retCorrDataPointStruct)
Dim xySet2 As List(Of retCorrDataPointStruct) = _
New List(Of retCorrDataPointStruct) 'Result of
' the first attempt at fitting, includes first wave of
' outliers.
Dim currentRealFileID As Integer = _
mRawFile2IndexHash_Reverse2(fileID)
Dim currentFullRawFilePath As String = _
rawDataFileHandling.getFullRawFilePath( _
anInRawFilesMap, currentRealFileID)
mCorrReport.Append( _
"Current raw data file: " & _
vbTab & currentFullRawFilePath & PILInputOutput.LINEEND & _
" Reference raw data file: " & _
vbTab & refFullRawFilePath & PILInputOutput.LINEEND)
'Collect the data points: one for each peptide with an entry >= 0
' in both the current and the reference raw file.
For peptIndex = 0 To lastPeptideindex
Dim curIndex As Integer = m2D_array(fileID, peptIndex)
'Meaning of test: is it a high scoring peptide that can be
' used for retention time correlation (because we are sure
' of the peptide's identity)? There are two other
' possibilities: low negative values are peptides with too
' low scores. Some large negative value means that there is
' no identified peptide - it is going to be inserted at a
' predicted retention time.
If curIndex >= 0 Then
Dim curPept As PeptideHitStructure = _
mPeptideList2(curIndex)
'Changed PM_RETCORR_LCCENTROID 2006-10-26
'Dim curRetentionTimeSecs As Double = _
' 60.0 * curPept.MSMSretentionTimeMinutes
Dim curRetentionTimeSecs As Double = _
getEffectiveRetentionTime_Secs(curPept)
Dim refIndex As Integer = _
m2D_array(mReferenceRawFileIndex, peptIndex)
'Meaning of test: is it a high scoring peptide that
' can be used for retention time correlation (because
' we are sure of the peptide's identity)? There are
' two other possibilities: low negative values are
' peptides with too low scores. Some large negative
' value means that there is no identified
' peptide - it is going to be inserted at a
' predicted retention time.
If refIndex >= 0 Then
'For Mascot identified peptides.
Dim refPept As PeptideHitStructure = _
mPeptideList2(refIndex)
'Changed PM_RETCORR_LCCENTROID 2006-10-26
'Dim refRetentionTimeSecs As Double = _
' 60.0 * refPept.MSMSretentionTimeMinutes
Dim refRetentionTimeSecs As Double = _
getEffectiveRetentionTime_Secs(refPept)
'x: retention time in the reference raw file.
'y: retention time in the current raw file.
Dim somePoint As retCorrDataPointStruct
somePoint.xRetentionTimeSecs = _
refRetentionTimeSecs
somePoint.yRetentionTimeSecs = _
curRetentionTimeSecs
somePoint.isOutLier = False
xySet1.Add(somePoint)
pointsToCorrelate += 1
Else
'Low scoring or to be inserted.
Dim peter8 As Integer = 8
End If
Else
'Low scoring or to be inserted.
Dim peter7 As Integer = 7
End If
Next 'Through peptides
'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
extractXYarrays(xySet1, xValsOrig, yValsOrig) 'Moved up here
Dim forCorr As Integer = xySet1.Count
If forCorr < 2 Then
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
'correlationFailure() sets usableCorrelationForPair to False.
Me.correlationFailure( _
fileID, xValsOrig, yValsOrig, _
"Too few initial points for correlation. Count: " & _
forCorr, _
usableCorrelationForPair _
)
End If
If usableCorrelationForPair Then
Dim yErrors() As Double = Nothing 'Keep compiler happy.
Dim minErr2 As Double
Dim maxErr2 As Double
Dim medianErr2 As Double
Dim A1 As Double
Dim B1 As Double
Dim minXUsed As Double
Dim maxXUsed As Double
If True Then 'First attempt at fitting
'All points are used. A1 and B1 are not read again in this
' routine; the values used further down are yErrors and
' (in the third attempt) medianErr2, both overwritten by
' the second fit before the third attempt.
SDUPstatistics.linearRegression( _
xValsOrig, yValsOrig, A1, B1, _
yErrors, minErr2, maxErr2, medianErr2, _
Nothing, _
minXUsed, maxXUsed) 'Perhaps use the exclude feature and the
' algorithm from SDUPrecalibrator.vb?
If fileID = FILEID_FOR_SAMPLECORR Then 'Now second,
' because the reference ID is now 0 in the
' standard example...
mStats.pointsUsedForCorreltation_SomeRawFile_Initial = _
xValsOrig.Length
End If
End If
Dim A2 As Double
Dim B2 As Double
Dim pointsToCorrelate2 As Integer = 0
Dim xVals2() As Double = Nothing 'Keep compiler happy.
Dim yVals2() As Double = Nothing 'Keep compiler happy.
'Second attempt - loop through to find outliers,
' build new list for final regression.
If True Then
markOutLiers(yErrors, xySet1, 40.0) '+/- 40 seconds.
'Changed PM_TYPESAFE 2007-11-20
'xySet2 = DirectCast(xySet1.Clone(), ArrayList) 'Need a
'' copy here because we don't want to exclude peptides
'' that are in the other half.
'xySet2 keeps the outlier marking as it is right now; it is the
' source for outlierMap further down. The third attempt below
' marks xySet1 again, which does not affect xySet2.
xySet2 = Me.cloneCorrList(xySet1) 'For now. Isn't there
' an easier way to clone a list of structures???
'Presumably extractXYarrays() leaves out the points marked as
' outliers (the failure text below says "after removing
' outliers"); confirm against extractXYarrays.
extractXYarrays(xySet1, xVals2, yVals2)
Dim forCorr2 As Integer = xVals2.Length
If forCorr2 < 2 Then
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
Me.correlationFailure( _
fileID, xValsOrig, yValsOrig, _
"Too few points after removing outliers. Count: " & _
forCorr2, _
usableCorrelationForPair _
)
End If
If usableCorrelationForPair Then
SDUPstatistics.linearRegression( _
xVals2, yVals2, A2, B2, _
yErrors, minErr2, maxErr2, medianErr2, _
Nothing, _
minXUsed, maxXUsed) 'Perhaps use the exclude feature and the
' algorithm from SDUPrecalibrator.vb?
If fileID = FILEID_FOR_SAMPLECORR Then
mStats.pointsUsedForCorreltation_SomeRawFile_AfterOutLierRemoval = _
xVals2.Length
End If
End If 'usableCorrelationForPair
End If
If usableCorrelationForPair Then
Dim A3 As Double
Dim B3 As Double
If True Then 'Third attempt: exclude half of the
' datapoints, those with the worst errors.
Dim medianErrorToUse As Double = _
1.001 * medianErr2 + 0.001 'To avoid empty sets
' when e.g. medianErr is 0.0 for the identical
' correlation.
'NOTE(review): yErrors comes from the second fit, which was
' done on xVals2/yVals2, whereas xySet1 still holds all
' points. How markOutLiers() matches the two up can not be
' seen from here - confirm.
markOutLiers(yErrors, xySet1, medianErrorToUse)
extractXYarrays(xySet1, xVals2, yVals2)
Dim forCorr3 As Integer = xVals2.Length
If forCorr3 < 2 Then
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
Me.correlationFailure( _
fileID, xValsOrig, yValsOrig, _
"Too few points after retaining " & _
"the best xx %. Count: " & _
forCorr3, _
usableCorrelationForPair)
End If
If usableCorrelationForPair Then
SDUPstatistics.linearRegression( _
xVals2, yVals2, A3, B3, _
yErrors, minErr2, maxErr2, medianErr2, _
Nothing, _
minXUsed, maxXUsed) 'Perhaps use the exclude feature and the
' algorithm from SDUPrecalibrator.vb?)
End If 'usableCorrelationForPair
End If
If usableCorrelationForPair Then
'Store the result of the third fit. A calibration is only
' added for usable pairs. If any pair fails the list has
' fewer entries than there are raw files, but then
' usableCorrelation becomes False and the insertion
' block further down is skipped.
Dim calib As retentionTimeCalibrationStruct
calib.A = A3
calib.B = B3
mRetentionTimeCalibrations2.Add(calib)
mCorrReport.Append("Final calibration constants:" & _
"A: " & vbTab & calib.A & vbTab & _
"B: " & vbTab & calib.B & vbTab & _
PILInputOutput.LINEEND)
SDUPstatistics.dumpXandYandYerrors( _
"Values for final fitting", _
xVals2, A3, B3, yVals2, Nothing, _
"[secs]", "[secs]", mCorrReport)
SDUPstatistics.dumpXandYandYerrors( _
"Final fitting applied to original data", _
xValsOrig, A3, B3, yValsOrig, Nothing, _
"[secs]", "[secs]", _
mCorrReport)
'Test only.....
If fileID = FILEID_FOR_SAMPLECORR Then
mStats.pointsUsedForCorreltation_SomeRawFile_Final = _
xVals2.Length
End If
'Transfer the outlier flags to outlierMap. The peptides are
' walked through with the same two tests as when xySet1 was
' built, so that ind steps through xySet2 in the order the
' points were added. The assert after the loop checks that
' all points were visited.
Dim xyLen As Integer = xySet1.Count
Dim ind As Integer = 0
Dim outLierCount As Integer = 0 'Only for debugging
Dim pi As Integer = 0
For pi = 0 To lastPeptideindex
Dim curIndex As Integer = m2D_array(fileID, pi)
If curIndex >= 0 Then
Dim refIndex As Integer = _
m2D_array(mReferenceRawFileIndex, pi)
If refIndex >= 0 Then
Dim somePoint2 As _
retCorrDataPointStruct = _
DirectCast(xySet2(ind), _
retCorrDataPointStruct)
Dim outLierValue As Boolean = _
somePoint2.isOutLier
If outLierValue = True Then
outlierMap(fileID, pi) = True
outLierCount += 1
End If
ind += 1
End If
End If
Next
Trace.Assert(ind = xyLen, "PIL ASSERT. ind = xyLen.")
End If 'usableCorrelationForPair
End If 'usableCorrelationForPair
End If 'usableCorrelationForPair
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-10
'A single failing pair makes the overall correlation unusable.
If Not usableCorrelationForPair Then
usableCorrelation = False
End If
'Changed PM_CORRREPORT_CHOKE_EQUALSIGNS 2008-09-19. Spreadsheet
' may not like equal signs... E.g. "Fejl:510" (Danish for
' "Error:510").
'mCorrReport.Append("===================" & _
' PILInputOutput.LINEEND & PILInputOutput.LINEEND)
mCorrReport.Append("###################" & _
PILInputOutput.LINEEND & PILInputOutput.LINEEND)
Next 'Through raw files
If usableCorrelation Then 'Find blank entries for peptides
' and collect information for peptides to be inserted
' into protein list on the client side.
'Changed PM_TYPESAFE 2008-11-27
'Dim alreadyInsertedHash As Hashtable = New Hashtable
'The same dictionary is handed to every insertNewPeptides() call
' below, both for the normal and for the extra peptides.
Dim alreadyInsertedHash2 As Dictionary(Of String, Integer) = _
New Dictionary(Of String, Integer)
For peptIndex = 0 To lastPeptideindex
Dim oneOrMoreOutLiers As Integer = 0
For fileID = 0 To lastFileIDindex
Dim curOutLierValue As Boolean = _
outlierMap(fileID, peptIndex)
If curOutLierValue Then
oneOrMoreOutLiers += 1
End If
Next
'Changed PM_TOO_FEW_INSERTED_WAS_OUTLIER_COLUMN_REJECT 2004-02-10
oneOrMoreOutLiers = 0 'To disable reject of peptide
' insertion if just one peptide is an outlier (wrt
' retention time correlation).
'Because of the reset just above, the count from the loop is
' discarded and the test below is always true; the Else
' branch is never taken. The older intent: a peptide with at
' least one outlier was not added because the retention time
' can not be predicted reliably and adding a peptide would
' just add noise.
If oneOrMoreOutLiers = 0 Then
For fileID = 0 To lastFileIDindex
Dim isRetentionTimeOutLier As Boolean = _
outlierMap(fileID, peptIndex)
'Raw file independent.
Dim curDescr As coorMapcolumnDescriptorStruct = _
mProteinIndex_array2(peptIndex)
Dim proteinIndex As Integer = _
curDescr.proteinIndex
'Changed PM_TOO_FEW_INSERTED_WAS_OUTLIER_COLUMN_REJECT 2004-02-10
If Not isRetentionTimeOutLier Then 'If the current
' peptide is an outlier (in retention time
' correlation) then we don't insert new
' peptides based on that peptide...
insertNewPeptides( _
m2D_array, fileID, peptIndex, _
mPeptideList2, lastFileIDindex, proteinIndex, _
mRetentionTimeCalibrations2, _
alreadyInsertedHash2, mPeptidesToBeAdded2)
End If
Next 'Outer loop through raw files.
Else
Dim peter51 As Integer = 51 'Outliers, breakpoint for.
End If 'No outliers for current peptide.
Next 'Through peptides (columns in our map)
'Same for the extra peptides. No outlier test is made here.
For peptIndex = 0 To mExtraPeptidesIndexCounter - 1
For fileID = 0 To lastFileIDindex
Dim curDescr As coorMapcolumnDescriptorStruct = _
mProteinIndex_ExtraArray2(peptIndex)
Dim proteinIndex2 As Integer = curDescr.proteinIndex
'Extra peptides. We use the same output as for the
'"normal" added peptides, mPeptidesToBeAdded.
insertNewPeptides(mExtra2D_array, fileID, peptIndex, _
mExtraPeptidesList2, lastFileIDindex, proteinIndex2, _
mRetentionTimeCalibrations2, alreadyInsertedHash2, _
mPeptidesToBeAdded2)
Next
Next
End If 'Find blank entries
End If 'End of block, find the correlations.
'Summary of the statistics. It is built last, but inserted at the
' very start of the report (see the Insert() call at the end).
mStats.peptidesToBeAdded = mPeptidesToBeAdded2.Count
Dim corrPairStr As String = _
"Correlation pair number " & FILEID_FOR_SAMPLECORR
Dim statSummary As String = "Summary:" & PILInputOutput.LINEEND & _
"Raw files:" & vbTab & mStats.rawFiles & PILInputOutput.LINEEND & _
"New peptides added:" & vbTab & _
mStats.peptidesToBeAdded & PILInputOutput.LINEEND & _
"Duplicates not inserted: " & vbTab & _
mStats.duplicatesNotInserted & PILInputOutput.LINEEND & _
"Peptides used in correlation and insertion:" & vbTab & _
mStats.peptides & PILInputOutput.LINEEND & _
"Unique peptides used in correlation and insertion:" & vbTab & _
mStats.uniquePeptides & PILInputOutput.LINEEND & _
corrPairStr & ", initial points:" & vbTab & _
mStats.pointsUsedForCorreltation_SomeRawFile_Initial & _
PILInputOutput.LINEEND & _
corrPairStr & ", points after outlier removal:" & vbTab & _
mStats.pointsUsedForCorreltation_SomeRawFile_AfterOutLierRemoval & _
PILInputOutput.LINEEND & _
corrPairStr & ", points for final correlation:" & vbTab & _
mStats.pointsUsedForCorreltation_SomeRawFile_Final & _
PILInputOutput.LINEEND & _
"Peptide masses reset to wild type mass:" & vbTab & _
mStats.massesResetToWildType & PILInputOutput.LINEEND & _
"Peptide added where masses and mods were reset:" & vbTab & _
mStats.addedWithAResetMass & PILInputOutput.LINEEND & _
"Negative predicted retention times reset to 9.9 secs:" & vbTab & _
mStats.negativeRetentionTimes & PILInputOutput.LINEEND & _
PILInputOutput.LINEEND
'Not filled in yet: "Unique peptides used for correlation:" & vbTab &
'mStats.uniquePeptides_UsedForCorrelation & LINEEND & _
'Insert one copy of the summary at position 0, that is, in front
' of the per raw file sections appended above.
mCorrReport.Insert(0, statSummary, 1)
End Sub 'ParsingCompleted
'Changed PM_CORR_CONTINUE_AND_REPORT 2004-02-16
'****************************************************************************
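'* SUBROUTINE NAME: correlationResult *
'd$ <summary>
'd$ Purpose: summarises the outcome of the retention time
'd$ correlation: whether any raw file pair failed, how many
'd$ failed and a text listing the failure reasons.
'd$ </summary>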
Public Function correlationResult() As corrResultStruct
    'Builds a summary from mCorrFailureReasonArray:
    '
    '  correlationSuccesful: True if no slot holds a non-empty
    '                        failure reason.
    '  reportStr:            one " Pair(<slot>,<reference index>):
    '                        <reason>." item for each failed slot;
    '                        empty if there are none.
    '  failureCount:         number of slots with a non-empty reason.
    '  totalCount:           number of slots.
    '
    'Future:
    ' 1. Perhaps include some summary information
    '    about the parsing and inserting, maybe to show
    '    always.
    ' 2. Be more specific about file names; not only indexes,
    '    but also filenames (without path).
    '    Use mRawFile2IndexHash and/or mRawFile2IndexHash_Reverse.
    Dim report As StringBuilder = New StringBuilder()
    Dim failedPairs As Integer = 0
    Dim slot As Integer = 0

    For Each reason As String In mCorrFailureReasonArray
        'Slots that were never written are Nothing; these and empty
        'strings mean that there was no failure for that slot.
        If Not String.IsNullOrEmpty(reason) Then
            report.Append( _
                " Pair(" & slot & "," & mReferenceRawFileIndex & "): " & _
                reason & ".")
            failedPairs += 1
        End If
        slot += 1
    Next

    Dim summary As corrResultStruct
    'Every appended item is non-empty, so an empty report is the
    'same as "no failures".
    summary.correlationSuccesful = (report.Length = 0)
    summary.reportStr = report.ToString()
    summary.failureCount = failedPairs
    summary.totalCount = slot 'All slots were visited.
    Return summary
End Function 'correlationResult
'****************************************************************************
'* SUBROUTINE NAME: peptidesToBeAdded *
'd$ <summary>
'd$ Purpose: returns the list of peptides to be added,
'd$ as collected by ParsingCompleted().
'd$ </summary>
Public Function peptidesToBeAdded() _
    As List(Of peptideToBeAddedInfoStruct)
    'Accessor for the list of peptides collected for insertion. The
    'internal list object itself is handed out, not a copy, so
    'changes made by the caller affect this object's list.
    '
    'Historical note: the return type used to be ArrayList.
    Dim collected As List(Of peptideToBeAddedInfoStruct) = _
        mPeptidesToBeAdded2
    Return collected
End Function 'peptidesToBeAdded
'****************************************************************************
'* SUBROUTINE NAME: updateWithPeptidesFromOneProtein *
'd$ <summary> N/A </summary>
Public Sub updateWithPeptidesFromOneProtein( _
ByRef aProtHitStru2 As ProteinHitStructure, _
ByVal aProteinIndex As Integer, _
ByVal aPreselectedPeptidesScoreThreshold As Double, _
ByVal aIsNoIsotopeMode As Boolean _
)
Dim plst As PILpeptides = aProtHitStru2.peptides 'For notational
' convenience.
Dim CORR_PEPTIDES_THRESHOLD As Integer = 2 'Better name??
Dim PEPTIDE_SCORE_THRESHOLD_FOR_CORR As Double = 30.0
'Dim PEPTIDE_SCORE_THRESHOLD_FOR_CORR As Double = 0.0
'Update information for auto-correlating retention times between
'different raw files. Done if the same (identified) peptide
'is present in several raw files.
Dim peptidesAdded As Integer = 0 'Only for debugging purposes
Dim minScore2 As Double = 1000000000.0
'Changed PM_TYPESAFE 2006-11-09
'Dim protPeptideList As ArrayList = New ArrayList
Dim protPeptideList2 As List(Of peptideCorrStruct) = _
New List(Of peptideCorrStruct)
'Changed PM_REFACTOR 2006-03-15
Dim pept3 As PeptideHitStructure = _
PeptideHitStructure.blankPeptide() 'Keep compiler happy.
Dim pepIter As peptideListIterator = _
New peptideListIterator(plst)
Dim peptideToken As Integer
'For j = 0 To aProtHitStru.pepts.Count - 1
While Not pepIter.nextPeptide(pept3, peptideToken)
'Changed PM_REFACTOR 2004-06-30
Dim usePeptide As Boolean = True
'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
'longer implicit conversion from integer to
'double...
''Changed PM_SCORETHRESHOLD_BOUNDARY_BUG 2006-10-26
''If Not pept3.MascotScore > aPreselectedPeptidesScoreThreshold Then
If pept3.MascotScore2 < aPreselectedPeptidesScoreThreshold Then
usePeptide = False
End If
If pept3.AASequence = _
peptideConstants.SEQUENCE_BADPEPTIDE Then
usePeptide = False
End If
'Changed PM_ONLYINSERT_FOR_QUANTIFIABLE 2004-11-23
If Not pept3.someAAsMatchingTheFilter AndAlso _
aIsNoIsotopeMode = False Then
usePeptide = False
Else
Dim peter2 As Integer = 2 'For breakpoints.
End If
'Changed PM_RETCORR_ANYTIME 2006-10-25
'Do not use peptides that were inserted for correlation, etc.
If pept3.queryNumber < 0 Then
usePeptide = False
End If
'Changed PM_RETCORR_LCCENTROID 2006-10-26
Dim retSecs As Double = _
getEffectiveRetentionTime_Secs(pept3)
If retSecs < 0.01 Then
usePeptide = False
End If
If usePeptide Then
'Later: check for uniqueness of retention time to avoid
'using duplicate points in the correlation - there is no reason
'why some retention times should have more weight than others.
'It will also prevent division by zero/undefined fitting
'if only identical pairs are left, e.g.:
' X Y X yErrors
' 12446 12380 12446 NaN
' 12446 12380 12446 NaN
Dim pc As peptideCorrStruct
pc.AAsequence4 = pept3.AASequence
'Changed PM_REFACTOR 2006-03-15
'pc.peptIndex = j
pc.peptideRef = peptideToken
pc.corr_rawFileId = pept3.rawFileID
'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
'longer implicit conversion from integer to
'double...
pc.useForCorrelation = _
pept3.MascotScore2 > PEPTIDE_SCORE_THRESHOLD_FOR_CORR
'Changed PM_CORR_RETENTIONTIME_DESTINCT_CHARGEANDMASS 2003-09-24
pc.charge3 = pept3.charge
pc.mass3 = pept3.measuredMass
'Changed PM_CORR_NONINCLUDEDPEPTIDES_BUG 2006-10-20
pc.query3 = pept3.queryNumber
pc.modsHashValue = _
PILpeptides.hashValueForModification( _
pept3.modHits2, Nothing, True)
protPeptideList2.Add(pc)
Else
Dim peter8 As Integer = 8
End If
End While 'Through peptides.
'Insert pseudo sequence to make the following easier
Dim pc2 As peptideCorrStruct
pc2.AAsequence4 = "ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ"
pc2.peptideRef = -10000
pc2.corr_rawFileId = -29000
protPeptideList2.Add(pc2)
protPeptideList2.Sort(New SortBySequenceComparer)
'Dim lastSequence As String = ""
Dim lastPept As peptideCorrStruct
lastPept.AAsequence4 = ""
Dim startIndex As Integer = 0 'For a particular peptide, index
' for first. Inclusive.
Dim endIndex As Integer = 0 'For a particular peptide, index
' for last. Exclusive.
Dim prevQnum As Integer = -1
'For debugging only
Dim pept2 As PeptideHitStructure = _
PeptideHitStructure.blankPeptide() 'Keep compiler happy.
'Changed PM_MARKER 2006-10-22
Dim k As Integer
Dim lastIndex As Integer = protPeptideList2.Count - 1
For k = 0 To lastIndex
Dim curPep As peptideCorrStruct = protPeptideList2(k)
Dim massDiff As Double = curPep.mass3 - lastPept.mass3
Dim absMassDiff As Double = Math.Abs(massDiff)
'Changed PM_MASS_UNIQUENESS 2006-10-23. Why was it 10.0??
' To account for SILAC modifications with the same retention time??
'
' We set it to 0.2 in order to account for modifications not
' recognised by the program (either not set up by the user or
' of a type that the program can not handle - e.g. terminal
' modifications).
'
''Changed PM_RETT_TROUBLE 2006-08-10
'' ''Dim lowMassDifference As Boolean = absMassDiff < 0.2
' ''Dim lowMassDifference As Boolean = absMassDiff < 10.0
''Dim lowMassDifference As Boolean = absMassDiff < 0.2
'Dim lowMassDifference As Boolean = absMassDiff < 10.0
Dim lowMassDifference As Boolean = absMassDiff < 0.2
Dim sameSequence As Boolean = _
curPep.AAsequence4 = lastPept.AAsequence4
Dim sameCharge As Boolean = _
curPep.charge3 = lastPept.charge3
Dim sameMods As Boolean = _
curPep.modsHashValue = lastPept.modsHashValue
Dim identicalPeptide As Boolean = _
sameSequence AndAlso _
lowMassDifference AndAlso _
sameCharge AndAlso _
sameMods AndAlso _
True
'Changed PM_MARKER 2006-10-18
plst.peptideByToken(curPep.peptideRef, pept2)
Dim qNum As Integer = pept2.queryNumber
'For debugging.
If aProteinIndex = 7 Then
If k > 70 AndAlso k < 80 Then
Dim peter2 As Integer = 2
End If
End If
If sameSequence AndAlso sameCharge Then
If Not lowMassDifference Then
Dim peter2 As Integer = 2
End If
If absMassDiff > 17.0 Then
Dim peter7 As Integer = 7
End If
If absMassDiff < 10.0 Then
Dim peter10 As Integer = 10
End If '10.0
If absMassDiff > 0.0001 Then
Dim peter3 As Integer = 3
End If
If absMassDiff > 0.01 Then
Dim peter4 As Integer = 4
End If
If absMassDiff > 0.1 Then
Dim peter5 As Integer = 5
End If
If absMassDiff > 0.6 Then
Dim hit As Boolean = False
If absMassDiff > 0.9 AndAlso _
absMassDiff < 1.1 Then
Dim peter1 As Integer = 1
hit = True
End If
If absMassDiff > 2.9 AndAlso _
absMassDiff < 3.1 Then
Dim peter3 As Integer = 3
hit = True
End If
If absMassDiff > 3.9 AndAlso _
absMassDiff < 4.1 Then
Dim peter4 As Integer = 4
hit = True
End If
If absMassDiff > 4.9 AndAlso _
absMassDiff < 5.1 Then
Dim peter5 As Integer = 5
hit = True
End If
If absMassDiff > 5.9 AndAlso _
absMassDiff < 6.1 Then
Dim peter6 As Integer = 6
hit = True
End If
If absMassDiff > 6.9 AndAlso _
absMassDiff < 7.1 Then
Dim peter7 As Integer = 7
hit = True
End If
If absMassDiff > 7.9 AndAlso _
absMassDiff < 8.1 Then
Dim peter8 As Integer = 8
hit = True
End If
If absMassDiff > 8.9 AndAlso _
absMassDiff < 9.1 Then
Dim peter9 As Integer = 9
hit = True
End If
If absMassDiff > 9.9 AndAlso _
absMassDiff < 10.1 Then
Dim peter10 As Integer = 10
hit = True
End If
If absMassDiff > 10.9 AndAlso _
absMassDiff < 11.1 Then
Dim peter11 As Integer = 11
hit = True
End If
If absMassDiff > 11.9 AndAlso _
absMassDiff < 12.1 Then
Dim peter12 As Integer = 12
hit = True
End If
If absMassDiff > 12.9 AndAlso _
absMassDiff < 13.1 Then
Dim peter13 As Integer = 13
hit = True
End If
If absMassDiff > 14.9 AndAlso _
absMassDiff < 15.1 Then
Dim peter15 As Integer = 15
hit = True
End If
If absMassDiff > 15.9 AndAlso _
absMassDiff < 16.1 Then
Dim peter16 As Integer = 16
hit = True
End If
If absMassDiff > 16.9 AndAlso _
absMassDiff < 17.1 Then
Dim peter17 As Integer = 17
hit = True
End If
If absMassDiff > 17.9 AndAlso _
absMassDiff < 18.1 Then
Dim peter18 As Integer = 18
hit = True
End If
If absMassDiff > 19.9 AndAlso _
absMassDiff < 20.1 Then
Dim peter20 As Integer = 20
hit = True
End If
If absMassDiff > 20.9 AndAlso _
absMassDiff < 21.1 Then
Dim peter21 As Integer = 21
hit = True
End If
If absMassDiff > 22.9 AndAlso _
absMassDiff < 23.1 Then
Dim peter23 As Integer = 23
hit = True
End If
If absMassDiff > 31.9 AndAlso _
absMassDiff < 32.1 Then
Dim peter32 As Integer = 32
hit = True
End If
If absMassDiff > 32.9 AndAlso _
absMassDiff < 33.1 Then
Dim peter33 As Integer = 33
hit = True
End If
If absMassDiff > 45.9 AndAlso _
absMassDiff < 46.1 Then
Dim peter46 As Integer = 46
hit = True
End If
If Not hit Then
Dim peter81 As Integer = 81
End If
End If
If absMassDiff > 7 Then
Dim peter69 As Integer = 69
End If
End If 'Same sequence. For debugging only.
prevQnum = qNum
'Note: for retention time correlation charge and precise mass
' are NOT important.
'Changed PM_CORR_DISTINCTCHARGE 2006-10-20
'If sameSequence AndAlso lowMassDifference Then
If identicalPeptide Then
endIndex += 1
Else
If True Then 'Processing for particular peptide.
'Changed PM_TYPESAFE 2008-11-27
'Dim bestScoreHash As New Hashtable 'List (hash) of
'' best peptides; one for each raw file.
Dim bestScoreHash2 As _
Dictionary(Of Integer, peptideCorrStruct) = _
New Dictionary(Of Integer, peptideCorrStruct)
'Changed PM_TYPESAFE 2008-11-27
'Dim nextBestScoreHash As New Hashtable 'List (hash) of
'' next best peptides; with distinct charge from the
'' corresponding in bestScoreHash.
'' one for each raw file.
Dim nextBestScoreHash2 As _
Dictionary(Of String, peptideCorrStruct) = _
New Dictionary(Of String, peptideCorrStruct)
'Changed PM_TYPESAFE 2008-11-27
'Dim notBestScore As ArrayList = New ArrayList 'Type
' is peptideCorrStruct.
Dim notBestScore2 As List(Of peptideCorrStruct) = _
New List(Of peptideCorrStruct)
Dim m As Integer
Dim lastIndex2 As Integer = endIndex - 1
For m = startIndex To lastIndex2
Dim curPep2 As peptideCorrStruct = _
protPeptideList2(m)
Dim curRawFileID2 As Integer = curPep2.corr_rawFileId
If True Then
' Build our map of rawfileID to arbitrary zero
' based index. This may take several peptides
' to complete. So it may not be completely
' build in one run of this loop.
Dim key As Integer = curRawFileID2
If Not mRawFile2IndexHash2.ContainsKey(key) Then
mRawFile2IndexHash2.Add( _
key, mCurrentRawFileIndex)
mRawFile2IndexHash_Reverse2.Add( _
mCurrentRawFileIndex, key)
mCurrentRawFileIndex += 1
End If
End If
If bestScoreHash2.ContainsKey(curRawFileID2) Then
Dim oldStruct As peptideCorrStruct = _
bestScoreHash2(curRawFileID2)
Dim oldScore As Double = _
plst.peptideScoreByToken( _
oldStruct.peptideRef)
Dim newScore As Double = _
plst.peptideScoreByToken( _
curPep2.peptideRef)
If newScore > oldScore Then
'Better score, replace.
'Note: old value in oldStruct.
notBestScore2.Add(oldStruct)
bestScoreHash2(curRawFileID2) = curPep2
Else
notBestScore2.Add(curPep2)
End If
Else
bestScoreHash2.Add(curRawFileID2, curPep2)
End If
Next
If bestScoreHash2.Count = 1 Then
Dim peter9 As Integer = 9
End If
If True Then
'Add to our linear list of peptides and insert
'indexes into that in other datastructures.
Dim bestPeptides As Integer = bestScoreHash2.Count
'Changed PM_TYPESAFE 2008-11-27
'Dim hashEnumerator As IDictionaryEnumerator = _
' bestScoreHash2.GetEnumerator()
Dim hashEnumerator2 As Dictionary( _
Of Integer, peptideCorrStruct).Enumerator = _
bestScoreHash2.GetEnumerator()
While hashEnumerator2.MoveNext()
Dim curPep2 As peptideCorrStruct = _
hashEnumerator2.Current.Value
If True Then 'Next best score handling
'Only now can we decide which of the
'not-best-scores we want to use - because
'we must know the charge for the best
'scoring peptide.
Dim bestScoreCharge As Integer = curPep2.charge3
Dim bestScoreRawFileID As Integer = _
curPep2.corr_rawFileId
'Same as:
Dim rawID As Integer = _
hashEnumerator2.Current.Key
Dim curNextBest As peptideCorrStruct
For Each curNextBest In notBestScore2
If curNextBest.corr_rawFileId = _
bestScoreRawFileID AndAlso _
curNextBest.charge3 <> bestScoreCharge Then
'To correct: add or replace
'in nextBestScoreHash.
Dim nextBestScoreKey As String = _
curNextBest.corr_rawFileId & "_" & _
curNextBest.charge3
If nextBestScoreHash2.ContainsKey( _
nextBestScoreKey) Then
Dim oldStruct2 As peptideCorrStruct = _
nextBestScoreHash2( _
nextBestScoreKey)
Dim oldScore2 As Double = _
plst.peptideScoreByToken( _
oldStruct2.peptideRef)
Dim score As Double = _
plst.peptideScoreByToken( _
curNextBest.peptideRef)
If score > oldScore2 Then
'Better score, replace.
nextBestScoreHash2( _
nextBestScoreKey) = _
curNextBest
Else
'Then we finally forget about
'that peptide!
Dim peter3 As Integer = 3
End If
Else
nextBestScoreHash2.Add( _
nextBestScoreKey, curNextBest)
End If
End If
Next 'Iterating best score peptides
' for each raw file.
End If 'Next best score handling
Dim peptRef As Integer = curPep2.peptideRef
Dim peptideToAdd As PeptideHitStructure = _
PeptideHitStructure.blankPeptide() 'Keep compiler happy.
plst.peptideByToken( _
peptRef, peptideToAdd)
'Changed PM_INSERTPEPTIDES_ASSERT 2005-07-29
Dim lowerMass As Double = _
PeptideHitStructure.getLowerMCRuncalib(peptideToAdd)
Trace.Assert( _
lowerMass > 20.0, _
"PIL ASSERT. Unreasonable mass for lowerMass: " & _
lowerMass)
mPeptideList2.Add(peptideToAdd)
Dim coor As mapCoordinatesStruct
coor.uniquePeptideIndex = mUniquePeptideIndexCounter
coor.rawFileIDindex = _
mRawFile2IndexHash2(curPep2.corr_rawFileId)
coor.useForCorrelation = curPep2.useForCorrelation
mPeptideList_mapCoordinates2.Add(coor)
peptidesAdded += 1
'Changed PM_MASCOTSCORE_ASDOUBLE 2008-11-25. No
'longer implicit conversion from integer to
'double...
If peptideToAdd.MascotScore2 < minScore2 Then
minScore2 = peptideToAdd.MascotScore2
End If
End While
If peptidesAdded > 0 Then 'Will only be false for the
' very first iteration of the loop...
Dim columnDescriptor As coorMapcolumnDescriptorStruct
columnDescriptor.proteinIndex = aProteinIndex
'Note: field peptideUsedForCorrelation will be filled
' in later, in ParsingCompleted().
mProteinIndex_array2.Add(columnDescriptor)
mUniquePeptideIndexCounter += 1
End If
If True Then 'Next best score handling.
'Dim hashEnumerator3 As IDictionaryEnumerator = _
' nextBestScoreHash.GetEnumerator()
Dim hashEnumerator4 As Dictionary( _
Of String, peptideCorrStruct).Enumerator = _
nextBestScoreHash2.GetEnumerator()
While hashEnumerator4.MoveNext()
Dim curPep3 As peptideCorrStruct = _
hashEnumerator4.Current.Value
Dim peptRef3 As Integer = curPep3.peptideRef
Dim peptideToAdd3 As PeptideHitStructure = _
PeptideHitStructure.blankPeptide() 'Keep compiler happy.
plst.peptideByToken( _
peptRef3, peptideToAdd3)
mExtraPeptidesList2.Add(peptideToAdd3)
Dim coor3 As mapCoordinatesStruct
coor3.uniquePeptideIndex = _
mExtraPeptidesIndexCounter
coor3.rawFileIDindex = _
mRawFile2IndexHash2(curPep3.corr_rawFileId)
coor3.useForCorrelation = False 'Note: not
' curPep3.useForCorrelation because it may
' be true just because of a high (Mascot) score.
Trace.Assert(coor3.useForCorrelation = False, _
"PIL ASSERT. " & _
"Extra peptide was unexpectedly used for correlation....")
mExtraPeptidesList_mapCoordinates2.Add(coor3)
Dim columnDescriptor As _
coorMapcolumnDescriptorStruct
columnDescriptor.proteinIndex = aProteinIndex
mProteinIndex_ExtraArray2.Add(columnDescriptor)
mExtraPeptidesIndexCounter += 1 'Note: increased
' every time - in mExtra2D_array there will only
' be one value different from NOPEPTIDE_CODE in
' each row.
End While
End If 'Next best score handling
End If 'Add to our linear list of peptides, etc.
End If 'True. Processing for one peptide.
startIndex = k
endIndex = startIndex + 1 '+1: because we already have one
' peptide, the current.
'Changed PM_CORR_NONINCLUDEDPEPTIDES_BUG 2006-10-20
'lastPept = curPep
End If
'Changed PM_CORR_NONINCLUDEDPEPTIDES_BUG 2006-10-20
lastPept = curPep 'Moved to here...
Next
Dim totPeptides As Integer = mPeptideList2.Count
'Assert equal length, mPeptideList, mPeptideListCoordinates
'(or mPeptideList_mapCoordinates ??)
Dim len2 As Integer = mPeptideList2.Count
Trace.Assert(len2 = mPeptideList_mapCoordinates2.Count, _
"PIL ASSERT. mPeptideList2 is of length " & len2 & _
". This is not the expected length...")
End Sub 'updateWithPeptidesFromOneProtein
'****************************************************************************
'* SUBROUTINE NAME: PeptidesNotActuallyInserted *
'd$ <summary>
'd$ Purpose: Signal from the client that the peptides were not actually
'd$ inserted, so that the correlation report can say so.
'd$
'd$ Prepends a fixed note at position 0 of mCorrReport. The
'd$ note states that no peptides were inserted and points to
'd$ the "Do not insert new peptides" checkbox in the
'd$ correlation settings dialog as the possible reason.
'd$
'd$ Takes no parameters and returns nothing; the only effect
'd$ visible here is the change to mCorrReport.
'd$
'd$ <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$ </summary>
Public Sub PeptidesNotActuallyInserted()
    'Note: the first literal ends with a space so that "NOT inserted."
    '      and "Possible reasons:" do not run together in the report.
    'NOTE(review): the note ends with "See above." although it is
    '      inserted at the very start of the report - confirm that the
    '      wording is intended.
    mCorrReport.Insert( _
      0, _
      "Note: peptides were actually NOT inserted. Repeat: NOT inserted. " & _
      "Possible reasons: The checkbox ""Do not insert new peptides"" in " & _
      "the correlation settings dialog is checked. Uncheck it and try again. " & _
      vbCrLf & "See above." & vbCrLf & vbCrLf & vbCrLf)
End Sub 'PeptidesNotActuallyInserted
'****************************************************************************
'* SUBROUTINE NAME: getCorrelationReport *
'd$ <summary>
'd$ Purpose: Returns the current content of the correlation report
'd$ (mCorrReport) as a single string.
'd$
'd$ The report itself is not modified by this call.
'd$
'd$ <see cref="T:VBXMLDoc.CVBXMLDoc" />.
'd$ </summary>
Public Function getCorrelationReport() As String
    'Snapshot of the report text at the time of the call.
    Dim reportText As String = mCorrReport.ToString()
    Return reportText
End Function 'getCorrelationReport
End Class 'retentionTimeCorrelator
End Namespace 'massSpectrometryBase
Generated by script codePublish.pl at 2009-01-05T15:20:59.