// Source code for MSQuant: CEBIspaces.cs, MSQlib1/src/utility/CEBIspaces.cs.
//
// Table of contents page.
//
// Home page for MSQuant.

/****************************************************************************
 * Copyright (C) 2008 Peter Mortensen and Matthias Mann                     *
 * This file is part of MSQuant.                                            *
 *                                                                          *
 * MSQuant is distributed under the terms of                                *
 * the GNU General Public License. See src/COPYING.TXT or                   *
 * <http://www.gnu.org/licenses/gpl.txt> for details.                       *
 *                                                                          *
 * MSQuant is free software; you can redistribute it                        *
 * and/or modify it under the terms of the GNU                              *
 * General Public License as published by the Free                          *
 * Software Foundation; either version 2 of the                             *
 * License, or (at your option) any later version.                          *
 *                                                                          *
 * MSQuant is distributed in the hope that it will be                       *
 * useful, but WITHOUT ANY WARRANTY; without even the                       *
 * implied warranty of MERCHANTABILITY or FITNESS FOR                       *
 * A PARTICULAR PURPOSE.  See the GNU General Public                        *
 * License for more details.                                                *
 *                                                                          *
 * You should have received a copy of the GNU General                       *
 * Public License along with MSQuant; if not, write to                      *
 * the Free Software Foundation, Inc., 59 Temple                            *
 * Place, Suite 330, Boston, MA  02111-1307  USA                            *
 *                                                                          *
 * Purpose: Holds CEBIspaces, see below for documentation.                  *
 *                                                                          *
 ****************************************************************************/

/****************************************************************************
 *                               CEBI                                       *
 *                    Software Development Group                            *
 *                         Peter Mortensen                                  *
 *                E-mail: NUKESPAMMERSdrmortensen@get2netZZZZZZ.dk          *
 *                 WWW: http://www.cebi.sdu.dk/                             *
 *                                                                          *
 *  Program for post-processing of result from search in mass               *
 *    spectrometric data.                                                   *
 *                                                                          *
 *    FILENAME:   CEBIspaces.cs                                             *
 *    TYPE:       CSHARP                                                    *
 *                                                                          *
 * CREATED: PM 2008-07-08   Vrs 1.0.                                        *
 * UPDATED: PM 2008-xx-xx                                                   *
 *                                                                          *
 *                                                                          *
 ****************************************************************************/

//Future:
// 1.
//
// 2.


//using System;
//using System.Collections.Generic;
//using System.Text;

using System.Collections.Generic; //For List
using System.Diagnostics; //For Trace. And its Assert.
using System.Text; //For StringBuilder

using SimmoTech.Utils.Serialization; //For SerializationReader
//  and SerializationWriter.




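/****************************************************************************
 * Namespace SDUPutility. This file adds the experiment settings            *
 * (experimentSettingsStruct, rawFileStruct4) and the N-dimensional         *
 * sample space support (vectorElementStruct, CEBIspaces).                  *
 ****************************************************************************/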
namespace SDUPutility
{


    //****************************************************************************
    //* Purpose: settings describing one experiment:
    //*
    //*            1. JAspace: the dimension definitions of the experiment,
    //*               wrapped in a CEBIspaces instance.
    //*
    //*            2. rawFileMapping3: a hash of rawFileStruct4 values.
    //*
    //*          The struct can read itself from and write itself to a saved
    //*          parse (binary format, MB4 or later) - see readFromStream2()
    //*          and addToStream2(). For older file versions nothing is read
    //*          or written for this struct.
    //*
    //*    Note: the order of the fields is left untouched on purpose, in
    //*          case something else (e.g. XML serialisation) depends on it.
    //****************************************************************************
    public struct experimentSettingsStruct
    {
        public CEBIspaces JAspace;

        //The key of each entry is also handed to the value when it is read
        //from a stream (it ends up in rawFileStruct4.NpointKey4).
        public Dictionary<int, rawFileStruct4> rawFileMapping3;

        //Helper field. "INTERNAL" means it is not saved in a saved
        //parse, e.g. MB4. We can't make it as is - unless we make
        //this struct into a class.
        public Dictionary<int, int> INTERNAL_rawFileID2NpointHash;


        //****************************************************************************
        //* Purpose: reads an experimentSettingsStruct from a saved parse.
        //*
        //*   Parameters:
        //*     anInReader       Reader positioned at the start of the data
        //*                      written by addToStream2().
        //*     aMBfileVersion   Version of the file being read. Compared
        //*                      against AppConstants.SAVEDFILEVERSION_MB4.
        //*     aDefaultValue    Starting point for the returned value.
        //*
        //*   Returns: a copy of aDefaultValue. For MB4 or later, JAspace is
        //*            replaced by what is read from the stream and
        //*            rawFileMapping3 is replaced if the stored count is not
        //*            negative. For older versions nothing is read and the
        //*            copy is returned as is.
        //*
        //*    Note: the stored dimension list is passed on to
        //*          CEBIspaces.setDimensionDefinitions(), which asserts that
        //*          it is not empty.
        //****************************************************************************
        public static experimentSettingsStruct readFromStream2(
          ref SerializationReader anInReader, 
          int aMBfileVersion, 
          experimentSettingsStruct aDefaultValue)
        {
            experimentSettingsStruct toReturn = aDefaultValue;

            //Allow us to read in the old format (where nothing
            //is read for this struct): use the default value.
            if (aMBfileVersion < AppConstants.SAVEDFILEVERSION_MB4)
            {
                return toReturn;
            }

            { //Block. For field JAspace.

                //Our own encoding: a count followed by that many dimensions.
                int dimensionCount = anInReader.ReadOptimizedInt32();
                List<CEBIdimension> dimensions =
                  new List<CEBIdimension>(dimensionCount);

                for (int i = 0; i < dimensionCount; i++)
                {
                    dimensions.Add(
                      CEBIdimension.readFromStream2(
                        ref anInReader, aMBfileVersion));
                }

                toReturn.JAspace = new CEBIspaces();
                toReturn.JAspace.setDimensionDefinitions(ref dimensions);
            } //Block. For field JAspace.

            { //Block. For field rawFileMapping3 (a hash).

                //Our own encoding: a count followed by that many
                //(key, value) pairs.
                int entryCount = anInReader.ReadOptimizedInt32();

                //Changed PM_SAVE_ASSERT 2008-12-10. null is not accepted
                //  internally currently, but empty is. Thus a count of
                //  zero results in an empty hash, not in null.
                if (entryCount >= 0)
                {
                    toReturn.rawFileMapping3 =
                      new Dictionary<int, rawFileStruct4>(entryCount);

                    for (int i = 0; i < entryCount; i++)
                    {
                        int curKey = anInReader.ReadInt32();
                        rawFileStruct4 curValue =
                          rawFileStruct4.readFromStream2(
                            ref anInReader, aMBfileVersion, curKey);
                        toReturn.rawFileMapping3.Add(curKey, curValue);
                    }
                }
            } //Block. For field rawFileMapping3 (a hash).

            return toReturn;
        } //readFromStream2(). For experimentSettingsStruct.


        //Changed PM_SAVE_RAWFILE_MAPPING_MB4 2008-11-17
        //****************************************************************************
        //* Purpose: writes this experimentSettingsStruct to a saved parse,
        //*          in the format expected by readFromStream2().
        //*
        //*   Parameters:
        //*     anInOutWriter    Writer to append to.
        //*     aMBfileVersion   Version of the file being written, e.g.
        //*                      300 for MB3, 400 for MB4. Nothing is
        //*                      written for versions older than MB4.
        //*
        //*    Note: JAspace must be set and must hold at least one
        //*          dimension. A null rawFileMapping3 is written as a hash
        //*          with zero entries.
        //****************************************************************************
        public void addToStream2(
          ref SerializationWriter anInOutWriter, int aMBfileVersion)
        {
            //SERMARK19. A marker. Keep it.

            //Allow us to save in the old format (where nothing
            //is saved for this struct).
            if (aMBfileVersion < AppConstants.SAVEDFILEVERSION_MB4)
            {
                return;
            }

            { //Block. For field JAspace.

                List<CEBIdimension> dimensions =
                  JAspace.getDimensionDefinitions();

                //Can this ever happen by user action?? Does the dialog
                //prevent it?
                Trace.Assert(
                  dimensions != null && dimensions.Count > 0,
                  "PIL ASSERT. dimensions is null or is empty");

                //This is our own encoding: count first, then the items.
                int items = dimensions.Count;
                anInOutWriter.WriteOptimized(items);

                foreach (CEBIdimension item in dimensions)
                {
                    item.addToStream2(ref anInOutWriter, aMBfileVersion);
                }
            } //Block. For field JAspace.

            { //Block. For field rawFileMapping3 (a hash).

                //Changed PM_SAVE_ASSERT 2008-12-10. It is allowed to be zero
                //length. In fact, currently, it will not work elsewhere
                //if rawFileMapping3 is null... Nothing (null) is encoded
                //as zero length.
                int items = 0;
                if (rawFileMapping3 != null)
                {
                    items = rawFileMapping3.Count;
                }

                //This is our own encoding: count first, then the pairs.
                anInOutWriter.WriteOptimized(items);

                if (rawFileMapping3 != null)
                {
                    //Encode by storing (key,value) pairs, one at a time. Do
                    //it here on the client side to avoid invoking the .NET
                    //serialisation (BinaryFormatter).
                    foreach (KeyValuePair<int, rawFileStruct4> entry
                               in rawFileMapping3)
                    {
                        int curKey = entry.Key;
                        rawFileStruct4 curValue = entry.Value;

                        anInOutWriter.Write(curKey);
                        curValue.addToStream2(ref anInOutWriter, aMBfileVersion);
                    } //Hash iteration.
                }
            } //Block. For field rawFileMapping3 (a hash).

        } //addToStream2(). For experimentSettingsStruct.

    } //experimentSettingsStruct


    //Note: rawFileInfoStruct in MascotResultParser.vb is similar in name.
    //      Change one or both?
    //
    //****************************************************************************
    //* Purpose: one value in the hash experimentSettingsStruct.rawFileMapping3.
    //*          Holds a raw file (a string and an integer ID) together
    //*          with the hash key the value is stored under.
    //*
    //*          Can read itself from and write itself to a saved parse
    //*          (binary format, MB4 or later).
    //****************************************************************************
    public struct rawFileStruct4
    {
        public string rawFile;

        //Piggy-backed:
        public int rawFileID7;
        
        //public int xyz;

        //For reverse mapping. Needs to be public.
        //
        //Not part of the binary format: it is restored from the hash
        //key when reading, see readFromStream2().
        public int NpointKey4;


        //****************************************************************************
        //* Purpose: reads a rawFileStruct4 from a saved parse.
        //*
        //*   Parameters:
        //*     anInReader       Reader positioned at the start of the data
        //*                      written by addToStream2().
        //*     aMBfileVersion   Version of the file being read. Not used
        //*                      here at present.
        //*     aHashKey         Key of the hash the returned value
        //*                      is going to be part of. Stored in
        //*                      field NpointKey4.
        //*
        //*   Returns: the value read (a string followed by a 32-bit
        //*            integer), with NpointKey4 set to aHashKey.
        //****************************************************************************
        public static rawFileStruct4 readFromStream2(
          ref SerializationReader anInReader, 
          int aMBfileVersion, 
          int aHashKey)
        {
            rawFileStruct4 toReturn;

            //The order must match the order in addToStream2().
            toReturn.rawFile = anInReader.ReadString();
            toReturn.rawFileID7 = anInReader.ReadInt32();

            toReturn.NpointKey4 = aHashKey; //To be consistent with XML deserialisation.
            return toReturn;
        } //readFromStream2(). For rawFileStruct4.


        //Changed PM_SAVE_RAWFILE_MAPPING_MB4 2008-11-17
        //****************************************************************************
        //* Purpose: writes this rawFileStruct4 to a saved parse, in the
        //*          format expected by readFromStream2().
        //*
        //*   Parameters:
        //*     anInOutWriter    Writer to append to.
        //*     aMBfileVersion   Version of the file being written, e.g.
        //*                      300 for MB3, 400 for MB4. Must be MB4 or
        //*                      later (asserted); the caller is expected
        //*                      to have checked this.
        //****************************************************************************
        public void addToStream2(
          ref SerializationWriter anInOutWriter, int aMBfileVersion)
        {
            //SERMARK21. A marker. Keep it.

            //Assume it has been checked by the caller.
            Trace.Assert(aMBfileVersion >= AppConstants.SAVEDFILEVERSION_MB4,
              "PIL ASSERT. rawFileStruct4 can not be saved in a format " +
              "older than MB4.");

            //The order must match the order in readFromStream2().
            anInOutWriter.Write( rawFile);
            anInOutWriter.Write( rawFileID7);

            //NpointKey4 is not written here: the caller writes the hash
            //key and hands it back to readFromStream2() when reading.

        } //addToStream2(). For rawFileStruct4.
    
    
    } //rawFileStruct4


    //One coordinate of an N-point: the element of a single dimension.
    //CEBIspaces.setDimensionDefinitions() creates one of these per
    //dimension for every point it enumerates.
    public struct vectorElementStruct //Hide - make private??
    {
        //Auto-generated in CEBIspaces.setDimensionDefinitions(): the
        //dimension's base name, a space and the user's (start number
        //based) index.
        public string name;

        //As name, but built from the dimension's short base name. Used
        //by CEBIspaces.getMapEntryAsShortString().
        public string shortName3;

        public int ordinal2; //This element's place in the
                            //dimension. Needed? Right name?
                            //
                            //Set to the user's index minus one (so
                            //it includes the dimension's start
                            //number). Used as bucket index by
                            //CEBIspaces.getMapHash().

        //Type, SILAC or not ???
        //bool xyz;
    } //struct vectorElementStruct


    //****************************************************************************
    //* Purpose: representing sets of vectors for the higher                     *
    //*          level computations.                                             *
    //*                                                                          *
    //*          E.g. let the program have                                       *
    //*          knowledge/be aware of an experiment where the                   *
    //*          sample has been separated into 6 by a sucrose                   *
    //*          gradient and then each fraction into 10 gel                     *
    //*          slices (60 samples and 60 raw files in total).                  *
    //*                                                                          *
    //*          Utility functions:                                              *
    //*            1. hash keys as bit-vectors.                                  *
    //*            2. mapping from a vector/point to/from a raw file             *
    //*               (e.g. (fraction 2, gel slice 7) to                         *
    //*               file "20060412LJ_Cen1-02.RAW")                             *
    //*                                                                          *
    //*                                                                          *
    //****************************************************************************
    //
    //Usage: call setDimensionDefinitions() first. It builds the set of all
    //points (mVectorSet) that the other functions look things up in. Until
    //then getVectorSet() and getDimensionDefinitions() return null.
    //
    //Not written with concurrent use in mind (e.g.
    //getMapEntryAsShortString() uses a shared scratch buffer).
    public class CEBIspaces
    {

        private Dictionary<int, List<int>> mOneDimMapper; //From one dimension 
        //  (e.g. raw file ID 823) to an N-point (e.g. sucrose
        //  fraction 4 / gel slice 7).

        private Dictionary<int, int> mReverseOneDimMapper; //From an N-point, 
        //  represented by a key (e.g. sucrose fraction 4 / gel slice 7) to 
        //  one dimension (e.g. raw file ID 823).


        private Dictionary<int, List<vectorElementStruct>> mVectorSet; //Set
        //  of all vectors/points. E.g. 60 elements for 6 sucrose fractions
        //  and 10 gel slices.
        //
        //  Key is derived from the N-point. E.g. sucrose fraction 4 / 
        //  gel slice 7 is (3,6) and the key may be computed
        //  as 457 + 3 + 6 * 100 = 1060.

        private StringBuilder mScratchSB; //Reused by
        //  getMapEntryAsShortString() to avoid allocating a new
        //  StringBuilder for every call.


        //Changed PM_HIGHER_LEVEL_PROCESSING 2008-08-24
        private List<CEBIdimension> mDimensionsDefs2; //To remember
        // input... For dialog. Is this the way we want it?


        /****************************************************************************
        *  Purpose: creates an instance without any dimensions. The vector
        *           set and the mappers stay null until
        *           setDimensionDefinitions() / addMapEntry_2() is called.
        ****************************************************************************/
        public CEBIspaces()
        {
            mOneDimMapper = null; //Lazy instantiation.
            mReverseOneDimMapper = null; //Lazy instantiation.
            mVectorSet = null; //Lazy instantiation.

            mScratchSB = new StringBuilder(); //We can afford it...
        } //Constructor.


        /****************************************************************************
        *  Purpose: returns a list with aDepth zeros: the first point
        *           (index 0 in every dimension) of the enumeration in
        *           setDimensionDefinitions().
        ****************************************************************************/
        private static List<int> initStack(int aDepth)
        {
            List<int> toReturn = new List<int>(aDepth);

            for (int i = 0; i < aDepth; i++)
            {
                toReturn.Add(0); //Init value. Should it be 1? Or client defined?
            }

            return toReturn;
        } //initStack().


        /****************************************************************************
        *  Purpose: steps anInOutStack to the next point, like an odometer
        *           where the first dimension is the fastest running one
        *           and dimension i runs from 0 to (size of dimension i) - 1.
        *
        *    Returns: true if anInOutStack now holds a point that has not
        *             been seen yet. false if every dimension wrapped around
        *             to 0, that is, all points have been seen.
        ****************************************************************************/
        private static bool stepToNextPoint(
          List<int> anInOutStack, List<CEBIdimension> anInDefs)
        {
            int dimensions = anInOutStack.Count;
            for (int i = 0; i < dimensions; i++)
            {
                CEBIdimension someDefs = anInDefs[i];
                int maxVal = someDefs.getSize2() - 1;

                int index = anInOutStack[i] + 1;
                if (index <= maxVal)
                {
                    anInOutStack[i] = index;
                    return true; //No carry. We must stop.
                }

                anInOutStack[i] = 0; //Carry... Make another turn
                                     //through the loop.
            }

            //If we are here then the last dimension also wrapped around:
            //we have seen all combinations.
            return false;
        } //stepToNextPoint().


        /****************************************************************************
        *  Purpose: defines the dimensions of the space and builds the set
        *           of all points in it (one entry in the vector set for
        *           every combination of indexes).
        *
        *           Anything added through previous calls to
        *           addMapEntry_2() is discarded.
        *
        *    Parameters:
        *      anInDefs2   The dimensions. Must not be null and must not be
        *                  empty (both are asserted). The list itself is
        *                  kept (not copied) and handed out again by
        *                  getDimensionDefinitions().
        *
        *    Note: assumes auto-generation of names and short names. It is
        *          a client error otherwise (to call this function)...
        *
        *    Note: if the assertions are ignored for null or empty input,
        *          the result is an empty vector set (instead of a
        *          NullReferenceException or a duplicate key exception
        *          out of the enumeration).
        *
        *    Note: an exception is thrown (by the hash) if two points end
        *          up with the same key - see the limits in computeKey().
        ****************************************************************************/
        public void setDimensionDefinitions(ref List<CEBIdimension> anInDefs2)
        {
            Trace.Assert(anInDefs2 != null, "PIL ASSERT. anInDefs is null.");
            Trace.Assert(anInDefs2 != null && anInDefs2.Count > 0,
              "PIL ASSERT. anInDefs is empty.");

            //Changed PM_HIGHER_LEVEL_PROCESSING 2008-08-24
            mDimensionsDefs2 = anInDefs2;

            mVectorSet = new Dictionary<int, List<vectorElementStruct>>(30);

            //Clear anything added through previous calls to addMapEntry_2().
            mOneDimMapper = null;
            mReverseOneDimMapper = null;

            //Without any dimensions there are no points to enumerate. The
            //enumeration below can not handle this case (it would never
            //get to step past the first point).
            if (anInDefs2 == null || anInDefs2.Count == 0)
            {
                return;
            }

            int dimen = anInDefs2.Count;
            List<int> stack = initStack(dimen);

            //Enumerate all possible points in our N-dimensional space...
            bool morePoints = true;
            while (morePoints)
            {
                //Do something for current point... (that is
                //represented by the current combination,
                //stored/represented by variable/list "stack").
                List<vectorElementStruct> vector = 
                  new List<vectorElementStruct>(dimen);

                for (int i = 0; i < dimen; i++)
                {
                    CEBIdimension someDefs = anInDefs2[i];

                    //Later: ASSERT on auto-generation of names, in someDefs.

                    //We could look up in a hash (or array)...
                    int index = stack[i];

                    //Changed PM_HIGHER_LEVEL_PROCESSING 2008-09-26
                    //The user sees numbers that start at the dimension's
                    //start number, not at zero.
                    int userIndexOneBased = 
                      index + 1 + (someDefs.startNumber() - 1);

                    vectorElementStruct newPoint;

                    //Name auto-generated here.
                    //Changed PM_HIGHER_LEVEL_PROCESSING_SPACE_SEPERATOR 2008-09-03
                    newPoint.name =
                      someDefs.getBaseName() + 
                      " " +
                      userIndexOneBased;
                    newPoint.shortName3 =
                      someDefs.getBaseShortName() +
                      " " +
                      userIndexOneBased;

                    //No longer zero based, but instead in user
                    //terms (zero based, though).
                    newPoint.ordinal2 = userIndexOneBased - 1;

                    vector.Add(newPoint);
                } //Through dimensions in the current point.

                //Note: the key is computed from the zero based indexes
                //(not from the user's numbers).
                int NpointKey2 = computeKey(ref stack);
                mVectorSet.Add(NpointKey2, vector);

                //Step to the next, possibly finding out we have
                //seen/processed the last one.
                //
                //The result of this step is used above - through
                //content of "stack".
                morePoints = stepToNextPoint(stack, anInDefs2);

            } //while. Through all possible points in N-dimensional space.

        } //setDimensionDefinitions()


        //Changed PM_HIGHER_LEVEL_PROCESSING 2008-08-24
        /****************************************************************************
        *  Purpose: returns the list last passed to
        *           setDimensionDefinitions() (the list itself, not a
        *           copy). null if it has not been called yet.
        ****************************************************************************/
        public List<CEBIdimension> getDimensionDefinitions()
        {
            return mDimensionsDefs2;
        }


        //Changed PM_HIGHER_LEVEL_PROCESSING 2008-08-27
        /****************************************************************************
        *  Purpose: returns the set of all points, keyed by N-point key
        *           (the internal hash, not a copy). null if
        *           setDimensionDefinitions() has not been called yet.
        ****************************************************************************/
        public Dictionary<int, List<vectorElementStruct>> getVectorSet()
        {
            return mVectorSet;
        } //getVectorSet().


        /****************************************************************************
        *  Purpose: computes the hash key for an N-point:
        *
        *             457 + p[0] + 100 * p[1] + 100 * 100 * p[2] + ...
        *
        *           E.g. (3,6) results in 457 + 3 + 6 * 100 = 1060.
        *
        *    Note: keys are only unique if every value is in the range
        *          0 to 99. There is no overflow check, so the number of
        *          dimensions must be small.
        ****************************************************************************/
        private int computeKey(ref List<int> anNpoint)
        {
            //Assume there no more than 100 in any one dimension (then keys
            //become non-unique) and that the number of dimensions is
            //small (then we get integer overflow).
            const int MULT = 100;
            const int BASEKEYVALUE = 457; //Value is arbitrary, but want to
                                          //avoid special value, like 0...

            int toReturn = BASEKEYVALUE;
            int weight = 1;
            foreach (int dimensionValue in anNpoint)
            {
                toReturn += weight * dimensionValue;
                weight *= MULT;
            }
            return toReturn;
        } //computeKey()


        /****************************************************************************
        *  Purpose: registers that aOneDimensionalValue (e.g. a raw file ID)
        *           corresponds to the point anNpoint - in both directions.
        *
        *           Shows a message box if the point is not one of the
        *           points built by setDimensionDefinitions() (this
        *           includes the case where setDimensionDefinitions() has
        *           not been called at all). The entry is added anyway.
        *
        *    Note: an exception is thrown (by the hashes) if the value or
        *          the point has already been registered.
        ****************************************************************************/
        private void addMapEntry(
          int aOneDimensionalValue,
          List<int> anNpoint)
        {
            if (mOneDimMapper == null)
            {
                //Both mappers are always created and cleared together.
                mOneDimMapper = new Dictionary<int, List<int>>();
                mReverseOneDimMapper = new Dictionary<int, int>();
            }

            mOneDimMapper.Add(aOneDimensionalValue, anNpoint);

            int NpointKey2 = computeKey(ref anNpoint);
            mReverseOneDimMapper.Add(NpointKey2, aOneDimensionalValue);

            //Let us use the opportunity to do some checking...
            //
            //We also do this check by checking the boundaries
            //for each dimension.
            //
            //mVectorSet is null until setDimensionDefinitions() has been
            //called. Nothing can map to it in that case.
            bool knownPoint =
              mVectorSet != null && mVectorSet.ContainsKey(NpointKey2);
            if (!knownPoint)
            {
                string msg = 
                    "Value " + aOneDimensionalValue + 
                    " does not map to anything in mVectorSet.";
                System.Windows.Forms.MessageBox.Show(msg);
            }
        } //addMapEntry()


        /****************************************************************************
        *  Purpose: two-dimensional version of addMapEntry(): registers that
        *           aOneDimensionalValue corresponds to the point
        *           (aFirstDimension, aSecondDimension).
        *
        *    Note: the point is matched against keys computed from zero
        *          based indexes - see setDimensionDefinitions().
        ****************************************************************************/
        public void addMapEntry_2(
          int aOneDimensionalValue, 
          int aFirstDimension, 
          int aSecondDimension)
        {
            List<int> Npoint = new List<int>(2);
            Npoint.Add(aFirstDimension);
            Npoint.Add(aSecondDimension);

            addMapEntry(aOneDimensionalValue, Npoint);
        } //addMapEntry_2()


        /****************************************************************************
        *  Purpose: returns the short names of the elements of a point,
        *           separated by commas (no spaces added).
        *
        *    Parameters:
        *      aNpointKey   Key of the point, as used in the hash returned
        *                   by getVectorSet(). An exception is thrown if
        *                   the key is unknown.
        ****************************************************************************/
        public string getMapEntryAsShortString( int aNpointKey)
        {
            //Changed PM_HIGHER_LEVEL_PROCESSING 2008-09-07. We have
            //redefined the integer input value! - from raw file ID to
            //direct hash value for Npoint...
            List<vectorElementStruct> vector = mVectorSet[aNpointKey];

            mScratchSB.Length = 0; //Reset. Reused between calls.
            foreach (vectorElementStruct vectorElement in vector)
            {
                if (mScratchSB.Length != 0)
                {
                    mScratchSB.Append(",");
                }
                mScratchSB.Append( vectorElement.shortName3);
            } //Through elements in N-point (vector version).

            return mScratchSB.ToString();
        } //getMapEntryAsShortString()


        //Changed PM_PCP_PLOT 2008-09-30
        /****************************************************************************
        *  Purpose: maps every point to a bucket: the ordinal of the point's
        *           element in the primary dimension.
        *
        *    Parameters:
        *      anInKeyOrder         Only the first element is used: the
        *                           index of the primary dimension. Must
        *                           hold at least one element.
        *      aOutMinBucketIndex   Lowest bucket index in the result.
        *                           0 if there are no points.
        *      aOutMaxBucketIndex   Highest bucket index in the result.
        *                           0 if there are no points.
        *
        *    Returns: a new hash from N-point key to bucket index. Empty if
        *             there are no points (this includes the case where
        *             setDimensionDefinitions() has not been called yet).
        ****************************************************************************/
        public Dictionary<int, int> getMapHash(
            ref List<int> anInKeyOrder,
            out int aOutMinBucketIndex, out int aOutMaxBucketIndex)
        {
            //The primary key is the first element in aKeyOrder
            int primaryKeyIndex = anInKeyOrder[0];

            Dictionary<int, int> toReturn = new Dictionary<int, int>(10);

            //Track "anything seen yet?" explicitly instead of using magic
            //start values - any integer is then a valid bucket index.
            bool seenAny = false;
            int minBucketIndex = 0; //Correct value for no points???
            int maxBucketIndex = 0;

            if (mVectorSet != null)
            {
                foreach (KeyValuePair<int, List<vectorElementStruct>> entry
                           in mVectorSet)
                {
                    List<vectorElementStruct> curVector = entry.Value;
                    int bucketIndex = curVector[primaryKeyIndex].ordinal2;

                    toReturn[entry.Key] = bucketIndex;

                    if (!seenAny)
                    {
                        minBucketIndex = bucketIndex;
                        maxBucketIndex = bucketIndex;
                        seenAny = true;
                    }
                    else
                    {
                        if (bucketIndex > maxBucketIndex)
                        {
                            maxBucketIndex = bucketIndex;
                        }

                        if (bucketIndex < minBucketIndex)
                        {
                            minBucketIndex = bucketIndex;
                        }
                    }
                } //Through all points.
            }

            aOutMinBucketIndex = minBucketIndex;
            aOutMaxBucketIndex = maxBucketIndex;

            return toReturn;
        } //getMapHash


    } //class CEBIspaces


} //namespace SDUPutility


    

    

// Generated by script codePublish.pl at 2009-01-05T15:20:59.