USE [GADFly] GO /****** Object: StoredProcedure [Alg].[uspPopulation_Create] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 20/12/2017 Description: Creates a new population and returns its ID. Amend Date: 28/02/2018 Description: I have inserted a new sub-routine here to allow balancing of the target-true/ target-false subgroups in the true temp table (#tblDataCrosstab) initialised in BE.[uspDataCrosstab_Initialise]. This takes an analysis parameter (TargetBalancing) which lies between 0 and 1, where 0 represents no balancing so all filtered records are included, and 1 where the balancing is perfect. An intermediate value means that if there are end-groups that have some variation, a degree of balancing can be achieved, without reducing the dataset completely to twice the smallest group. Note that the sample is rebalanced for every population. Amend Date: 01/03/2018 Description: I have created a new input parameter which is a toggle between between real and randomised data. It is used in the balancing sub-routine described above. It is also provided to the insert statement as a third variable and passed to uspGenome_Score as a new parameter. Amend Date: 08/09/2020 Description: altered @bytCode to @intCode to allow > 255 populations. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspPopulation_Create] @intAnalysisID int, -- passed to uspGenome_Create @intDiscriminationID int, -- Used locally to return the next population code @blnRandomPop as bit, -- Flag to indicate population is based on random target. @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; -- **** DECLARATIONS & INITIALISATIONS ******************************************************** -- Local variables for balancing routine DECLARE @fltBalance as float, @fltTarget0 as float, @fltTarget1 as float, @intBalanceRecords as int, @strSQL as varchar(max), @strTarget as varchar(20), @strTargetGroupSmall as varchar(1), @strTargetGroupLarge as varchar(1); -- Declare local variables for control of flow. DECLARE @intCode as smallint, -- The next available population code - used locally. @intPopulationID as int, -- The ID of the population created here - passed on. @bytNewGenomes as tinyint, -- Number of new genomes to create @bytGenomeCounter as tinyint = 1, -- Counter for genome loop. @intGenomeID as int; -- Unique ID for the genome. -- Declare variables used for scoring the genome. DECLARE @fltGenePenalty as float, -- Constant for analysis, passed to uspGenomeScore. @fltCodonPenalty as float; -- Constant for analysis, passed to uspGenomeScore. -- Initialise the local variables. SELECT @fltBalance = fldValue/100 FROM @tblAnalysis WHERE fldText = 'Target Balancing'; SELECT @fltGenePenalty = fldValue/1000 FROM @tblAnalysis WHERE fldText = 'Gene Penalty'; SELECT @fltCodonPenalty = fldValue/1000 FROM @tblAnalysis WHERE fldText = 'Codon Penalty'; -- **** END OF DECLARATIONS & INITIALISATIONS ************************************************* -- **** BALANCING ROUTINE ********************************************************************* -- first use dynamic SQL to get the counts of target-false and target-true (using the target -- defined by @blnRandomPop. CREATE TABLE #tbl (fldTarget bit, fldN int); SET @strTarget = 'fldTarget' + CASE WHEN @blnRandomPop = 0 THEN '' ELSE 'Random' END; SET @strSQL = 'INSERT INTO #tbl SELECT ' + @strTarget + ', COUNT(fldValueID) FROM #tblDataCrosstab GROUP BY ' + @strTarget + ';'; EXEC (@strSQL); SELECT @fltTarget0 = fldN FROM #tbl WHERE fldTarget = 0; SELECT @fltTarget1 = fldN FROM #tbl WHERE fldTarget = 1; -- Now use these values to get the number of balance records IF @fltTarget0 > @fltTarget1 BEGIN SET @intBalanceRecords = CAST( @fltTarget1 + ((@fltTarget0 - @fltTarget1) * (1 - @fltBalance)) as int); SELECT @strTargetGroupSmall = '1', @strTargetGroupLarge = '0'; END; ELSE BEGIN SET @intBalanceRecords = CAST( @fltTarget0 + ((@fltTarget1 - @fltTarget0) * (1 - @fltBalance)) as int); SELECT @strTargetGroupSmall = '0', @strTargetGroupLarge = '1'; END; -- Now use dynamic SQL to set fldBalance to 1 for the small target group -- and fldRandom for the larger target group. SET @strSQL = ' UPDATE #tblDataCrosstab SET fldBalance = NULL, fldRandom = NULL; UPDATE #tblDataCrosstab SET fldBalance = 1 WHERE ' + @strTarget + ' = ' + @strTargetGroupSmall + '; UPDATE #tblDataCrosstab SET fldRandom = RAND(CAST(NEWID() as varbinary)) WHERE ' + @strTarget + ' = ' + @strTargetGroupLarge + ';'; EXEC (@strSQL); -- Finally set fldBalance in the top N random records in the larger target group to 1. SET @strSQL = ' WITH FirstPass AS ( SELECT TOP ' + CAST(@intBalanceRecords as varchar(10)) + ' * FROM #tblDataCrosstab ORDER BY fldRandom DESC) UPDATE FirstPass SET fldBalance = 1;'; EXEC (@strSQL); --drop table Test.tblDataCrosstab; --select * into Test.tblDataCrosstab from #tblDataCrosstab; -- **** END OF BALANCING ROUTINE ************************************************************** -- **** POPULATION CREATION ******************************************************************* -- If blnRandomPop = 0 then get the next available population code, otherwise keep the same. SELECT @intCode = ISNULL(MAX(fldCode), 0) + ABS(CAST(@blnRandomPop as tinyint) - 1) FROM BE.tblPopulation WHERE fldDiscriminationID = @intDiscriminationID; -- Use this to insert a new record and return its ID INSERT INTO BE.tblPopulation (fldDiscriminationID, fldCode, fldRandom) VALUES(@intDiscriminationID, @intCode, @blnRandomPop); SET @intPopulationID = SCOPE_IDENTITY(); -- Now create the system-specified number of new genomes for this population for generation = 0. --SELECT @bytNewGenomes = fldValue FROM @tblAnalysis WHERE fldText = 'New Genomes'; SELECT @bytNewGenomes = fldValue FROM @tblAnalysis WHERE fldText = 'Breeding Pop'; WHILE @bytGenomeCounter <= @bytNewGenomes BEGIN -- Call the Genome_Create procedure to create a new genome in the temp table hierarchy EXEC @intGenomeID = Alg.uspGenome_Create @intAnalysisID, @intPopulationID, 0, @bytGenomeCounter, 1, @tblAnalysis; -- Finally score the genome by calling uspGenome_Score with the scope parameter set to breeding. EXEC Alg.uspGenome_Score @intAnalysisID, @intGenomeID, @fltGenePenalty, @fltCodonPenalty, @blnRandomPop; SET @bytGenomeCounter += 1; END RETURN @intPopulationID; END --IF @fltTarget0 > @fltTarget1 --BEGIN -- SET @intBalanceRecords = CAST( @fltTarget1 + ((@fltTarget0 - @fltTarget1) * (1 - @fltBalance)) as int); -- UPDATE #tblDataCrosstab SET fldBalance = 1 WHERE fldTarget = 1; -- UPDATE #tblDataCrosstab SET fldRandom = RAND(CAST(NEWID() as varbinary)) WHERE fldTarget = 0; --END; --ELSE --BEGIN -- SET @intBalanceRecords = CAST( @fltTarget0 + ((@fltTarget1 - @fltTarget0) * (1 - @fltBalance)) as int); -- UPDATE #tblDataCrosstab SET fldBalance = 1 WHERE fldTarget = 0; -- UPDATE #tblDataCrosstab SET fldRandom = RAND(CAST(NEWID() as varbinary)) WHERE fldTarget = 1; --END; GO /****** Object: StoredProcedure [Alg].[uspPopulation_Evolve] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ============================================================================================================= Author: Simon Poulton Create date: 22/12/2016 Description: Evolves a new generation from the existing breeding population. This proc controls the breeding within a single generation. It extracts the breeding population into three tables; a) @tmpBreedingGenome - a local table variable that just holds the IDs of the genomes, b) Alg.tmpBreedingGene - a process-keyed table (with AnalysisID as the processID) and, c) Alg.tmpBreedingCodon - ditto. Then it creates a cursor of all the breeding genomes and loops through it. Within each loop it carries out three main tasks; a) it creates a new genome and stores it in the permanent table, b) it evolves the existing genome in some way and stores the genes and codons in the permanent tables and, c) it scores the genome. ============================================================================================================ */ CREATE PROCEDURE [Alg].[uspPopulation_Evolve] @intAnalysisID int, -- Used for all called procs. @intPopulationID int, -- Used locally. @intGeneration smallint, -- Code for current generation, just used for the record insertion @blnRandomPop bit, -- Flag to indicate population is based on random target. @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; -- Declare the local variables used in the analysis DECLARE @bytBreedingPop tinyint, -- Number of genomes in the breeding population. @fltGenePenalty float, -- Passed to the uspGenome_Score procedure. @fltCodonPenalty float, -- Passed to the uspGenome_Score procedure. @intGenomeID as int, -- Used in the breeding genome cursor and passed to several procs. @strLevel as nvarchar(6), -- The level at which the evolutionary action will take place. @intNewGenomeID int, -- Holds the ID of the new genome created within the cursor. @bytGenomeCode tinyint = 1, -- Next available genome code. @bytGenes as tinyint, -- Number of genes created by call to uspGenome_Create. @bytCodons as tinyint; -- Number of codons created by call to uspGenome_Create. -- First get the local variables SELECT @bytBreedingPop = fldValue FROM @tblAnalysis WHERE fldText = 'Breeding Pop'; SELECT @fltGenePenalty = (fldValue / 1000) FROM @tblAnalysis WHERE fldText = 'Gene Penalty'; SELECT @fltCodonPenalty = (fldValue / 1000) FROM @tblAnalysis WHERE fldText = 'Codon Penalty'; -- Now reset the breeding flag in tmpGenome to mark the top N scoring genomes -- Use a CTE to get only one genome per score, which prevents it being dominated by in-breeding. UPDATE Alg.tmpGenome SET fldBreeding = 0 WHERE fldPopulationID = @intPopulationID; WITH FirstPass AS ( SELECT DISTINCT TOP (@bytBreedingPop) fldScore FROM Alg.tmpGenome WHERE (fldPopulationID = @intPopulationID) ORDER BY fldScore DESC), SecondPass AS ( SELECT MIN(G.fldID) AS fldID FROM Alg.tmpGenome AS G INNER JOIN FirstPass AS FP ON G.fldScore = FP.fldScore GROUP BY G.fldScore) UPDATE G SET fldBreeding = 1 FROM Alg.tmpGenome AS G INNER JOIN SecondPass AS SP ON G.fldID = SP.fldID; -- Create a cursor of these genomes DECLARE csrBreedingGenomes CURSOR FAST_FORWARD FOR SELECT fldID FROM Alg.tvfBreedingGenomes(@intPopulationID); OPEN csrBreedingGenomes; FETCH NEXT FROM csrBreedingGenomes INTO @intGenomeID; WHILE @@FETCH_STATUS = 0 -- Loop through the genomes in the breeding population, carrying out an evolution on each one in turn. BEGIN -- First get the level at which this evolution will take place. SELECT @strLevel = Alg.svfGetRandomMutation(@tblAnalysis, 'Level', RAND(), 0, 0, 0); IF @strLevel = 'Genome' -- Create an entirely new genome BEGIN EXEC @intNewGenomeID = Alg.uspGenome_Create @intAnalysisID, @intPopulationID, @intGeneration, @bytGenomeCode, 1, @tblAnalysis; END IF @strLevel <> 'Genome' -- If the level is Gene or Codon, create a new blank genome record in tmpGenome and -- then evolve it by calling uspGenome_Evolve BEGIN EXEC @intNewGenomeID = Alg.uspGenome_Create @intAnalysisID, @intPopulationID, @intGeneration, @bytGenomeCode, 0, @tblAnalysis; EXEC Alg.uspGenome_Evolve @intAnalysisID, @intPopulationID, @intGenomeID, @intNewGenomeID, @strLevel, @tblAnalysis; END -- Finally score the genome by calling uspGenome_Score with the scope parameter set to breeding. EXEC Alg.uspGenome_Score @intAnalysisID, @intNewGenomeID, @fltGenePenalty, @fltCodonPenalty, @blnRandomPop; SET @bytGenomeCode +=1; FETCH NEXT FROM csrBreedingGenomes INTO @intGenomeID; END CLOSE csrBreedingGenomes; DEALLOCATE csrBreedingGenomes; END GO /****** Object: StoredProcedure [Alg].[uspGenome_Score] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 20/12/2017 Description: This proc encapsulates the call to Alg.tvfGetGenomeScores with its parameters and stores the Phi coefficient in tmpGenome. Amend Date: 01/03/2018 Description: New input parameter indicating whether the population uses random target records, which is passed down to uspGenome_Evaluate. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspGenome_Score] @intAnalysisID int, @intGenomeID int, @fltGenePenalty float, @fltCodonPenalty float, @blnRandomPop bit AS BEGIN SET NOCOUNT ON; -- **** DECLARATIONS ************************************************************************** DECLARE @tblTally as Alg.udttGenomeScore; DECLARE @bytGenes as tinyint, -- Number of genes created by call to uspGenome_Create. @bytCodons as tinyint, -- Number of codons created by call to uspGenome_Create. @fltPhi as float; -- Holds the phi coefficient. -- **** END OF DECLARATIONS ******************************************************************* INSERT INTO @tblTally EXEC Alg.uspGenome_Evaluate @intAnalysisID, @intGenomeID, @blnRandomPop; SELECT @bytGenes = COUNT(fldID) FROM Alg.tmpGene WHERE fldGenomeID = @intGenomeID; SELECT @bytCodons = COUNT(G.fldID) FROM Alg.tmpGene AS G INNER JOIN Alg.tmpCodon AS C ON G.fldID = C.fldGeneID WHERE G.fldGenomeID = @intGenomeID; -- Finally score the tally by calling tvfGetGenomeScores and save this value in tmpGenome. -- Note use of TRY/CATCH to capture errors (e.g. divide by zero) in the function. BEGIN try SELECT @fltPhi = fldPhi FROM Alg.tvfGetGenomeScores( @tblTally, 'Breeding', @bytGenes, @bytCodons, @fltGenePenalty, @fltCodonPenalty); END TRY BEGIN CATCH SET @fltPhi = 0; END CATCH UPDATE Alg.tmpGenome SET fldScore = @fltPhi WHERE fldID = @intGenomeID; END GO /****** Object: StoredProcedure [Alg].[uspGenome_Evolve] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 22/12/2016 Description: Takes the specified Genome and evolves it according to the action parameter. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspGenome_Evolve] @intAnalysisID int, -- Used for all called procs. @intPopulationID int, -- Used locally. @intOldGenomeID as int, -- ID of the old genome to be evolved from. @intNewGenomeID as int, -- ID of the New genome to be evolved into. @strLevel nvarchar(5), -- The level at which the evolutionary action will take place. @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; -- Declare the local variables used in the analysis DECLARE @bytCurrentGenes as tinyint, -- Number of genes in the current genome. @bytRandomGeneCode as tinyint, -- Code of randomly selected gene within the current genome. @bytCurrentCodons as tinyint, -- Number of codons in the current gene. @bytCodonCode as tinyint; -- Code of randomly selected codon within the current gene. -- First make a copy of the genes in the old genome into tmpGene. INSERT INTO Alg.tmpGene ( fldGenomeID, fldCode, fldLogical, fldLeft, fldRight, fldOrder, fldComparison, fldType) SELECT @intNewGenomeID, fldCode, fldLogical, fldLeft, fldRight, fldOrder, fldComparison, fldType FROM Alg.tvfCurrentGenes(@intOldGenomeID); SET @bytCurrentGenes = @@ROWCOUNT; -- And make a copy of all the codons in the genome into tmpCurrentCodon. INSERT INTO Alg.tmpCodon( fldGeneID, fldCode, fldLeft, fldRight, fldOrder, fldArithmetic, fldVariableID, fldValueID) SELECT NG.fldID, C.fldCode, C.fldLeft, C.fldRight, C.fldOrder, C.fldArithmetic, C.fldVariableID, C.fldValueID FROM Alg.tmpCodon AS C INNER JOIN (SELECT fldID, fldCode FROM Alg.tvfCurrentGenes(@intOldGenomeID) ) AS OG ON C.fldGeneID = OG.fldID INNER JOIN (SELECT fldID, fldCode FROM Alg.tvfCurrentGenes(@intNewGenomeID) ) AS NG ON OG.fldCode = NG.fldCode; IF @strLevel = 'Gene' EXEC Alg.uspGene_Evolve @intAnalysisID, @intPopulationID, @intNewGenomeID, @tblAnalysis; ELSE EXEC Alg.uspCodon_Evolve @intAnalysisID, @intPopulationID, @intNewGenomeID, @tblAnalysis; END GO /****** Object: StoredProcedure [Alg].[uspGenome_Evaluate] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ============================================================================================================= Author: Simon Poulton Create date: 20/12/2017 Description: Evaluates the current genome. It uses the true temp table #tmpDataCrosstab, created in uspAnalysis_run and populated in uspDataCrosstab_Initialise, to evaluate the genome using a single expression created with dynamic SQL. The SQL string takes the following format: WITH FirstPass as ( SELECT fldTarget ,CASE WHEN Var2 is null THEN NULL ELSE case when Var2 > 70 then 1 else 0 end END AS Gene1 ,CASE WHEN Var3 is null THEN NULL ELSE case when Var3 > 80 then 1 else 0 end END AS Gene2 , ... FROM #tblDataCrosstab ), SecondPass as ( SELECT fldTarget, (Gene1 & Gene2 | ...) AS fldPredictor FROM FirstPass ) SELECT fldTarget, CASE WHEN fldPredictor IS NULL THEN -1 ELSE fldPredictor END, COUNT(fldTarget) AS fldN FROM SecondPass GROUP BY fldTarget, fldPredictor ORDER BY fldTarget, fldPredictor; The proc returns a table with three fields; fldTarget bit, fldPredictor bit, fldN int; Amend Date: 28/02/2018 Description: I have added an additional Where clause to the final SQL string for #tblDataCrosstab. This utilises the new fldBalance which has been initialised in uspPopulation_Create. Amend Date: 01/03/2018 Description: I have added an additional input parameter indicating whether the population uses random target records. This modifies the final SQL evaluation expression to use fldTargetRandom when true. ============================================================================================================= */ CREATE PROCEDURE [Alg].[uspGenome_Evaluate] @intAnalysisID int, @intGenomeID int, @blnRandomPop bit AS BEGIN SET NOCOUNT ON; DECLARE @intGeneID as int, @bytGeneCode as tinyint, @strComparison as nvarchar(2), @strVariables as nvarchar(max) = '', @strValues as nvarchar(max) = '', @strGeneDefinitions as nvarchar(max) = '', @strGeneConcatenations as nvarchar(max) = '', @strSQL as nvarchar(max) = '', @strError as nvarchar(max); -- First declare a cursor to loop through the genes getting the code and comparison operator DECLARE csr CURSOR FAST_FORWARD FOR SELECT fldID, fldCode, fldComparison FROM Alg.tmpGene WHERE fldGenomeID = @intGenomeID ORDER BY fldOrder; OPEN csr; FETCH next FROM csr INTO @intGeneID, @bytGeneCode, @strComparison WHILE @@FETCH_STATUS = 0 BEGIN -- Concatenate the individual gene definitions SET @strVariables = ''; SELECT @strVariables += CASE WHEN C.fldOrder = 0 THEN '' ELSE C.fldArithmetic END + ' ' + fldLeft + 'Var' + CAST(V.fldID as nvarchar(10)) + CASE WHEN V.fldType = 4 THEN 'V' ELSE '' END + fldRight + ' ' FROM Alg.tmpCodon AS C INNER JOIN Alg.tvfVariable(@intAnalysisID) AS V ON C.fldVariableID = V.fldID WHERE C.fldGeneID = @intGeneID ORDER BY fldOrder; -- Concatenate the individual value definitions SET @strValues = ''; SELECT @strValues += CASE WHEN C.fldOrder = 0 THEN '' ELSE C.fldArithmetic END + ' ' + fldLeft + CASE WHEN V.fldType = 4 THEN VAL.fldText ELSE CAST(C.fldValueID as nvarchar(10)) END + fldRight + ' ' FROM Alg.tmpCodon AS C INNER JOIN Alg.tvfVariable(@intAnalysisID) AS V ON C.fldVariableID = V.fldID INNER JOIN Alg.tvfValue(@intAnalysisID) AS VAL ON C.fldValueID = VAL.fldID WHERE C.fldGeneID = @intGeneID ORDER BY fldOrder; -- Combine these into a single expression and concatenate it with the previous gene definitions. SELECT @strGeneDefinitions += ', CASE WHEN ' + @strVariables + ' IS NULL THEN NULL ELSE ' + 'CASE WHEN ' + @strVariables + ' ' + @strComparison + ' ' + @strValues + ' THEN 1 ELSE 0 END ' + 'END AS Gene' + CAST(@bytGeneCode AS nvarchar(10)); FETCH next FROM csr INTO @intGeneID, @bytGeneCode, @strComparison END CLOSE csr; DEALLOCATE csr; -- Now simply create the string for concatenating the genes together with the comparison operators. SELECT @strGeneConcatenations += CASE WHEN fldOrder > 0 THEN fldLogical ELSE '' END + ' ' + fldLeft + ' Gene' + CAST(fldCode as nvarchar(10)) + ' ' + fldRight + ' ' FROM Alg.tmpGene WHERE fldGenomeID = @intGenomeID ORDER BY fldOrder; -- Finally, compile the full SQL string and execute it SET @strSQL = ' WITH FirstPass as ( SELECT fldTarget' + CASE WHEN @blnRandomPop = 1 THEN 'Random' ELSE '' END + ' AS fldTarget ' + @strGeneDefinitions + ' FROM #tblDataCrosstab WHERE (fldTarget IS NOT NULL) AND (fldBalance = 1) ), SecondPass as ( SELECT fldTarget, ' + @strGeneConcatenations + ' AS fldPredictor FROM FirstPass) SELECT fldTarget, CASE WHEN fldPredictor IS NULL THEN -1 ELSE fldPredictor END AS fldPredictor, COUNT(fldTarget) AS fldN FROM SecondPass GROUP BY fldTarget, fldPredictor ORDER BY fldTarget, fldPredictor;'; BEGIN TRY EXEC (@strSQL); END TRY BEGIN CATCH -- If any error occurs in the evaluation ( maybe due to divide by zero error ) -- then just return an an empty table which will be dealt with by the calling proc. SELECT 0 AS fldTarget, 0 AS fldPredictor, 0 AS fldN; END CATCH END GO /****** Object: StoredProcedure [Alg].[uspGenome_Create] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 20/12/2016 Description: Creates a brand new genome and returns the genomeID. If @blnMakeGenes = 1, i.e. the calling proc is uspPopulation_Create, then create the required number of genes for the specified genome, otherwise just return. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspGenome_Create] @intAnalysisID int, -- Used in uspGene_Create and uspGenome_Score. @intPopulationID int, -- Used locally to get the next genome code. @intGeneration smallint, -- Ditto. @bytGenomeCode as tinyint, -- Unique code for the genome within the population. @blnMakeGenes as bit, -- Boolean flag to create new genes or just return the genome record. @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; DECLARE @intGenomeID as int, -- Unique ID for the genome - returned to the calling proc. @bytMaxGenes tinyint = 0, -- Used locally in the genome creation loop. @bytGeneCode as tinyint = 1, -- Counter for gene creation loop. @fltNewGene as float, -- Probability of a new gene being created within the loop. @blnNewGene as bit = 1; -- Boolean flag to control gene generation loop -- First create the new genome record and save its ID. INSERT INTO Alg.tmpGenome ( fldPopulationID, fldGeneration, fldCode) VALUES( @intPopulationID, @intGeneration, @bytGenomeCode); SET @intGenomeID = SCOPE_IDENTITY(); IF @blnMakeGenes = 1 -- If true then create the required number of new genes, BUT test the value of -- @blnLastGene during each loop. Within each loop it calls the uspGene_Create. BEGIN SELECT @bytMaxGenes = fldValue FROM @tblAnalysis WHERE fldText = 'Max Genes'; SELECT @fltNewGene = fldValue/100 FROM @tblAnalysis WHERE fldText = 'New Gene'; WHILE (@bytGeneCode <= @bytMaxGenes) AND (@blnNewGene = 1) BEGIN EXEC Alg.uspGene_Create @intAnalysisID, @intGenomeID, @bytGeneCode, @tblAnalysis; -- Increment the gene code and test for a new gene. SET @bytGeneCode += 1; SET @blnNewGene = CASE WHEN RAND() < @fltNewGene THEN 1 ELSE 0 END; END -- Finally, bracket the genes in this genome EXEC Alg.uspGene_Rebracket @intGenomeID; END RETURN @intGenomeID; END GO /****** Object: StoredProcedure [Alg].[uspGeneRecord_Insert] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ============================================================================================================= Author: Simon Poulton Create date: 03/02/2018 Description: Evaluates the current genome for genes individually, storing the results in tblGeneRecord. It uses the same logic as uspGenome_Evaluate, but stores the result within each gene loop, rather than concatenating at the end. ============================================================================================================= */ CREATE PROCEDURE [Alg].[uspGeneRecord_Insert] @intAnalysisID int, @intGenomeID int AS BEGIN SET NOCOUNT ON; DECLARE @intGeneID as int, @bytGeneCode as tinyint, @strComparison as nvarchar(2), @strVariables as nvarchar(max) = '', @strValues as nvarchar(max) = '', @strGeneDefinition as nvarchar(max) = '', @strSQL as nvarchar(max) = ''; -- First declare a cursor to loop through the genes getting the code and comparison operator DECLARE csr CURSOR FAST_FORWARD FOR SELECT fldID, fldCode, fldComparison FROM BE.tblGene WHERE fldGenomeID = @intGenomeID ORDER BY fldOrder; OPEN csr; FETCH next FROM csr INTO @intGeneID, @bytGeneCode, @strComparison WHILE @@FETCH_STATUS = 0 BEGIN -- Concatenate the individual gene definitions SET @strVariables = ''; SELECT @strVariables += CASE WHEN C.fldOrder = 0 THEN '' ELSE C.fldArithmetic END + ' ' + fldLeft + 'Var' + CAST(V.fldID as nvarchar(10)) + CASE WHEN V.fldType = 4 THEN 'V' ELSE '' END + fldRight + ' ' FROM BE.tblCodon AS C INNER JOIN BE.tblVariable AS V ON C.fldVariableID = V.fldID WHERE C.fldGeneID = @intGeneID ORDER BY fldOrder; -- Concatenate the individual value definitions SET @strValues = ''; SELECT @strValues += CASE WHEN C.fldOrder = 0 THEN '' ELSE C.fldArithmetic END + ' ' + fldLeft + CASE WHEN V.fldType = 4 THEN VAL.fldText ELSE CAST(C.fldValueID as nvarchar(10)) END + fldRight + ' ' FROM BE.tblCodon AS C INNER JOIN BE.tblVariable AS V ON C.fldVariableID = V.fldID INNER JOIN BE.tblValue AS VAL ON C.fldValueID = VAL.fldID WHERE C.fldGeneID = @intGeneID ORDER BY fldOrder; -- Combine these into a single expression for the gene definition. SELECT @strGeneDefinition = ', CASE WHEN ' + @strVariables + ' IS NULL THEN NULL ELSE ' + 'CASE WHEN ' + @strVariables + ' ' + @strComparison + ' ' + @strValues + ' THEN 1 ELSE 0 END ' + 'END AS fldGene'; -- + CAST(@bytGeneCode AS nvarchar(10)); -- Finally, compile the full SQL string and execute it SET @strSQL = ' WITH FirstPass as ( SELECT fldRecordID ' + @strGeneDefinition + ' FROM #tblDataCrosstab WHERE fldTarget IS NOT NULL) INSERT INTO BE.tblGeneRecord SELECT ' + CAST(@intGeneID as nvarchar(10)) + ', fldRecordID, fldGene AS fldResult FROM FirstPass WHERE fldGene IS NOT NULL; '; BEGIN TRY EXEC (@strSQL); END TRY BEGIN CATCH -- If any error occurs in the evaluation ( maybe due to divide by zero error ) -- then just return an an empty table which will be dealt with by the calling proc. SELECT 0 AS fldTarget, 0 AS fldPredictor, 0 AS fldN; END CATCH FETCH next FROM csr INTO @intGeneID, @bytGeneCode, @strComparison END CLOSE csr; DEALLOCATE csr; END GO /****** Object: StoredProcedure [Alg].[uspGene_Rebracket] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ======================================================================================================= Author: Simon Poulton Create date: 29/11/2016 Description: BRILLIANT CODING!!! I've finally cracked how to specify and store the information required for bracketing these expressions. It is based on FIVE logical rules; a) there will always be n - 1 pairs of brackets. b) The count of all the left brackets must equal the count of all the right brackets. c) Any one term must have left or right brackets - not both or neither. d Any one term can have between zero and n - 1 brackets. e) The whole genome expression must begin with a term with left brackets and end with a term with right brackets, but apart from this the terms can be in any order. ======================================================================================================= */ CREATE PROCEDURE [Alg].[uspGene_Rebracket] @intGenomeID int AS BEGIN SET NOCOUNT ON; DECLARE @bytRecords as tinyint, @bytCode as tinyint, @bytLefts as tinyint = 0, @bytRights as tinyint = 0; -- First get the number of gene records in the current genome. SELECT @bytRecords = COUNT(fldCode) FROM Alg.tmpGene WHERE (fldGenomeID = @intGenomeID); -- Clear the current genes of brackets and set the order to 255. UPDATE Alg.tmpGene SET fldLeft = '', fldRight = '', fldOrder = 255 WHERE (fldGenomeID = @intGenomeID); IF @bytRecords > 1 -- Only add brackets if more than one record BEGIN -- Add n-1 left brackets randomly to the records WHILE @bytLefts < (@bytRecords - 1) BEGIN SET @bytCode = Alg.svfGetRandomCode(RAND(), @bytRecords) UPDATE Alg.tmpGene SET fldLeft += '(', fldOrder = CASE WHEN @bytLefts = 0 THEN 0 ELSE fldOrder END WHERE (fldGenomeID = @intGenomeID) AND (fldCode = @bytCode); SET @bytLefts += 1; END -- Now add one right bracket to all records with no left brackets and count them. UPDATE Alg.tmpGene SET fldRight += ')' WHERE (fldGenomeID = @intGenomeID) AND (fldLeft = ''); SET @bytRights = @@ROWCOUNT; -- Finally, randomly add the remaining rights (up to n-1) to records with no lefts. WHILE @bytRights < (@bytRecords - 1) BEGIN SET @bytCode = Alg.svfGetRandomCode(RAND(), @bytRecords) -- Test to ensure that only records with no lefts are selected. IF (SELECT fldLeft FROM Alg.tmpGene WHERE (fldGenomeID = @intGenomeID) AND (fldCode = @bytCode)) = '' BEGIN UPDATE Alg.tmpGene SET fldRight += ')', fldOrder = CASE WHEN @bytRights = (@bytRecords - 2) THEN @bytRecords - 1 ELSE fldOrder END WHERE (fldGenomeID = @intGenomeID) AND (fldCode = @bytCode); SET @bytRights += 1; END; END; -- Finally, randomly sort the remaining records (still coded 255) and recode them from 1 to records-1 WITH FirstPass AS ( SELECT fldCode, (ROW_NUMBER() OVER (ORDER BY NEWID())) AS fldRN FROM Alg.tmpGene WHERE (fldGenomeID = @intGenomeID) AND (fldOrder = 255) ) UPDATE G SET fldOrder = fldRN FROM FirstPass AS FP INNER JOIN Alg.tmpGene AS G ON FP.fldCode = G.fldCode WHERE G.fldGenomeID = @intGenomeID; END -- Just set the order to zero for evaluation. ELSE BEGIN UPDATE Alg.tmpGene SET fldOrder = 0 WHERE (fldGenomeID = @intGenomeID); END END GO /****** Object: StoredProcedure [Alg].[uspGene_Evolve] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 23/12/2016 Description: Takes the specified Genome, and evolves it according to a random mutation. Note that if this is explicitly 'Rebracket' or one of the first three in the list, then it calls the Rebracket proc. It does not do this for the logical and comparison mutations. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspGene_Evolve] @intAnalysisID int, -- Used only in uspGene_Create for insert. @intPopulationID int, -- Used only in svfGetRandomGene for transpose. @intGenomeID int, -- ID of current genome to be evolved. @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; -- Declare variables used for returning the random mutation. DECLARE @bytCurrentGenes as tinyint, -- Number of genes in the current genome. @bytGeneCode tinyint, -- Code of the randomly selected gene that will be evolved. @bytType tinyint, -- Type of gene to be evolved, used in svfGetRandomMutation and strOperator. @bytMaxGenes tinyint, -- Maximum number of genes per genome, used to get strMutation. @strMutation nvarchar(10); -- Type of evolutionary mutation to apply to the gene. -- Declare the local variables used in the various mutations. DECLARE @bytNewGeneCode tinyint, -- Incremented gene code, used for insert and transpose. @intOldGeneID as int, -- Random gene ID from the breeding population, only used for transpose. @intNewGeneID as int, -- ID of new gene created by Transpose @strLogical nvarchar(1), -- Random Logical operator @strComparison nvarchar(2); -- Random comparison operator -- First get the random mutation. SELECT @bytCurrentGenes = COUNT(fldCode) FROM Alg.tvfCurrentGenes(@intGenomeID); SELECT @bytGeneCode = Alg.svfGetRandomCode(RAND(), @bytCurrentGenes); SELECT @bytType = fldType FROM Alg.tvfCurrentGenes(@intGenomeID) WHERE fldCode = @bytGeneCode; SELECT @bytMaxGenes = fldValue FROM @tblAnalysis WHERE fldText = 'Max Genes'; SELECT @strMutation = Alg.svfGetRandomMutation(@tblAnalysis, 'Gene', RAND(), @bytType, CASE WHEN @bytCurrentGenes = 1 THEN 1 ELSE 0 END, CASE WHEN @bytCurrentGenes = @bytMaxGenes THEN 1 ELSE 0 END); IF @strMutation = 'Insert' -- Create an entirely new gene by calling uspGene_Create BEGIN SELECT @bytNewGeneCode = MAX(fldCode) + 1 FROM Alg.tvfCurrentGenes(@intGenomeID); EXEC Alg.uspGene_Create @intAnalysisID, @intGenomeID, @bytNewGeneCode, @tblAnalysis; END IF @strMutation = 'Remove' -- Delete the specified Gene from tmpGene which cascades to tmpCodon and decrement the larger codes BEGIN DELETE FROM Alg.tvfCurrentGenes(@intGenomeID) WHERE (fldCode = @bytGeneCode); UPDATE Alg.tvfCurrentGenes(@intGenomeID) SET fldCode -= 1 WHERE (fldCode > @bytGeneCode); END IF @strMutation = 'Transpose' -- Copy an existing random gene plus its codons from the breeding population into tmpGene. BEGIN -- Get a random gene from the tmpBreedingPopulation and copy it into tmpGene. -- Note that it's not necessary to copy the brackets because the whole new genome will be rebracketed. SELECT @bytNewGeneCode = MAX(fldCode) + 1 FROM Alg.tvfCurrentGenes(@intGenomeID); SELECT @intOldGeneID = Alg.svfGetRandomGene(@intPopulationID, RAND()); INSERT INTO Alg.tmpGene ( fldGenomeID, fldCode, fldLogical, fldComparison, fldType) SELECT @intGenomeID, @bytNewGeneCode, fldLogical, fldComparison, fldType FROM Alg.tmpGene WHERE fldID = @intOldGeneID; SET @intNewGeneID = SCOPE_IDENTITY(); -- And copy the codons for this gene - but include the brackets. INSERT INTO Alg.tmpCodon ( fldGeneID, fldCode, fldLeft, fldRight, fldOrder, fldArithmetic, fldVariableID, fldValueID) SELECT @intNewGeneID, C.fldCode, C.fldLeft, C.fldRight, C.fldOrder, C.fldArithmetic, C.fldVariableID, C.fldValueID FROM Alg.tmpCodon AS C WHERE fldGeneID = @intOldGeneID; END IF @strMutation = 'Logical' -- Randomly mutate the logical operator in the specified gene BEGIN SELECT @strLogical = Alg.svfGetRandomOperator(@intAnalysisID, 'Logical', RAND(), @bytType); UPDATE Alg.tvfCurrentGenes(@intGenomeID) SET fldLogical = @strLogical WHERE (fldCode = @bytGeneCode); END IF @strMutation = 'Comparison' -- Randomly mutate the operator in the specified gene BEGIN SELECT @strComparison = Alg.svfGetRandomOperator(@intAnalysisID, 'Comparison', RAND(), @bytType); UPDATE Alg.tvfCurrentGenes(@intGenomeID) SET fldComparison = @strComparison WHERE (fldCode = @bytGeneCode); END IF @strMutation IN ('Rebracket', 'Insert', 'Remove', 'Transpose') -- Rebracket the whole genome BEGIN EXEC Alg.uspGene_Rebracket @intGenomeID; END END GO /****** Object: StoredProcedure [Alg].[uspGene_Create] Script Date: 05/08/2024 11:19:01 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 21/12/2016 Description: Creates a single new gene record in tmpCurrentGene. It then calls uspCodonCreate a variable number of times, up to the maximum number of codons allowed. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspGene_Create] @intAnalysisID int, -- Used create the random variable and passed to uspCodon_Create @intGenomeID int, -- ID for the current genome. @bytGeneCode tinyint, -- Code for the new gene to be created @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; DECLARE @intVariableID as int, -- Holds the ID of the new random variable. @bytType as tinyint, -- Holds the type of this variables @strLogical as nvarchar(1), -- Holds a random logical operator for the gene. @strComparison as nvarchar(10), -- Holds a random comparison operator based on the variable. @intGeneID as int, -- ID for the gene created here. @bytMaxCodons as tinyint, -- Used for controlling the codon creation loop. @bytCodonCounter as tinyint = 1, -- Counter for codon loop. @fltNewCodon as float, -- Probability of a new codon being created within the loop. @blnNewCodon as bit = 1; -- Boolean flag to control codon generation loop. -- First get a random variable and its type SELECT @intVariableID = Alg.svfGetRandomVariable(@intAnalysisID, RAND(), 0); SELECT @bytType = fldType FROM Alg.tvfVariable(@intAnalysisID) WHERE fldID = @intVariableID; -- Next get random logical and comparison operators SELECT @strLogical = Alg.svfGetRandomOperator(@intAnalysisID, 'Logical', RAND(), 0); SELECT @strComparison = Alg.svfGetRandomOperator(@intAnalysisID, 'Comparison', RAND(), @bytType); -- Use these to insert a new record into the temp gene table. INSERT INTO Alg.tmpGene ( fldGenomeID, fldCode, fldLogical, fldComparison, fldType) VALUES( @intGenomeID, @bytGeneCode, @strLogical, @strComparison, @bytType); SET @intGeneID = SCOPE_IDENTITY(); -- Now create up to the specified number of codons. SELECT @bytMaxCodons = CASE WHEN @bytType <> 4 THEN 1 ELSE fldValue END FROM @tblAnalysis WHERE fldText = 'Max Codons'; SELECT @fltNewCodon = fldValue/100 FROM @tblAnalysis WHERE fldText = 'New Codon'; WHILE (@bytCodonCounter <= @bytMaxCodons) AND (@blnNewCodon = 1) BEGIN EXEC Alg.uspCodon_Create @intAnalysisID, @intGeneID, @bytCodonCounter, @intVariableID -- Now increment the codon code and test for last codon SET @bytCodonCounter += 1 SET @blnNewCodon = CASE WHEN RAND() < @fltNewCodon THEN 1 ELSE 0 END; END -- Finally, bracket the codons in this gene EXEC Alg.uspCodon_Rebracket @intGeneID; RETURN @intGeneID; END GO /****** Object: StoredProcedure [Alg].[uspPopulation_StoreProgress] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 20/12/2016 Description: Stores the progress of the specified population during running. It also tests the current status and returns it. Amend date: 27/08/2020 Description: I have removed the test of "perfect" discriminations, because it's not! UNfortunately, the only way to detect perfect discriminations is either phi = 1 or from the classification table, neither of which are easily available at this stage. So, for now, I've just increased the threshold to score = 1, which will still work for single gene/codon rules. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspPopulation_StoreProgress] @intPopulationID as int, @dtmPopStart as datetime2(7), @intGenCurrent as int, @intGenMax as int, @fltStoppingRule as float AS BEGIN SET NOCOUNT ON; DECLARE @fltMaxScore as float, @intMaxScoreGen as int, @fltProgress as float, @fltTotal as float, @fltElapsed as float, @fltRemaining as float, @intStoppingGens as int, -- Number of generations without improvement that causes a stop. @bytStatus as tinyint; -- Status of the population cycle, can only set by user from front-end. -- First get the current maximum score and the first generation it was attained. SELECT @fltMaxScore = MAX(fldScore) FROM Alg.tmpGenome WHERE (fldPopulationID = @intPopulationID); SELECT @intMaxScoreGen = MIN(fldGeneration) FROM Alg.tmpGenome WHERE (fldPopulationID = @intPopulationID) AND (fldScore = @fltMaxScore); -- set the values of the local variables SET @fltProgress = (CAST(@intGenCurrent AS FLOAT) / CAST(@intGenMax AS FLOAT)); SET @fltElapsed = DATEDIFF(s, @dtmPopStart, GetDate()); SET @fltTotal = @fltElapsed / @fltProgress; SET @fltRemaining = @fltTotal - @fltElapsed; -- Insert them into tmpProgress and return the blnStopped flag INSERT INTO Alg.tmpProgress VALUES( @intPopulationID, @intGenCurrent, @fltMaxScore, @intMaxScoreGen, @fltElapsed, @fltRemaining); -- Finally, get the population status to see if it is still running (3) or user-cancelled (4) SELECT @bytStatus = fldStatus from BE.tblPopulation WHERE fldID = @intPopulationID; IF @bytStatus = 3 BEGIN -- Test if there has been no improvement for the specified number of generations -- then set the status to stopped (5). SET @intStoppingGens = CAST((CAST(@intGenMax as float) * @fltStoppingRule) as int); IF (@intGenCurrent - @intMaxScoreGen) > @intStoppingGens SET @bytStatus = 5; ELSE BEGIN -- Next test if a perfect discrimination has been reached (based on there being -- gene or codon penalties of at least 1%. NB: see amendment IF @fltMaxScore = 1 -- > 0.99 BEGIN SET @bytStatus = 5; END; ELSE BEGIN -- Finally test if the maximum number of generations has been reached and, if so, -- set the status to completed (6). IF @intGenCurrent >= @intGenMax SET @bytStatus = 6; END; END; END; RETURN @bytStatus; END GO /****** Object: StoredProcedure [Alg].[uspDiscrimination_SetTarget] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 15/12/2017 Description: Update the target column on #tblDataCrosstab using the expression(s) held in one of two source tables; a) BE.tblTarget or b) #tmpTargetValue. These values are provided in the second parameter. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspDiscrimination_SetTarget] @intDiscriminationID AS int, @strSource as nvarchar(100) AS BEGIN SET NOCOUNT ON; DECLARE @strSQL as nvarchar(max), @strParams as nvarchar(max); SET @strSQL = ' UPDATE #tblDataCrosstab SET fldTarget = NULL; UPDATE D SET fldTarget = T.fldBranch FROM ' + @strSource + ' AS T INNER JOIN #tblDataCrosstab AS D ON T.fldCode = D.fldTargetCode WHERE T.fldDiscriminationID = @intDiscriminationID; '; SET @strParams = '@intDiscriminationID int' EXECUTE sp_executesql @strSQL, @strParams, @intDiscriminationID; END GO /****** Object: StoredProcedure [Alg].[uspDiscrimination_Insert] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ==================================================================================================== Author: Simon Poulton Create date: 07/12/2017 Description: Inserts a new discrimination record into the hierarchy based on the specified discrimination and branch. It also inserts the relevant target records. ==================================================================================================== */ CREATE PROCEDURE [Alg].[uspDiscrimination_Insert] @intDiscriminationID as int, -- Parent discrimination ID @blnBranch as bit -- True or False branch. AS BEGIN SET NOCOUNT ON; DECLARE @intAnalysisID as int, @hid as hierarchyid, @intNewDiscID as int; SELECT @intAnalysisID = fldAnalysisID, @hid = hierarchyid::Parse(fldBranch.ToString() + CAST(@blnBranch as nvarchar(1)) + '/') FROM BE.tblDiscrimination WHERE fldID = @intDiscriminationID; -- Insert the new discrimination record. INSERT INTO BE.tblDiscrimination (fldAnalysisID, fldBranch) VALUES(@intAnalysisID, @hid); SET @intNewDiscID = SCOPE_IDENTITY(); -- Insert the target records copied from the parent, setting all the new branch flags to false. INSERT INTO BE.tblTarget SELECT @intNewDiscID, fldValueID, ROW_NUMBER() OVER (ORDER BY fldCode), 0 FROM BE.tblTarget WHERE (fldDiscriminationID = @intDiscriminationID) AND (fldBranch = @blnBranch); RETURN @intNewDiscID; END GO /****** Object: StoredProcedure [Alg].[uspDiscrimination_EvaluateTargets] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ==================================================================================================== Author: Simon Poulton Create date: 12/12/2017 Description: This procedure evaluates all potential target combinations for the specified discrimination. It uses three local temporary tables (#tmpTarget, #tmpTargetValue and #tmpTally and one process-keyed table (Alg.tmpPredictorGenome). It requires #tmpTally to simplify the coding in the dynamic SQL, but then these data are passed to a table variable for onward passing to the tvfGetGenomeScores. Firstly, the number of categories (records) for the discrimination are already held in tblTarget, where the values for fldBranch are all set to 0. If the number of categories is 2 then the second category is defined deterministically as the first target value. Otherwise, all combinations of targets are evaluated against all the pre-populated predictor genomes in two nested loops. Each evaluation generates a single Phi coefficient which is stored back in tmpPredictorGenomes. At the end of the inner loop the average of the top 20% of scores is stored in #tblTarget. At the end of the outer loop, the values of fldBranch in tblTarget are updated to 1 in all the records in tblTarget that belong to the highest scoring target. Amend Date: 18/01/2017 Description: Included fldFilter in tblTarget at line 104. ==================================================================================================== */ CREATE PROCEDURE [Alg].[uspDiscrimination_EvaluateTargets] @intDiscriminationID as int AS BEGIN SET NOCOUNT ON; -- **** INITIALISE THE TEMP TABLES AND VARIABLES FOR THIS PROC ************************************ -- Table to hold the unique combinations of target categories, defined deterministically by the -- number of categories - looked up in tblLookupMultinomials. CREATE TABLE #tmpTarget( fldDiscrimination tinyint, fldScore float); -- Table to hold the actual values generated for each potential discrimination. CREATE TABLE #tmpTargetValue( fldDiscrimination tinyint, fldCode tinyint, fldBranch bit); -- Table to hold the 2 x 2 tallies of the binary flags in #tmpEvaluate, which are then passed to -- local variables for scoring. CREATE TABLE #tmpTally ( fldTarget int, fldPredictor int, fldN int); -- Local variables used for controlling flow of the procedure. DECLARE @intAnalysisID as int, @bytCategories as tinyint, @bytTargets as tinyint, @bytTargetCounter as tinyint, @bytPredictors as tinyint, @bytPredictCounter as tinyint, @bytTopTarget as tinyint; DECLARE --@strTarget as varchar(max), @strOperator as nvarchar (max), @strOperand as nvarchar (max), @strPredictor as varchar(max), @strSQL as nvarchar(max); -- Variables used to calculate the Phi coefficient. DECLARE @tblTally as Alg.udttGenomeScore, @Phi as float; --- **** END OF INITIALISATIONS ****************************************************************** -- Get the Analysis ID, the number of predictors and the number of categories in the discrimination. SELECT @intAnalysisID = fldAnalysisID FROM BE.tblDiscrimination WHERE fldID = @intDiscriminationID; SELECT @bytPredictors = COUNT(fldRN) FROM Alg.tvfPredictorGenome(@intAnalysisID); SELECT @bytCategories = COUNT(fldID) FROM BE.tblTarget WHERE fldDiscriminationID = @intDiscriminationID; --SELECT @bytCategories = COUNT(DISTINCT fldValueID) FROM #tblDataCrosstab; -- If there are only two categories then just create the deterministic target expression and return. IF @bytCategories = 2 BEGIN UPDATE BE.tblTarget SET fldBranch = 1 WHERE (fldDiscriminationID = @intDiscriminationID) AND (fldCode = 1); RETURN; END -- **** POPULATE THE TWO TEMP TARGET TABLES *************************************************** -- Populate #tmpTarget with the value combinations from the multinomial lookup table -- and return the number of records. INSERT INTO #tmpTarget SELECT DISTINCT fldDiscrimination, NULL FROM tblLookupMultinomials WHERE fldCategories = @bytCategories; SELECT @bytTargets = COUNT(fldDiscrimination) FROM #tmpTarget; -- Populate #tmpTargetValue with all the value IDs specified by the multinomial lookup table. INSERT INTO #tmpTargetValue SELECT LU.fldDiscrimination, LU.fldCode, LU.fldBranch FROM tblLookupMultinomials AS LU INNER JOIN BE.tblTarget AS T ON LU.fldCode = T.fldCode WHERE (LU.fldCategories = @bytCategories) AND (T.fldDiscriminationID = @intDiscriminationID); -- **** EVALUEATE EACH predictor against each potential target *********************************** -- Loop through all the potential target combinations in #tmpTarget. SET @bytTargetCounter = 1; WHILE @bytTargetCounter <= @bytTargets BEGIN -- Update the target column in #tblDataCrosstab UPDATE #tblDataCrosstab SET fldTarget = NULL; UPDATE DC SET fldTarget = TV.fldBranch FROM #tblDataCrosstab AS DC INNER JOIN BE.tblTarget AS T ON DC.fldValueID = T.fldValueID INNER JOIN #tmpTargetValue AS TV ON T.fldCode = TV.fldCode WHERE (T.fldDiscriminationID = @intDiscriminationID) AND (TV.fldDiscrimination = @bytTargetCounter); --select * into ##tmpEvoalg from #tblDataCrosstab; --drop table ##tmpEvoalg; -- Now loop through all the predictor records in tmpPredictorGenome. Note that this loop only uses -- the value IDs not the actual values (held in Varnnn...V) because no arithemtic operators are used. SELECT @bytPredictCounter = 1; WHILE @bytPredictCounter <= @bytPredictors BEGIN -- Create the predictor string for the current record. SELECT @strOperator = 'Var' + CAST(fldVariableID as varchar(10)), @strOperand = ' ' + fldComparison + ' ' + CAST(fldValueID as varchar(10)) FROM Alg.tvfPredictorGenome(@intAnalysisID) WHERE (fldRN = @bytPredictCounter); SET @strPredictor = ' CASE WHEN (' + @strOperator + ' IS NULL) THEN -1 ELSE CASE WHEN (' + @strOperator + @strOperand + ') THEN 1 ELSE 0 END END'; -- Concatenate the SQL string needed to evaluate #tmpDataCrosstab and store the value in #tmpTally DELETE FROM #tmpTally; SELECT @strSQL = 'INSERT INTO #tmpTally SELECT fldTarget, ' + @strPredictor + ' AS fldPredictor, COUNT(fldRecordID) AS fldN FROM #tblDataCrosstab WHERE fldTarget IS NOT NULL GROUP BY fldTarget, ' + @strPredictor + '; '; EXEC(@strSQL); -- Now get the phi coefficient and update the Predictor genome record with this score. INSERT INTO @tblTally SELECT * FROM #tmpTally; SELECT @phi = fldPhi FROM Alg.tvfGetGenomeScores(@tblTally, 'Target Evaluation', 0, 0, 0, 0); UPDATE Alg.tvfPredictorGenome(@intAnalysisID) SET fldScore = @Phi WHERE fldRN = @bytPredictCounter; SET @bytPredictCounter += 1; END; -- Calculate the average scores of the top 20% genomes and save them in the tmpTarget table. WITH FirstPass AS ( SELECT TOP 20 PERCENT fldScore FROM Alg.tvfPredictorGenome(@intAnalysisID) ORDER BY fldScore DESC) UPDATE #tmpTarget SET fldScore = (SELECT AVG(fldScore) FROM FirstPass) WHERE fldDiscrimination = @bytTargetCounter; SET @bytTargetCounter += 1; END; --select * from #tmpTarget; -- **** STORE THE RESULTS ********************************************************************** -- Update BE.tblTarget with the flag from fldBranch in #tmpTargetValue, using the highest -- scoring target in #tmpTarget. SELECT TOP 1 @bytTopTarget = fldDiscrimination FROM #tmpTarget ORDER BY fldScore DESC UPDATE T SET fldBranch = TV.fldBranch FROM #tmpTargetValue AS TV INNER JOIN BE.tblTarget AS T ON TV.fldCode = T.fldCode WHERE (T.fldDiscriminationID = @intDiscriminationID) AND (TV.fldDiscrimination = @bytTopTarget); END GO /****** Object: StoredProcedure [Alg].[uspDiscrimination_BuildNode] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 08/12/2017 Description: Encapsulates the three stored procs for building a new node in the discrimination hierarchy for the specified parent discrimination and branch. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspDiscrimination_BuildNode] @intDiscriminationID as int, @blnBranch as bit AS BEGIN SET NOCOUNT ON; -- Set the current node flag to true IF @blnBranch = 0 UPDATE BE.tblDiscrimination SET fldNodeFalse = 1 WHERE fldID = @intDiscriminationID; ELSE UPDATE BE.tblDiscrimination SET fldNodeTrue = 1 WHERE fldID = @intDiscriminationID; -- Create a new discrimination (with target records) and return the ID. EXEC @intDiscriminationID = Alg.uspDiscrimination_Insert @intDiscriminationID, @blnBranch; -- Evaluate the potential targets for the new discrimination. EXEC Alg.uspDiscrimination_EvaluateTargets @intDiscriminationID; RETURN @intDiscriminationID; END GO /****** Object: StoredProcedure [Alg].[uspCodon_Rebracket] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ======================================================================================================= Author: Simon Poulton Create date: 29/11/2016 Description: BRILLIANT CODING!!! I've finally cracked how to specify and store the information required for bracketing these expressions. It is based on FIVE logical rules; a) there will always be n - 1 pairs of brackets. b) The count of all the left brackets must equal the count of all the right brackets. c) Any one term must have left or right brackets - not both or neither. d Any one term can have between zero and n - 1 brackets. e) The whole genome expression must begin with a term with left brackets and end with a term with right brackets, but apart from this the terms can be in any order. ======================================================================================================= */ CREATE PROCEDURE [Alg].[uspCodon_Rebracket] @intGeneID int AS BEGIN SET NOCOUNT ON; DECLARE @bytRecords as tinyint, @bytCode as tinyint, @bytLefts as tinyint = 0, @bytRights as tinyint = 0; -- First get the number of codon records in the current gene. SELECT @bytRecords = COUNT(fldCode) FROM Alg.tmpCodon WHERE (fldGeneID = @intGeneID); -- Clear the current genes of brackets and set the order to unity. UPDATE Alg.tmpCodon SET fldLeft = '', fldRight = '', fldOrder = 1 WHERE (fldGeneID = @intGeneID); IF @bytRecords > 1 -- Only add brackets if more than one record BEGIN -- Add n-1 left brackets randomly to the records WHILE @bytLefts < (@bytRecords - 1) BEGIN SET @bytCode = Alg.svfGetRandomCode(RAND(), @bytRecords) UPDATE Alg.tmpCodon SET fldLeft += '(', fldOrder = CASE WHEN @bytLefts = 0 THEN 0 ELSE fldOrder END WHERE ((fldGeneID = @intGeneID) AND (fldCode = @bytCode)); SET @bytLefts += 1; END -- Now add one right bracket to all records with no left brackets and count them. UPDATE Alg.tmpCodon SET fldRight += ')' WHERE ((fldGeneID = @intGeneID) AND (fldLeft = '')); SET @bytRights = @@ROWCOUNT; -- Finally, randomly add the remaining rights (up to n-1) to records with no lefts. WHILE @bytRights < (@bytRecords - 1) BEGIN SET @bytCode = Alg.svfGetRandomCode(RAND(), @bytRecords) -- Test to ensure that only records with no lefts are selected. IF (SELECT fldLeft FROM Alg.tmpCodon WHERE ((fldGeneID = @intGeneID) AND (fldCode = @bytCode))) = '' BEGIN UPDATE Alg.tmpCodon SET fldRight += ')', fldOrder = CASE WHEN @bytRights = (@bytRecords - 2) THEN 2 ELSE fldOrder END WHERE ((fldGeneID = @intGeneID) AND (fldCode = @bytCode)); SET @bytRights += 1; END END END ELSE -- Just set the order to zero for evaluation. BEGIN UPDATE Alg.tmpCodon SET fldOrder = 0 WHERE (fldGeneID = @intGeneID); END END GO /****** Object: StoredProcedure [Alg].[uspCodon_Evolve] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 23/12/2016 Description: Takes the specified Gene and evolves it according to a random mutation. Note that if this is explicitly 'Rebracket' or one of the first three in the list, then it calls the Rebracket proc. It does not do this for the arithemtic and value mutations. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspCodon_Evolve] @intAnalysisID int, -- Used only in uspGene_Create for insert. @intPopulationID int, -- Used only in svfGetRandomGene for transpose. @intGenomeID int, -- ID of current genome to be evolved. @tblAnalysis as Alg.udttConfig READONLY -- Holds analysis parameters AS BEGIN SET NOCOUNT ON; -- Declare variables used for returning the random mutation. DECLARE @bytCurrentGenes as tinyint, -- Number of genes in the current genome. @bytGeneCode tinyint, -- Code of the randomly selected gene that will be evolved. @intGeneID as int, -- ID of the randomly selected gene. @bytType tinyint, -- Type of gene to be evolved, used in svfGetRandomMutation and strArithmetic. @bytCurrentCodons tinyint, -- Number of codons in the current gene. @bytMaxCodons tinyint, -- Maximum number of codons per gene, used to get strMutation. @strMutation nvarchar(10); -- Type of evolutionary mutation to apply to the codon. -- Local variables used in the various mutations. DECLARE @bytNewCodonCode tinyint, -- Incremented codon code, used for insert and transpose. @bytCodonCode as tinyint, -- Code for random codon to be evolved @intOldCodonID as int, -- Random codon ID from the breeding population, only used for transpose. @strArithmetic nvarchar(1), -- Random arithmetic operator. @intVariableID as int, -- Only used for getting random value. @intValueID as int; -- Random Value ID. -- First get the random mutation. SELECT @bytCurrentGenes = COUNT(fldCode) FROM Alg.tvfCurrentGenes(@intGenomeID); SELECT @bytGeneCode = Alg.svfGetRandomCode(RAND(), @bytCurrentGenes); SELECT @intGeneID = fldID, @bytType = fldType FROM Alg.tvfCurrentGenes(@intGenomeID) WHERE fldCode = @bytGeneCode; SELECT @bytCurrentCodons = COUNT(fldCode) FROM Alg.tvfCurrentCodons(@intGeneID); SELECT @bytMaxCodons = fldValue FROM @tblAnalysis WHERE fldText = 'Max Codons'; SELECT @strMutation = Alg.svfGetRandomMutation(@tblAnalysis, 'Codon', RAND(), @bytType, CASE WHEN @bytCurrentCodons = 1 THEN 1 ELSE 0 END, CASE WHEN @bytCurrentCodons = @bytMaxCodons THEN 1 ELSE 0 END); SELECT @bytCodonCode = Alg.svfGetRandomCode(RAND(), @bytCurrentCodons); IF @strMutation = 'Insert' -- Create an entirely new codon by calling uspCodon_Create. Note the setting of @intVariableID to 0 -- so that uspCodon_Create can get a random variableID BEGIN SELECT @bytNewCodonCode = MAX(fldCode) + 1 FROM Alg.tvfCurrentCodons(@intGeneID); EXEC Alg.uspCodon_Create @intAnalysisID, @intGeneID, @bytNewCodonCode, 0; END IF @strMutation = 'Remove' -- Delete the specified codon from tmpCurrentCodon and decrement the codons with greater codes. BEGIN DELETE FROM Alg.tvfCurrentCodons(@intGeneID) WHERE (fldCode = @bytCodonCode); UPDATE Alg.tvfCurrentCodons(@intGeneID) SET fldCode -= 1 WHERE (fldCode > @bytCodonCode); END IF @strMutation = 'Transpose' -- Copy an existing codon from the breeding population into tmpCurrentCodon. BEGIN -- Get a random codon from the tmpBreedingPopulation and copy it into tmpCurrentCodon SELECT @bytNewCodonCode = MAX(fldCode) + 1 FROM Alg.tvfCurrentCodons(@intGeneID); SELECT @intOldCodonID = Alg.svfGetRandomCodon(@intPopulationID, RAND()); INSERT INTO Alg.tmpCodon( fldGeneID, fldCode, fldLeft, fldRight, fldOrder, fldArithmetic, fldVariableID, fldValueID) SELECT @intGeneID, @bytNewCodonCode, fldLeft, fldRight, fldOrder, fldArithmetic, fldVariableID, fldValueID FROM Alg.tmpCodon WHERE fldID = @intOldCodonID; END IF @strMutation = 'Arithmetic' -- Randomly mutate the arithmetic in the specified codon BEGIN SELECT @strArithmetic = Alg.svfGetRandomOperator(@intAnalysisID, 'Arithmetic', RAND(), @bytType); UPDATE Alg.tvfCurrentCodons(@intGeneID) SET fldArithmetic = @strArithmetic WHERE (fldCode = @bytCodonCode); END IF @strMutation = 'Value' -- Randomly mutate the valueID in the specified codon BEGIN SELECT @intVariableID = fldVariableID FROM Alg.tvfCurrentCodons(@intGeneID) WHERE (fldCode = @bytCodonCode); SELECT @intValueID = Alg.svfGetRandomValue(@intAnalysisID, @intVariableID, RAND(), 0) UPDATE Alg.tvfCurrentCodons(@intGeneID) SET fldValueID = @intValueID WHERE (fldCode = @bytCodonCode); END IF @strMutation IN ('Rebracket', 'Insert', 'Remove', 'Transpose') -- Rebracket all the codons in the gene BEGIN EXEC Alg.uspCodon_Rebracket @intGeneID; END END GO /****** Object: StoredProcedure [Alg].[uspCodon_Create] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ====================================================================================================== Author: Simon Poulton Create date: 22/12/2016 Description: Creates a single new codon record in tmpCurrentCodon. The variable ID will have been selected in the calling proc, but if this not is the first codon in a gene, then a new numeric variable will be randomly selected. ====================================================================================================== */ CREATE PROCEDURE [Alg].[uspCodon_Create] @intAnalysisID int, @intGeneID int, -- The ID of the gene to which this codon belongs. @bytCodonCode tinyint, -- The sequential code for the codon within the gene. @intVariableID as int -- ID of the new random variable created. AS BEGIN SET NOCOUNT ON; DECLARE @strArithmetic as nvarchar(1), -- Holds the arithmetic operator for the codon. @intValueID as int; -- Holds the ID of a random value. -- Get random values for the arithmetic operator, valueID and optionally the VariableID. IF @bytCodonCode > 1 SELECT @intVariableID = Alg.svfGetRandomVariable(@intAnalysisID, RAND(), 1); SELECT @strArithmetic = Alg.svfGetRandomOperator(@intAnalysisID, 'Arithmetic', RAND(), 0); SELECT @intValueID = Alg.svfGetRandomValue(@intAnalysisID, @intVariableID, RAND(), 0); -- Insert these into a new record in tmpCodon. INSERT INTO Alg.tmpCodon( fldGeneID, fldCode, fldArithmetic, fldVariableID, fldValueID) VALUES( @intGeneID, @bytCodonCode, @strArithmetic, @intVariableID, @intValueID); END GO /****** Object: StoredProcedure [Alg].[uspAnalysis_Initialise] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 07/12/2017 Description: Initialises the specified ANALYSIS. It sets the status of the analysis and primes the random number seed. This ensures that each analysis will have a different sequence of random numbers. The actual variable is not used. Secondly, it populates the three process-keyed tables for the specified analysis, which are both invariant tables for the whole analysis: 1) tmpAnalysisConfig, which holds the user-specified parameters for the analysis. 2) tmpVariable, which takes the PREDICTOR variables specified in BE.tblAnalysisPredictor. 3) tmpValue, which holds all the unique values for these variables as defined in #tblDataCrosstab. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspAnalysis_Initialise] @intAnalysisID AS int AS BEGIN SET NOCOUNT ON; -- **** DECLARATIONS AND INITIALISATION ******************************************************* DECLARE @intVariableID as int; DECLARE @strAnalysisID as varchar(10), @strSQL as nvarchar(max), @strVar as nvarchar(100); -- Declare and prime the random number seed. DECLARE @fltSeed as float; SET @fltSeed = rand(checksum(newid())); -- Next set the status of the analysis to 3 (running) and the datetime UPDATE BE.tblAnalysis SET fldStatus = 3, fldRun = GETDATE() WHERE fldID = @intAnalysisID; SET @strAnalysisID = CAST(@intAnalysisID as varchar(10)); -- **** POPULATE THE SPID TEMPORARY TABLES USED IN THE ANALYSIS ******************************* DELETE FROM Alg.tvfAnalysisConfig(@intAnalysisID); INSERT INTO Alg.tvfAnalysisConfig(@intAnalysisID) SELECT @intAnalysisID, fldCategory, fldText, fldValue FROM BE.uvwAnalysisConfig WHERE fldAnalysisID = @intAnalysisID; DELETE FROM Alg.tvfVariable(@intAnalysisID); INSERT INTO Alg.tvfVariable(@intAnalysisID) SELECT @intAnalysisID, ROW_NUMBER() OVER(ORDER BY V.fldID) AS RN, V.fldID, V.fldName, V.fldType, CASE WHEN fldType <= 2 THEN 1 ELSE 0 END, NULL FROM BE.tblVariable AS V INNER JOIN BE.tblAnalysisPredictor AS AP ON v.fldID = ap.fldVariableID WHERE (AP.fldAnalysisID = @intAnalysisID) AND (AP.fldSelected = 1); DELETE FROM Alg.tvfValue(@intAnalysisID); -- Create a cursor from all the records just inserted into tvfVariable. DECLARE csrVariable CURSOR FAST_FORWARD FOR SELECT fldID FROM Alg.tvfVariable(@intAnalysisID) ORDER BY fldRN; OPEN csrVariable; FETCH NEXT FROM csrVariable INTO @intVariableID; WHILE @@FETCH_STATUS = 0 BEGIN SET @strVar = 'Var' + CAST(@intVariableID AS varchar(100)); SET @strSQL = ' WITH FirstPass AS ( SELECT DISTINCT ' + @strVar + ' AS fldID FROM #tblDataCrosstab ) INSERT INTO Alg.tvfValue(' + @strAnalysisID + ') SELECT ' + @strAnalysisID + ', V.fldVariableID, ROW_NUMBER() OVER (PARTITION BY V.fldVariableID ORDER BY V.fldCode) AS RN, V.fldID, V.fldCode, V.fldText FROM BE.tblValue AS V INNER JOIN FirstPass AS FP ON V.fldID = FP.fldID;'; EXEC(@strSQL); FETCH NEXT FROM csrVariable INTO @intVariableID; END CLOSE csrVariable; DEALLOCATE csrVariable; -- Use this table to update the temp variables table with the number of unique values UPDATE V SET fldUniqueValues = VAL.fldUniqueValues FROM Alg.tvfVariable(@intAnalysisID) AS V INNER JOIN (SELECT fldVariableID, COUNT(fldRN) AS fldUniqueValues FROM Alg.tvfValue(@intAnalysisID) GROUP BY fldVariableID) AS VAL ON V.fldID = VAL.fldVariableID; END GO /****** Object: StoredProcedure [Alg].[uspAnalysis_Deinitialise] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create Date: 22/01/2018 Description: Just clears up the process-keyed tables and #tmpDataCrosstab from uspAnalysis_Run and uspDiscrimination_BuildHierarchy =================================================================================================== */ CREATE PROCEDURE [Alg].[uspAnalysis_Deinitialise] @intAnalysisID as int AS BEGIN SET NOCOUNT ON; DELETE FROM alg.tvfVariable(@intAnalysisID); DELETE FROM alg.tvfValue(@intAnalysisID); DELETE FROM alg.tvfAnalysisConfig(@intAnalysisID); DELETE FROM alg.tvfPredictorGenome(@intAnalysisID); END GO /****** Object: StoredProcedure [Alg].[uspAnalysis_CreatePredictorGenomes] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 08/12/2017 Description: Populates the temp table with random predictor genomes, with user-specified number of records. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspAnalysis_CreatePredictorGenomes] ( @intAnalysisID int ) AS BEGIN SET NOCOUNT ON; DECLARE @intPredictVarID as int, @strComparison as nvarchar(10), @intPredictValID as int, @bytType as tinyint; DECLARE @intPredictors as int, @intPredictCounter as int = 1; SELECT @intPredictors = Alg.svfGetAnalysisConfiguration(@intAnalysisID, 'Discriminating Pop'); DELETE FROM Alg.tvfPredictorGenome(@intAnalysisID); WHILE @intPredictCounter <= @intPredictors BEGIN SET @intPredictVarID = Alg.svfGetRandomVariable(@intAnalysisID, RAND(), 0); SELECT @bytType = fldType FROM Alg.tvfVariable(@intAnalysisID) WHERE fldID = @intPredictVarID; SET @strComparison = Alg.svfGetRandomOperator(@intAnalysisID, 'Comparison', RAND(), @bytType); SET @intPredictValID = Alg.svfGetRandomValue(@intAnalysisID, @intPredictVarID, RAND(), 0); INSERT INTO Alg.tmpPredictorGenome VALUES ( @intAnalysisID, @intPredictCounter, @intPredictVarID, @strComparison, @intPredictValID, NULL); SET @intPredictCounter += 1; END; END GO /****** Object: StoredProcedure [Alg].[uspDiscrimination_Run] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* ============================================================================================================= Author: Simon Poulton Create date: 14/12/2017 Description: New procedure to run a complete discrimination. This is the controlling proc for the whole evolutionalry algorithm. It has been derived from uspAnalysis_Run, which now simply creates the temp data evalution table and derives the multinomial targets. Amend Date: 01/03/2018 Description: I have included a new sub-routine to set fldTargetRandom in #tblDataCrosstab. This is only run if the @blnRandomise flag is true. I have also created a sub-routine to toggle between real and randomised populations, which is passed as a new parameter to uspPopulation_Create, uspPopulation_Evolve and uspPopulation_StoreResult. Amend Date: 08/09/2020 Description: I have modified the variables @bytPopulations and @bytPopCounter to be small integers, thereby allowing more than 255 populations to be run in a single analysis. For now, I have left the name ============================================================================================================= */ CREATE PROCEDURE [Alg].[uspDiscrimination_Run] @intDiscriminationID as int AS BEGIN SET NOCOUNT ON; -- **** DECLARATIONS ************************************************************************************ DECLARE @intAnalysisID as int, -- ID of the parent analysis. @blnRandomise as bit, -- Randomisation flag for the parent analysis. @blnRandomPop as bit, -- Flag to indicate population is based on random target. @intTarget1 as int, -- Number of Traget-True records. @bytDiscStatus as tinyint = 3; -- Status of the current discrimination. -- Table variable for the analysis configuration and probabilities. DECLARE @tblAnalysis as Alg.udttConfig -- Holds analysis parameters, passed to sub procs. -- Local variables to hold user-defined values from @tblAnalysis. DECLARE @intPopulations as smallint, -- User-defined number of populations. @intGenerations as smallint, -- Maximum number of generations within a population @intProgressInterval as smallint, -- Number of generations between storing progress. @fltStoppingRule as float; -- Passed to uspPopulation_StoreProgress. -- Local variables for controlling the flow and progress of the analysis. DECLARE @intPopCounter as smallint, -- Counter for looping through the specified populations. @dtmPopStart as datetime2(7), -- Start time of each population cycle. @bytPopStatus as tinyint, -- Status of population, set in uspPopulation_StoreProgress. @intGenCounter as smallint, -- Counter for looping through the generations within a population @intPopulationID as int; -- ID of the population created in uspPopulation_Create. -- **** END OF DECLARATIONS ***************************************************************************** -- **** INITIALISE THE DISCRIMINATION ******************************************************************* SELECT @intAnalysisID = fldAnalysisID FROM BE.tblDiscrimination WHERE fldID = @intDiscriminationID; SELECT @blnRandomise = fldTargetRandomise FROM BE.tblAnalysis WHERE fldID = @intAnalysisID; UPDATE BE.tblDiscrimination SET fldStatus = @bytDiscStatus WHERE fldID = @intDiscriminationID; -- Clear any child populations still remaining DELETE FROM BE.tblPopulation WHERE fldDiscriminationID = @intDiscriminationID; -- Now get the analysis parameters from tblAnalysisConfig and tblAnalysisPredictor -- and save them in @tblAnalysis INSERT INTO @tblAnalysis SELECT fldCategory, fldText, fldValue FROM BE.uvwAnalysisConfig WHERE fldAnalysisID = @intAnalysisID; -- Now get the local variables from the analysis table variable. SELECT @intPopulations = fldValue FROM @tblAnalysis WHERE fldText = 'Populations'; SELECT @intGenerations = fldValue FROM @tblAnalysis WHERE fldText = 'Generations'; SELECT @intProgressInterval = fldValue FROM @tblAnalysis WHERE fldText = 'Progress Interval'; SELECT @fltStoppingRule = fldValue/100 FROM @tblAnalysis WHERE fldText = 'Stopping Rule'; -- Set the target column in #tblDataCrosstab - this is constant for the whole discrimination. UPDATE #tblDataCrosstab SET fldTarget = NULL; UPDATE DC SET fldTarget = T.fldBranch FROM #tblDataCrosstab AS DC INNER JOIN BE.tblTarget AS T ON DC.fldValueID = T.fldValueID WHERE T.fldDiscriminationID = @intDiscriminationID; -- New sub-routine to set the randomised target. IF @blnRandomise = 1 BEGIN SELECT @intTarget1 = COUNT(fldTarget) FROM #tblDataCrosstab WHERE fldTarget = 1; UPDATE #tblDataCrosstab SET fldTargetRandom = NULL, fldRandom = NULL; UPDATE #tblDataCrosstab SET fldRandom = RAND(CAST(NEWID() as varbinary)), fldTargetRandom = 0 WHERE fldTarget IS NOT NULL; WITH FirstPass AS ( SELECT TOP(@intTarget1) * FROM #tblDataCrosstab ORDER BY fldRandom DESC) UPDATE FirstPass SET fldTargetRandom = 1; END; --select * from #tblDataCrosstab; -- **** END OF INITIALISATIONS ****************************************************************************** -- **** POPULATION LOOP ************************************************************************************* -- Loop through the specified number of populations, as long as the discrimination status is still running. SET @blnRandomPop = 0; SET @intPopCounter = 1; WHILE (@intPopCounter <= @intPopulations) AND (@bytDiscStatus = 3) BEGIN -- Initialise the population run SET @dtmPopStart = sysdatetime(); SET @bytPopStatus = 3; SET @intGenCounter = 1; -- Create the population for the specified discrimination and store the ID EXEC @intPopulationID = Alg.uspPopulation_Create @intAnalysisID, @intDiscriminationID, @blnRandomPop, @tblAnalysis -- Loop through the specified number of generations WHILE (@intGenCounter <= @intGenerations ) AND (@bytPopStatus = 3) BEGIN -- Call the population evolving routine EXEC Alg.uspPopulation_Evolve @intAnalysisID, @intPopulationID, @intGenCounter, @blnRandomPop, @tblAnalysis; -- Set the progress in tmpProgress and return the population status IF (@intGenCounter % @intProgressInterval) = 0 EXEC @bytPopStatus = Alg.uspPopulation_StoreProgress @intPopulationID, @dtmPopStart, @intGenCounter, @intGenerations, @fltStoppingRule; SET @intGenCounter += 1; END -- Store the population results (reducing the generation counter byt one to represent the end of the previous block). SET @intGenCounter -= 1; EXEC Alg.uspPopulation_StoreResult @intAnalysisID, @intPopulationID, @bytPopStatus, @intGenCounter, @blnRandomPop, @tblAnalysis; -- Get the status of this discrimination, clear the progress table and, increment the pop counter. SELECT @bytDiscStatus = fldStatus FROM BE.tblDiscrimination WHERE fldID = @intDiscriminationID; DELETE FROM Alg.tvfProgress(@intAnalysisID); --SET @intPopCounter += 1; -- If randomisation is not set for the analysis, then just increment the population counter. -- If it is set, toggle @blnRandomPop and only increment the population counter if WAS 1. IF @blnRandomise = 0 SET @intPopCounter += 1 ELSE BEGIN IF @blnRandomPop = 0 set @blnRandomPop = 1 ELSE BEGIN SET @blnRandomPop = 0; SET @intPopCounter += 1 END END; -- Delete the temporary tables for the currrent population DELETE FROM Alg.tmpGenome where fldPopulationID = @intPopulationID; DELETE FROM Alg.tmpProgress where fldPopulationID = @intPopulationID; END -- **** END OF POPULATION LOOP ****************************************************************************** -- Update the status in tblDiscrimination if it has completed. UPDATE BE.tblDiscrimination SET fldStatus = CASE WHEN fldStatus = 3 THEN 6 ELSE fldStatus END WHERE fldID = @intDiscriminationID; END --INSERT INTO @tblAnalysis -- SELECT 'Analysis', 'New Genomes', 2 * COUNT(fldID) -- FROM BE.tblAnalysisPredictor -- WHERE fldAnalysisID = @intAnalysisID AND fldSelected = 1; GO /****** Object: StoredProcedure [Alg].[uspPopulation_StoreResult] Script Date: 05/08/2024 11:19:02 ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO /* =================================================================================================== Author: Simon Poulton Create date: 27/12/2016 Description: Stores the results of the specified population. The first two parameters are just the IDs passed down the calling stack. The third and fourth parameters are used to update tblPopulation. The fifth parameter is the flag for whether these are the results of the real or random target records, which is passed on to uspGenome_Evaluate. =================================================================================================== */ CREATE PROCEDURE [Alg].[uspPopulation_StoreResult] @intAnalysisID as int, @intPopID as int, @intPopStatus as int, @intGeneration as int, @blnRandomPop bit, @tblAnalysis as Alg.udttConfig READONLY AS BEGIN SET NOCOUNT ON; -- **** DECLARATIONS ************************************************************************** DECLARE @tblTally as Alg.udttGenomeScore; DECLARE @intTempGenomeID as int, -- used locally to hold the ID of the highest scoring genome. @intPermGenomeID as int; -- used to hold the ID of the Genome added to BE.tblGenome. DECLARE @fltGenePenalty as float, @fltCodonPenalty as float, @bytGenes as tinyint, -- Number of genes created by call to uspGenome_Create. @bytCodons as tinyint, -- Number of codons created by call to uspGenome_Create. @fltPhi as float; -- Holds the phi coefficient. -- **** GET THE HIGEST SCORING GENOME IN THE POPULATION AND SAVE THE RESULTS IN THE GENOME HIERARCHY **** -- First populate the local variables SELECT TOP 1 @intTempGenomeID = fldID FROM Alg.tmpGenome WHERE fldPopulationID = @intPopID ORDER BY fldScore DESC, fldID ASC; -- First update tblPopulation with the progress details. WITH FirstPass (fldMaxScore, fldMaxScoreGen, fldElapsed) AS ( SELECT TOP 1 fldMaxScore, fldMaxScoreGen, fldElapsed FROM Alg.tvfProgress(@intAnalysisID) ORDER BY fldGeneration DESC ) UPDATE BE.tblPopulation SET fldStatus = @intPopStatus, fldGenerationCompleted = @intGeneration, fldGenerationMax = FP.fldMaxScoreGen, fldMaxScore = FP.fldMaxScore, fldElapsed = FP.fldElapsed FROM BE.tblPopulation AS P, FirstPass AS FP WHERE fldID = @intPopID; -- Now copy the highest scoring record from tmpGenome into BE.tblGenome. INSERT INTO BE.tblGenome (fldPopulationID, fldGeneration, fldCode, fldScore) SELECT fldPopulationID, fldGeneration, fldCode, fldScore FROM Alg.tmpGenome WHERE fldID = @intTempGenomeID; SET @intPermGenomeID = SCOPE_IDENTITY(); -- And copy all its child records from tmpGene and tmpCodon. INSERT INTO BE.tblGene (fldGenomeID, fldCode, fldLogical, fldLeft, fldRight, fldOrder, fldComparison, fldType) SELECT @intPermGenomeID, fldCode, fldLogical, fldLeft, fldRight, fldOrder, fldComparison, fldType FROM Alg.tmpGene WHERE fldGenomeID = @intTempGenomeID; INSERT INTO BE.tblCodon ( fldGeneID, fldCode, fldLeft, fldRight, fldOrder, fldArithmetic, fldVariableID, fldValueID) SELECT P.fldID AS fldGeneID, C.fldCode, C.fldLeft, C.fldRight, C.fldOrder, C.fldArithmetic, C.fldVariableID, C.fldValueID FROM Alg.tmpGene AS T INNER JOIN Alg.tmpCodon AS C ON T.fldID = C.fldGeneID INNER JOIN BE.tblGene AS P ON T.fldCode = P.fldCode WHERE T.fldGenomeID = @intTempGenomeID AND P.fldGenomeID = @intPermGenomeID -- **** SCORE THE HIGHEST SCORING GENOME AND STORE THE RESULTS ******************************** -- First populate the local variables SELECT @fltGenePenalty = (fldValue / 1000) FROM @tblAnalysis WHERE fldText = 'Gene Penalty'; SELECT @fltCodonPenalty = (fldValue / 1000) FROM @tblAnalysis WHERE fldText = 'Codon Penalty'; SELECT @bytGenes = COUNT(fldID) FROM Alg.tmpGene WHERE fldGenomeID = @intTempGenomeID; SELECT @bytCodons = COUNT(G.fldID) FROM Alg.tmpGene AS G INNER JOIN Alg.tmpCodon AS C ON G.fldID = C.fldGeneID WHERE G.fldGenomeID = @intTempGenomeID; -- Now call uspGenomeEvaluate to get the table of tallies, which is used below and -- stored in tblGenomeClassification INSERT INTO @tblTally EXEC Alg.uspGenome_Evaluate @intAnalysisID, @intTempGenomeID, @blnRandomPop; INSERT INTO BE.tblGenomeClassification SELECT @intPermGenomeID, 1, fldTarget, fldPredictor, fldN FROM @tblTally; -- Next get the scores and stored them in tblGenomeScore. -- Note use of TRY/CATCH to capture errors (e.g. divide by zero) in the function. BEGIN TRY INSERT INTO BE.tblGenomeScore SELECT @intPermGenomeID, 1, fldPhi, fldRn, fldChiSq, fldChiP FROM Alg.tvfGetGenomeScores( @tblTally, 'Scoring', @bytGenes, @bytCodons, @fltGenePenalty, @fltCodonPenalty); END TRY BEGIN CATCH INSERT INTO BE.tblGenomeScore VALUES( @intPermGenomeID, 1, 0, 0, 0, 0); END CATCH; -- Finally, evaluate the individual genes andd store them in tblGeneRecord. EXEC Alg.uspGeneRecord_Insert @intAnalysisID, @intPermGenomeID; END GO