2012年5月11日 星期五

Transact-SQL 基因演算法範例程式 (T-SQL Genetic Algorithm Sample)



基因演算法為 University of Michigan 學者 John Holland 於 1975 年所提出,主要是將數值基因化 (即以 0、1 表示),並模擬生物演進過程的複製 (Reproduction)交配 (Crossover)突變 (Mutation) 等過程,尋找問題的近似較佳解。


基因演算法的應用近年來逐漸受到重視,而 C、C++ 等較偏工程方面的程式語言也較常被用來開發此類程式。然而,當計算 Fitness Value 需用到大量資料時,就必須與資料庫作某種程度的結合,方可順利達到目的;但應如何使用資料庫、並兼顧整體的效能,也就成為一個值得思考的問題。筆者在此提供一解決方案,即以 SQL Server 所提供的程式語言 (Transact-SQL) 進行基因演算法核心邏輯程式的開發,將基因演算法寫成一個 Stored Procedure,目的就是將演算邏輯直接植入資料庫,減少演算程式獲取資料的時間,進而提升整體運作效能。本程式會呼叫其他三個 Stored Procedures --

  • usp_IEEE754_Generate :產生 IEEE 754 (32 bits) 格式的 2 進制基因碼 (程式碼請見文章 Transact-SQL IEEE 754 轉換程式 -- 32 bits)。
  • usp_IEEE754_Restore: 將 IEEE 754 (32 bits) 格式的 2 進制基因碼反轉為 Float (程式碼請見文章 Transact-SQL IEEE 754 反轉程式 -- 32 bits)
  • usp_Polynomial:此處使用一元二次多項式 (2X^2 + 3X + 7) 的 Fitness Value 產生程式,以找出該多項式的最小值 (詳細程式碼請見本文最後 [註] usp_Polynomial 程式碼)。

範例程式

本程式於複製(Reproduction)階段,採用輪盤式選擇(Roulette Wheel Selection);交配 (Crossover) 階段,採用雙點交配 (Two Point Crossover);突變 (Mutation) 階段,採用單點突變 (Single Point Mutation)。

參數說明及呼叫方式:請見程式碼第 16 ~ 33 行

Create Proc usp_Genetic_Algorithm @prm_total_generation_no    int,
                                  @prm_total_item_no          int,
                                  @prm_crossover_rate         float,
                                  @prm_mutation_rate          float,
                                  @prm_boundary_high          float,
                                  @prm_boundary_low           float,
                                  @rtn_result                 float output
/*-----------------------------------------------------------------------------------+
 | Author: Wei-jie Yang                                                              |
 | Date:   2012/06/14                                                                |
 | All Rights Reserved                                                               |
 |                                                                                   |
 | [Description]                                                                     |
 |  This stored procedure is designed to perform Genetic Algorithm.                  |
 |                                                                                   |
 | [Parameters]                                                                      |
 |  INPUT Parameters                                                                 |
 |     - @prm_total_generation_no:     Total Generation Number                       |
 |     - @prm_total_item_no:           Total Item Number Per Generation (even)       |
 |     - @prm_crossover_rate:          Crossover Rate                                |
 |     - @prm_mutation_rate:           Mutation Rate                                 | 
 |     - @prm_boundary_high:           Upper Boundary                                | 
 |     - @prm_boundary_low:            Lower Boundary                                | 
 |                                                                                   |
 |  OUTPUT Parameter                                                                 |
 |     - @rtn_result:                  Final Result                                  |
 |                                                                                   |
 | [Usage]                                                                           |
 |   declare @return_result float                                                    |
 |                                                                                   |
 |   exec usp_Genetic_Algorithm 200, 100, 0.8, 0.1, 100, -100, @return_result output |
 |                                                                                   |
 |   select @return_result Result                                                    |
 +-----------------------------------------------------------------------------------*/

as
declare @avg_fitness_value          float,

        @crossover_digit_1          int,
        @crossover_digit_2          int,
        @crossover_digit_temp       int,
        @crossover_item_no_1        int,
        @crossover_item_no_2        int,
        @crossover_times            int,

        @fitness_value              float,
  
        @generation_no              int,
        @gene_1                     varchar(32),
        @gene_1_1                   varchar(32),
        @gene_1_2                   varchar(32),
        @gene_1_3                   varchar(32),
        @gene_2                     varchar(32),
        @gene_2_1                   varchar(32),
        @gene_2_2                   varchar(32),
        @gene_2_3                   varchar(32),
        @gene_codes                 varchar(32),
        @gene_codes_binary          binary(32),
        @gene_digit                 bit,
  
        @i                          int,
        @item_counter               int,
        @item_no                    int,
        @item_value                 float,
  
        @j                          int,
  
        @k                          int,
  
        @l                          int,
  
        @mutation_digit             int,
        @mutation_gene_codes        varchar(32),
        @mutation_gene_1            varchar(32),
        @mutation_gene_2            char(1),
        @mutation_gene_3            varchar(32),
        @mutation_item_no           int,
  
        @new_item_no                int,
  
        @overflow_item_amount       int,
  
        @pre_perfect_fitness_value  float,
        @pre_perfect_gene_codes     varchar(32),
        @pre_perfect_item_value     float,
  
        @selected_numbers           int,
        @sql                        nvarchar(2000),
  
        @tmp_fitness_value          float,  
        @tmp_gene_codes             varchar(32),
        @tmp_item_value             float,
        @tmp_reproduction_no        int,
        @tmp_sn                     int,
        @total_digit_length         tinyint,
        @total_reproduction         int

declare @tbl_generation table( 
        sn                          int identity,
        generation_no               int,
        item_no                     int,
        item_value                  float,
        gene_codes                  varchar(32),
        fitness_value               float,
        reproduction_no             int,
        crossover_selected          char(1),         -- Selected Y/N (Crossover Stage)
        mutation_selected           char(1),         -- Selected Y/N (Mutation  Stage)
        refresh_yn                  char(1)
        )

declare @tbl_single_generation table( 
        sn                          int,
        item_no                     int,
        item_value                  float,
        gene_codes                  varchar(32),
        fitness_value               float,
        reproduction_no             int
        )

set nocount on


-- Initialization (begin) =============================================================================================
select @total_digit_length = 32
select @generation_no      = 0
-- Initialization (end) ===============================================================================================



-- Generation Zero (begin) ============================================================================================
select @item_counter = 1

while @item_counter <= @prm_total_item_no
 begin
    select @gene_codes = ''
    select @item_value = 0


    -- Gene Codes, Item Value (begin) -------------------------------------------------------------
    -- Item Value ...................................................
    select @item_value = (@prm_boundary_high - @prm_boundary_low) * rand() + @prm_boundary_low

    -- Gene Codes ...................................................
    select @sql = ''
    select @sql = 'exec usp_IEEE754_Generate ' + cast(@item_value as varchar(100)) + 
                  ', ' + '@inner1 output, @inner2 output '

    exec sp_executesql @sql,
            N'@inner1 varchar(32) output, @inner2 binary(32) output ', 
            @inner1 = @gene_codes   output,
            @inner2 = @gene_codes_binary   output
    -- Gene Codes, Item Value (end) ---------------------------------------------------------------


    -- Fitness Value (begin) ----------------------------------------------------------------------
    -- Call Fitness Value Calculating Stored Procedure ..............
    select @sql = ''
    select @sql = 'exec usp_Polynomial ' + cast(@item_value as varchar(100)) + 
                  ', ' + '@inner output '
 
    exec sp_executesql @sql,
                        N'@inner float output ', 
                        @inner = @fitness_value output 
    -- Fitness Value (end) ------------------------------------------------------------------------


    insert into @tbl_generation (generation_no, item_no, item_value, gene_codes, fitness_value, reproduction_no, crossover_selected, mutation_selected, refresh_yn)
       values(@generation_no, @item_counter, @item_value, @gene_codes, @fitness_value, 0, 'N', 'N', 'N')

    select @item_counter = @item_counter + 1
 end


 
-- Reproduction No (begin) ------------------------------------------------------------------------
select @avg_fitness_value = avg(fitness_value)
  from @tbl_generation
 where generation_no = @generation_no

update @tbl_generation
   set reproduction_no = round(fitness_value / @avg_fitness_value, 0)
 where generation_no = @generation_no   

select @total_reproduction = sum(reproduction_no)
  from @tbl_generation
 where generation_no = @generation_no


if @total_reproduction < @prm_total_item_no
 begin
    select top 1 @item_no = item_no
      from @tbl_generation
     where generation_no = @generation_no
     order by fitness_value desc

    update @tbl_generation
       set reproduction_no = reproduction_no + (@prm_total_item_no - @total_reproduction)
     where generation_no = @generation_no
       and item_no = @item_no
 end


if @total_reproduction > @prm_total_item_no
 begin
    select @overflow_item_amount = @total_reproduction - @prm_total_item_no
    select @i = 1

    while @i <= @overflow_item_amount
     begin
        select top 1 @item_no = item_no
          from @tbl_generation
         where generation_no = @generation_no
           and reproduction_no > 0
         order by fitness_value asc

        update @tbl_generation
           set reproduction_no = reproduction_no - 1
         where generation_no = @generation_no
           and item_no = @item_no
 
        select @i = @i + 1
     end
 end
-- Reproduction No (end) --------------------------------------------------------------------------
-- Generation Zero (end) ==============================================================================================



WHILE @generation_no <= @prm_total_generation_no
 BEGIN
    select @generation_no = @generation_no + 1
 
    -- STAGE 1. Reproduction (begin) ==================================================================================
    delete from @tbl_single_generation where 1 = 1

    insert into @tbl_single_generation (item_no, item_value, gene_codes, fitness_value, reproduction_no)
        select item_no, item_value, gene_codes, fitness_value, reproduction_no
          from @tbl_generation
         where generation_no = @generation_no - 1

    declare Reproduction cursor for
        select item_value, gene_codes, fitness_value, reproduction_no
          from @tbl_single_generation
 
    open Reproduction

    fetch next from Reproduction
       into @tmp_item_value, @tmp_gene_codes, @tmp_fitness_value, @tmp_reproduction_no

    select @new_item_no = 1
 
    while @@fetch_status = 0
     begin
        select @j = 1 
  
        while @j <= @tmp_reproduction_no
         begin
            insert into @tbl_generation (generation_no, item_no, item_value, gene_codes, fitness_value, reproduction_no, crossover_selected, mutation_selected, refresh_yn)
               values(@generation_no, @new_item_no, @tmp_item_value, @tmp_gene_codes, @tmp_fitness_value, 0,  'N', 'N', 'N')
   
            select @new_item_no = @new_item_no + 1
  
            select @j = @j + 1
         end
  
        fetch next from Reproduction
           into @tmp_item_value, @tmp_gene_codes, @tmp_fitness_value, @tmp_reproduction_no
     end

    close Reproduction
    deallocate Reproduction
    -- STAGE 1. Reproduction (end) ====================================================================================



    -- Out of Range Item(s) Handling (begin) --------------------------------------------------------------------------
    select top 1 
           @pre_perfect_item_value = item_value,
           @pre_perfect_gene_codes = gene_codes,
           @pre_perfect_fitness_value = fitness_value
      from @tbl_generation
     where generation_no = @generation_no - 1
       and item_value > 0
     order by fitness_value desc

    update @tbl_generation
       set item_value = @pre_perfect_item_value,
           gene_codes = @pre_perfect_gene_codes,
           fitness_value = @pre_perfect_fitness_value,
           reproduction_no = -1
     where generation_no = @generation_no
       and ((item_value > @prm_boundary_high) or 
            (item_value < @prm_boundary_low))
    -- Out of Range Item(s) Handling (end) ----------------------------------------------------------------------------



    -- STAGE 2. Crossover (begin) =====================================================================================
    select @crossover_times = @prm_total_item_no / 2

    select @k = 1
 
    while @k <= @crossover_times
     begin
  
        if rand() < @prm_crossover_rate
         begin
            select @crossover_item_no_1 = cast(rand() * (@prm_total_item_no - 1) + 1 as int)
   
            select @crossover_item_no_2 = cast(rand() * (@prm_total_item_no - 1) + 1 as int)
   
            select @selected_numbers = count(*)
              from @tbl_generation
             where crossover_selected = 'N'
               and generation_no = @generation_no
               and item_no in (@crossover_item_no_1, @crossover_item_no_2)
   
            if @selected_numbers = 2                                -- Both items are not selected yet. Perform Double Point Crossover.
             begin

                if rand() < @prm_crossover_rate
                 begin
                    select @crossover_digit_1 = cast(rand() * (@total_digit_length - 1) + 1 as int)
     
                    select @crossover_digit_2 = cast(rand() * (@total_digit_length - 1) + 1 as int)
     
                    while @crossover_digit_1 = @crossover_digit_2
                     begin
                        select @crossover_digit_2 = cast(rand() * (@total_digit_length - 1) + 1 as int) 
                     end

                    if @crossover_digit_1 > @crossover_digit_2     -- Keep @crossover_digit_1 < @crossover_digit_2 (consider as string)
                     begin
                        select @crossover_digit_temp = @crossover_digit_1
      
                        select @crossover_digit_1 = @crossover_digit_2
      
                        select @crossover_digit_2 = @crossover_digit_temp
                     end     
     
                    -- Retrieve gene codes
                    select @gene_1 = gene_codes
                      from @tbl_generation
                     where generation_no = @generation_no
                       and item_no = @crossover_item_no_1

                    select @gene_2 = gene_codes
                      from @tbl_generation
                     where generation_no = @generation_no
                       and item_no = @crossover_item_no_2
        
                    -- Divide each gene codes into three parts
                    if @crossover_digit_1 = 1
                     begin
                        select @gene_1_1 = ''
                        select @gene_2_1 = ''
                     end
                    else
                     begin
                        select @gene_1_1 = substring(@gene_1, 1, @crossover_digit_1 - 1)
                        select @gene_2_1 = substring(@gene_2, 1, @crossover_digit_1 - 1)
                     end
     
                    select @gene_1_2 = substring(@gene_1, @crossover_digit_1, @crossover_digit_2 - @crossover_digit_1 + 1)
                    select @gene_2_2 = substring(@gene_2, @crossover_digit_1, @crossover_digit_2 - @crossover_digit_1 + 1)

                    if @crossover_digit_2 = @total_digit_length
                     begin
                        select @gene_1_3 = substring(@gene_1, @crossover_digit_2, 1)
                        select @gene_2_3 = substring(@gene_2, @crossover_digit_2, 1)
                     end
                    else
                     begin
                        select @gene_1_3 = substring(@gene_1, @crossover_digit_2 + 1, @total_digit_length - @crossover_digit_2)
                        select @gene_2_3 = substring(@gene_2, @crossover_digit_2 + 1, @total_digit_length - @crossover_digit_2)
                     end

                    -- Crossover
                    select @gene_1 = @gene_1_1 + @gene_1_2 + @gene_1_3
                    select @gene_2 = @gene_2_1 + @gene_2_2 + @gene_2_3
                 end
             end

            if @selected_numbers = 2
             begin
                update @tbl_generation
                   set gene_codes = @gene_1,
                       crossover_selected = 'Y',
                       refresh_yn = 'Y'
                 where generation_no = @generation_no
                   and item_no = @crossover_item_no_1
       
                update @tbl_generation
                   set gene_codes = @gene_2,
                       crossover_selected = 'Y',
                       refresh_yn = 'Y'
                 where generation_no = @generation_no
                   and item_no = @crossover_item_no_2       
             end
            else
             begin
                update @tbl_generation
                   set crossover_selected = 'Y'
                 where generation_no = @generation_no
                   and item_no in (@crossover_item_no_1, @crossover_item_no_2)
             end
         end
  
        select @k = @k + 1
     end
    -- STAGE 2. Crossover (end) =======================================================================================



    -- STAGE 3. Mutation (begin) ======================================================================================
    select @l = 1

    while @l <= @prm_total_item_no
     begin
  
        if rand() < @prm_mutation_rate
         begin
            select @mutation_item_no = cast(rand() * (@prm_total_item_no - 1) + 1 as int)
   
            select @mutation_digit = cast(rand() * (@total_digit_length - 1) + 1 as int)  
   
            select @mutation_gene_codes = gene_codes
              from @tbl_generation
             where generation_no = @generation_no
               and item_no = @mutation_item_no
               and mutation_selected = 'N'
   
            if @mutation_digit = 1
             begin
                select @mutation_gene_1 = ''
                select @mutation_gene_3 = substring(@mutation_gene_codes, 2, @total_digit_length - 1)
             end
            else
             begin
     
                if @mutation_digit = @total_digit_length
                 begin
                    select @mutation_gene_1 = substring(@mutation_gene_codes, 1, @total_digit_length - 1)
                    select @mutation_gene_3 = ''
                 end
                else
                 begin
                    select @mutation_gene_1 = substring(@mutation_gene_codes, 1, @mutation_digit - 1)
                    select @mutation_gene_3 = substring(@mutation_gene_codes, @mutation_digit + 1, @total_digit_length - @mutation_digit)
                 end
             end
   
            select @mutation_gene_2 = substring(@mutation_gene_codes, @mutation_digit, 1)
   
            if @mutation_gene_2 = '0'
                select @mutation_gene_codes = @mutation_gene_1 + '1' + @mutation_gene_3    
            else
                select @mutation_gene_codes = @mutation_gene_1 + '0' + @mutation_gene_3    
   
            update @tbl_generation
               set gene_codes = @mutation_gene_codes,
                   mutation_selected = 'Y',
                   refresh_yn = 'Y'
             where generation_no = @generation_no
               and item_no = @mutation_item_no 
         end
   
        select @l = @l + 1   
     end
    -- STAGE 3. Mutation (end) ========================================================================================



    -- Refresh (begin) ================================================================================================
    delete from @tbl_single_generation where 1 = 1
 
    insert into @tbl_single_generation (sn, gene_codes)
        select sn, gene_codes
          from @tbl_generation
         where generation_no = @generation_no
           and refresh_yn = 'Y' 

    declare Refresh cursor for
         select sn, gene_codes
           from @tbl_single_generation
   
    open Refresh

    fetch next from Refresh into @tmp_sn, @tmp_gene_codes

    while @@fetch_status = 0
     begin
        -- Item Value (begin) ---------------------------------------
        select @gene_codes = @tmp_gene_codes
  
        select @sql = ''
        select @sql = 'exec usp_IEEE754_Restore ' + '''' + @gene_codes + '''' + 
                      ', ' + '@inner output '
 
        exec sp_executesql @sql,
                            N'@inner float output ', 
                            @inner = @item_value output
        -- Item Value (end) -----------------------------------------


        -- Fitness Value (begin) ------------------------------------
        -- Call Fitness Value Calculating Stored Procedure 
        select @sql = ''
        select @sql = 'exec usp_Polynomial ' + cast(@item_value as varchar(100)) + 
                      ', ' + '@inner output '
  
        exec sp_executesql @sql,
                            N'@inner float output ', 
                            @inner = @fitness_value output 
        -- Fitness Value (end) --------------------------------------


        update @tbl_generation
           set item_value    = @item_value,
               fitness_value = @fitness_value
         where sn = @tmp_sn
  
        fetch next from Refresh into @tmp_sn, @tmp_gene_codes
     end

    close Refresh
    deallocate Refresh
    -- Refresh (end) ==================================================================================================



    -- Generate Reproduction No (begin) ===============================================================================
    select @avg_fitness_value = avg(fitness_value)
      from @tbl_generation
     where generation_no = @generation_no

    update @tbl_generation
       set reproduction_no = round(fitness_value / @avg_fitness_value, 0)
     where generation_no = @generation_no   

    select @total_reproduction = sum(reproduction_no)
      from @tbl_generation
     where generation_no = @generation_no


    if @total_reproduction < @prm_total_item_no
     begin
        select top 1 @item_no = item_no
          from @tbl_generation
         where generation_no = @generation_no
         order by fitness_value desc

        update @tbl_generation
           set reproduction_no = reproduction_no + (@prm_total_item_no - @total_reproduction)
         where generation_no = @generation_no
           and item_no = @item_no
     end


    if @total_reproduction > @prm_total_item_no
     begin
        select @overflow_item_amount = @total_reproduction - @prm_total_item_no
        select @i = 1

        while @i <= @overflow_item_amount
         begin
            select top 1 @item_no = item_no
              from @tbl_generation
             where generation_no = @generation_no
               and reproduction_no > 0
             order by fitness_value

            update @tbl_generation
               set reproduction_no = reproduction_no - 1
             where generation_no = @generation_no
               and item_no = @item_no
  
            select @i = @i + 1
        end
     end
    -- Geberate Reproduction No (end) =================================================================================
 END


select top 1 @rtn_result = item_value
  from @tbl_generation
 where generation_no = @prm_total_generation_no
   and item_value <= @prm_boundary_high
   and item_value >= @prm_boundary_low
 order by fitness_value desc

select @rtn_result Result

--select * 
--  from @tbl_generation
-- where generation_no = @prm_total_generation_no


set nocount off
return



Error_Handler:

set nocount off
return
  


[註] usp_Polynomial 程式碼
create proc usp_Polynomial @prm_num     float,
                           @rtn_value   float output
as

declare @fx   float

-- 2X^2 + 3X + 7     Ans: -0.75
select @fx = 2 * (@prm_num * @prm_num) + (3 * @prm_num) + 7

select @rtn_value = 100000 * 1 / @fx

return