1. 程式人生 > >16位浮點轉32位浮點

16位浮點轉32位浮點

DirectX(DirectXMath 的 XMConvertHalfToFloat)中提供了半精度浮點轉單精度浮點的完整實作。在動手改寫之前,我們先來了解一下這兩種浮點數的資料儲存結構。

浮點結構

   --  Unsigned modular types, one per bit-field width used by the
   --  floating-point overlay records in this file.  BitsN wraps modulo 2**N.
   type Bits1  is mod 2 ** 1;    --  single-bit field
   type Bits4  is mod 2 ** 4;
   type Bits5  is mod 2 ** 5;
   type Bits7  is mod 2 ** 7;
   type Bits8  is mod 2 ** 8;
   type Bits10 is mod 2 ** 10;
   type Bits22 is mod 2 ** 22;
   type Bits23 is mod 2 ** 23;


   --  Raw unsigned containers: WORD wraps modulo 2**16, UINT modulo 2**32.
   type WORD is mod 2 ** 16;
   type UINT is mod 2 ** 32;

    --  32-bit overlay giving several views of the same storage.
    --  Unchecked_Union means no discriminant is stored, so the variants
    --  alias each other; Pack and size=>32 force the whole record into
    --  exactly 32 bits.
    --  NOTE(review): which physical bits each component occupies depends on
    --  the target's default bit order (first component in the low bits on a
    --  little-endian GNAT target) -- confirm for the platform in use.
    type FLOAT32(i:Integer:=0) is record
      case i is
         --  View 0: three fields of 23 + 8 + 1 bits.
         when 0=>
            Fraction:Bits23;
            Exponent:Bits8;
            Sign:Bits1;
         --  View 1: same 23-bit field first, then the 8 exponent bits split
         --  into a 7-bit part (Exp) followed by a 1-bit part (Es), then a
         --  1-bit field (S).
         when 1=>
            Fr:Bits23;
            Exp:Bits7;
            Es:Bits1;
            S:Bits1;
         --  View 2: the storage read as a Float.
         --  NOTE(review): assumes Float is a 32-bit type -- confirm.
         when 2=>
            f:Float;
         --  Any other discriminant: the raw 32-bit unsigned value.
         when others=>
            U:UINT;
      end case;
   end record with Unchecked_Union,Pack,size=>32;

    --  16-bit overlay giving several views of the same storage.
    --  Unchecked_Union means no discriminant is stored, so the variants
    --  alias each other; Pack and size=>16 force the whole record into
    --  exactly 16 bits.
    --  NOTE(review): which physical bits each component occupies depends on
    --  the target's default bit order -- confirm for the platform in use.
    type HALF(i:Integer:=0) is record
      case i is
         --  View 0: three fields of 10 + 5 + 1 bits.
         when 0=>
            Mantissa:Bits10;
            Exponent:Bits5;
            Sign:Bits1;
         --  View 1: same 10-bit field first, then the 5 exponent bits split
         --  into a 4-bit part (Exp) followed by a 1-bit part (Es), then a
         --  1-bit field (S).
         when 1=>
            Fr:Bits10;
            Exp:Bits4;
            Es:Bits1;
            S:Bits1;
         --  Any other discriminant: the raw 16-bit unsigned value.
         when others=>
            w:word;
      end case;
   end record with Unchecked_Union,Pack,size=>16;

    --  Left shift of a UINT by b bit positions, bound to the compiler's
    --  intrinsic implementation (no Ada body is supplied).
    function Shift_Left (u : UINT; b : Natural) return UINT
      with Import, Convention => Intrinsic;

   --  Short alias that forwards to Shift_Left.
   function Lsh (u : UINT; b : Natural) return UINT is
     (Shift_Left (u, b));

   --  Right shift of a UINT by b bit positions, bound to the compiler's
   --  intrinsic implementation (no Ada body is supplied).
   function Shift_Right(u:UINT;b:Natural) return uint with Import,Convention=>Intrinsic;
   --  Short alias that forwards to Shift_Right.
   --  Fix: this previously forwarded to Shift_Left, so Rsh shifted the wrong way.
   function Rsh(u:uint;b:Natural) return uint is (Shift_Right(u,b));

   --  Convert a 16-bit half-precision value to its 32-bit single-precision
   --  equivalent (same algorithm as DirectXMath's XMConvertHalfToFloat).
   --
   --  h      : half value, read through its Sign / Exponent / Mantissa view.
   --  return : FLOAT32 with Sign / Exponent / Fraction filled in.
   --
   --  Cases handled:
   --    * exponent = 31           -> Inf / NaN (exponent 255, payload kept)
   --    * exponent = 0, mant = 0  -> signed zero
   --    * exponent = 0, mant /= 0 -> subnormal, renormalized for binary32
   --    * otherwise               -> normal number, exponent rebiased by 112
   --
   --  Fixes over the previous version: the zero/subnormal test looked only at
   --  the low 4 exponent bits (so 16#4000#, i.e. 2.0, came back as 0.0), and
   --  the split Es/Exp arithmetic produced exponents 112 too large for every
   --  value whose top exponent bit was set.
   function ConvertHalfToFloat(h:HALF) return FLOAT32 is
      Exp_Rebias   : constant UINT := 127 - 15;          --  binary32 bias - binary16 bias
      Half_Exp_Max : constant UINT := 31;                 --  all-ones 5-bit exponent
      Hidden_Bit   : constant UINT := 2#1_0000_0000_00#;  --  bit 10: implicit leading 1
      Man_Mask     : constant UINT := 2#1111_1111_11#;    --  low 10 mantissa bits

      f32 : FLOAT32;
      Man : UINT := UINT(h.Mantissa);
      Exp : UINT := UINT(h.Exponent);   --  full 5-bit biased exponent
   begin

      f32.Sign := h.Sign;

      --  Inf / NaN: all-ones exponent maps to all-ones exponent; the mantissa
      --  payload is carried over in the top fraction bits.
      if Exp = Half_Exp_Max then
         f32.Exponent := Bits8'Last;
         f32.Fraction := Bits23(Lsh(Man,13));
         return f32;
      end if;

      if Exp = 0 then
         if Man = 0 then
            --  Signed zero.
            f32.Fraction := 0;
            f32.Exponent := 0;
            return f32;
         end if;

         --  Subnormal half: shift the mantissa left until the implicit
         --  leading 1 reaches bit 10, decrementing the exponent once per
         --  shift.  Exp deliberately wraps modulo 2**32 here; adding
         --  Exp_Rebias below brings it back into range.  Man is non-zero and
         --  below 2**10, so this terminates within 10 iterations.
         Exp := 1;
         loop
            Exp := Exp - 1;
            Man := Lsh(Man,1);
            exit when (Man and Hidden_Bit) /= 0;
         end loop;
         Man := Man and Man_Mask;   --  drop the now-implicit leading 1
      end if;

      --  Rebias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits).
      f32.Exponent := Bits8((Exp + Exp_Rebias) and 16#FF#);
      f32.Fraction := Bits23(Lsh(Man,13));

      return f32;

   end;