unit ucsmap;

interface

Uses Classes;

Type
  TCSTyp = (csCodepage, csUTF8, csUTF7);
  TCSInfo = Class
    public
       Typ: TCSTyp;
       UTFMap: Packed Array[128..255] of Word;
       RUTFMap: Packed Array[128..65535] of Byte;
       InternalName, HeaderName, Desc, MIME: String;
       Procedure Clear;
       Procedure Save;
       Procedure Load (Const MyName: String);
  end;
  TCSInfos = Class
    private
       fSets, fAliase, fCharsets : TStringlist;
       Function GetItem(i: Integer): TCSInfo;
    public
       Constructor Create;
       Destructor Destroy; override;
       Function CS2Nr (Const cs: String): Integer;
       property Items[i: Integer]: TCSInfo Read GetItem; default;
       Function Count: Integer;
       property Sets: TStringlist Read FSets;
       Function CharsetIsKnown (Const CSName: String): boolean;
  end;
  TCharsets = Class
    private
       fInternalCharset, fBodyCharsets: String;
       fICS: Char; fBCSs: String;
       Procedure SetInternalCharset (Const s: String);
       Procedure SetBodyCharsets (Const s: String);
       Function _X2Unicode (Const s: String; CSInfo: TCSInfo): String;
       Function _Unicode2X (Const s: String; CSInfo: TCSInfo; Var MissingChar: boolean): String;
    public
       Constructor Create (Const InternalCS, BodyCSs: String);
       property InternalCharset: String Read fInternalCharset Write SetInternalCharset;
       property BodyCharsets: String Read fBodyCharsets Write SetBodyCharsets;
       Function GetMinCharsetFor (Const Inh: String): TCSInfo;
       Function X2Unicode (Const s, CSName: String): String;
       Function Unicode2X (Const s, CSName: String; Var MissingChar: boolean): String;
  end;

Function CSInfos: TCSInfos;

implementation

Uses IniFiles, SysUtils, Windows, Settings, uTransla;

{ Tools }

Const
   HexChars = '0123456789ABCDEF';
   Base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

Function Hex(Const c: Char): Integer;
begin
   Result := Pos(c, HexChars)-1;
   If Result < 0 then Result := 0
end;

Function Hex2Dez(Const s: String): Word;
Var i: integer;
begin
   Result := 0;
   For i := 1 to Length(s) do Result := 16*Result + Hex(s[i])
end;

Function To4Hex (Const x: Word): String;
Var i, R: Integer;
begin
   Result := '';
   R := x;
   For i := 1 to 4 do begin
      Result := HexChars[(R mod 16)+1]+Result;
      R := R div 16
   end
end;

Function To2Hex (Const x: Byte): String;
begin
   Result := HexChars[(x div 16)+1]+HexChars[(x mod 16)+1]
end;

Function GetIni: TIniFIle;
begin
   Result := TIniFile.Create(ExtractFilePath(ParamStr(0))+'charsets.dat')
end;

Function StringReplace (Const Org, Ersetze, Durch: String): String;
Var i, j: Integer; b: boolean;
begin
   Result := Org;
   For i := Length(Org)-Length(Ersetze)+1 downto 1 do begin
      If Org[i] = Ersetze[1] then begin
         b := true;
         For j := 2 to Length(Ersetze) do If b then b := Org[i-1+j] = Ersetze[j];
         If b then Result := Copy(Result, 1, i-1) + Durch + Copy(Result, i+Length(Ersetze), Length(Result))
      end
   end
end;

{ TCSInfos }

Var fCSInfos: TCSInfos;

Function CSInfos: TCSInfos;
begin
   If Not Assigned(fCSInfos) then fCSInfos := TCSInfos.Create;
   Result := fCSInfos
end;

Constructor TCSInfos.Create;
Var p, i: Integer; Info: TCSInfo; s: String;
begin
   fAliase := TStringlist.Create;
   fCharsets := TStringlist.Create;
   fSets := TStringlist.Create;
   With GetIni do try
      ReadSection ('Define', fCharsets);
      If fCharsets.Count = 0 then
         raise Exception.Create (TrL('CharsetsDatDoesntExists.Error',
            'FEHLER! "Charsets.dat" nicht vorhanden bzw. nicht korrekt'));
      fAliase.Clear;
      For i := 0 to fCharsets.Count-1 do begin
         Info := TCSInfo.Create;
         fCharsets.Objects[i] := Info;
         Info.Load (fCharsets[i]);
         s := Info.MIME;
         While s > '' do begin
            p := Pos(',', s);
            If p = 0 then p := Length(s)+1;
            fAliase.AddObject (Copy(s, 1, p-1), Pointer(i));
            Delete(s, 1, p)
         end
      end;

      Info := TCSInfo.Create;
      With Info do begin
         InternalName := 'utf-7';
         HeaderName := 'utf-7';
         Desc := 'Unicode (7-Bit)';
         Typ := csUTF7;
         p := fCharsets.AddObject (InternalName, Info);
         fAliase.AddObject (InternalName, Pointer(p))
      end;

      Info := TCSInfo.Create;
      With Info do begin
         InternalName := 'utf-8';
         HeaderName := 'utf-8';
         Desc := 'Unicode (8-Bit)';
         Typ := csUTF8;
         p := fCharsets.AddObject (InternalName, Info);
         fAliase.AddObject (InternalName, Pointer(p))
      end;

      ReadSection ('Sets', fSets);
      With fSets do begin
         For i := Count-1 downto 0 do begin
            Strings[i] := ReadString('Sets', Strings[i], '');
            If Strings[i] = '' then Delete(i)
         end
      end;
   finally free end
end;

Destructor TCSInfos.Destroy;
Var i: Integer;
begin
   fSets.free;
   fAliase.free;
   For i := 0 to Count-1 do Items[i].free;
   fCharsets.free
end;

Function TCSInfos.GetItem(i: Integer): TCSInfo;
begin
   If (i >= 0) and (i<=Count-1) then begin
      Result := TCSInfo(fCharsets.Objects[i])
   end else begin
      Result := NIL
   end
end;

Function TCSInfos.Count: Integer;
begin
   Result := fCharsets.Count
end;

Function TCSInfos.CS2Nr (Const cs: String): Integer;
Var s: String;
begin
   s := LowerCase(CS);
   Result := fAliase.IndexOf(s);
   If Result >= 0 then Result := Longint(fAliase.Objects[Result]);
   If Result < 0 then Result := fCharsets.IndexOf(s);
   If Result < 0 then Result := 0
end;

Function TCSInfos.CharsetIsKnown (Const CSName: String): boolean;
Var s: String;
begin
   s := LowerCase(CSName);
   Result := true;
   If fAliase.IndexOf(s) < 0 then
      If fCharsets.IndexOf(s) < 0 then Result := false
end;

{ TCharsets }

Constructor TCharsets.Create (Const InternalCS, BodyCSs: String);
begin
   InternalCharset := InternalCS;
   BodyCharsets := BodyCSs
end;

Procedure TCharsets.SetInternalCharset (Const s: String);
begin
   If s = '' then exit;
   fInternalCharset := s;
   fICS := Chr(CSInfos.CS2Nr(s))
end;

Procedure TCharsets.SetBodyCharsets (Const s: String);
Var s2, s3: String; p: Integer;
begin
   If s = '' then exit;
   fBodyCharsets := s;
   fBCSs := '';
   s2 := s;
   While s2 > '' do begin
      p := Pos(',', s2);
      If p > 0 then begin
         s3 := Trim(Copy(s2, 1, p-1));
         Delete(s2, 1, p)
      end else begin
         s3 := Trim(s2); s2 := ''
      end;
      fBCSs := fBCSs + Chr(CSInfos.CS2Nr(s3))
   end
end;

Function TCharsets.GetMinCharsetFor (Const Inh: String): TCSInfo;
Var z: String; i, j: Integer; s: String; W: Word;
begin
   s := _X2Unicode (Inh, CSInfos[Ord(fICS)]);
   z := fBCSs;
   For i := 1 to Length(s) div 2 do begin
      W := Ord(s[i*2-1])*256 + Ord(s[i*2]);
      If W > $80 then begin
         For j := Length(z) downto 1 do begin
            With CSInfos[Ord(z[j])] do
               If (Typ = csCodepage) and (RUTFMap[W] = 0) then Delete(z, j, 1)
         end
      end
   end;
   If z = ''
      then Result := CSInfos[0]
      else Result := CSInfos[ord(z[1])]
end;

Function TCharsets.X2Unicode (Const s, CSName: String): String;
begin
   Result := _X2Unicode (s, CSInfos[CSInfos.CS2Nr(CSName)])
end;

Function TCharsets.Unicode2X (Const s, CSName: String; Var MissingChar: boolean): String;
begin
   Result := _Unicode2X (s, CSInfos[CSInfos.CS2Nr(CSName)], MissingChar)
end;

Function TCharsets._X2Unicode (Const s: String; CSInfo: TCSInfo): String;
Var Ueb, Buf: String; c: Char;
    M, i, j, p, q: Integer; W, W2: Word; B: Byte;
begin Case CSInfo.Typ of
   csUTF7: begin
      M := 0; W := 0; p := 1; Buf := '';
      SetLength(Result, 2* Length(s));
      For i := 1 to Length(s) do begin
         Ueb := '';
         c := s[i];
         Case M of
            0: If c = '+' then begin Buf := #0+c; Inc(M) end
               else Ueb := #0+c;
            1..3:
               If ( M = 1 ) and ( c = '-' ) then begin
                  M := 0; Ueb := #0 + '+'
               end else begin
                  q := Pos(c, Base64Chars)-1; Buf := Buf+#0+c;
                  If q >= 0 then begin
                     Case M of
                        1: W := q shl 10;
                        2: W := W + (q shl 4);
                        3: W := W + ((q and $FC) shr 2);
                     end;
                     Inc(M)
                  end else begin
                     M := 0; Ueb := Buf
                  end
               end;
            4: begin
                  M := 0;
                  Ueb := Chr(W div 256) + Chr(W mod 256);
                  If c <> '-' then Ueb := Ueb + #0 + c
               end
         end;
         For j := 1 to Length(Ueb) do begin
            Result[p] := Ueb[j]; Inc(p)
         end
      end;
      SetLength(Result, p-1)
   end;
   csUTF8: begin
      SetLength(Result, 2 * Length(s));
      p := 1; M := 0; W := 0;
      For i := 1 to Length(s) do begin
         c := s[i]; B := Ord(c); W2 := 0;
         If M = 0 then begin
            If B <= $7F then W2 := B
            else If (B and $E0) = $C0 then begin W := B and $1F; M := 1 end
            else If (B and $F0) = $E0 then begin W := B and $0F; M := 2 end
            else W2 := 0; // Unicode mit mehr als 2 Bytes wird nicht untersttzt
         end else begin
            W := (W shl 6) + (B and $3F);
            If M = 1 then W2 := W;
            Dec(M)
         end;
         If W2 > 0 then begin
            Result[p] := Chr(W2 div 256); Inc(p);
            Result[p] := Chr(W2 mod 256); Inc(p);
         end
      end;
      SetLength(Result, p-1)
   end;
   csCodepage: With CSInfo do begin
      SetLength(Result, 2*Length(s));
      For i := 1 to Length(s) do begin
         If ord(s[i]) < $80
            then W := Ord(s[i])
            else W := UTFMap[Ord(s[i])];
         Result[i*2-1] :=  Chr(W div 256);
         Result[i*2] :=  Chr(W mod 256)
      end
   end
end end;

Function TCharsets._Unicode2X (Const s: String; CSInfo: TCSInfo; Var MissingChar: boolean): String;
Var Ueb: String; i, j, p: Integer; B: Byte; bEncode: boolean; W: Word;
begin
   MissingChar := false;
   With CSInfo do Case Typ of
      csUTF7: begin
         SetLength(Result, Length(s));
         p := 1;
         For i := 1 to Length(s) div 2 do begin
            W := Ord(s[i*2-1])*256 + Ord(s[i*2]);
            Ueb := '';
            If W = Ord('+') then ueb := '+-'
            else begin
               If W > 126 then bEncode := true
               else bEncode := Chr(W mod 256) IN [#0, '~', '\'];
               If bEncode then begin
                  Ueb := '+'+Base64Chars[((W and $FC00) shr 10)+1]
                            +Base64Chars[((W and $03F0) shr 4)+1]
                            +Base64Chars[((W and $000F) shl 2)+1]
                            + '-';
               end else begin
                  If W > 0 then Ueb := Chr(W mod 256)
               end
            end;
            If p + Length(Ueb) >= Length(Result) then SetLength(Result, p + 100);
            For j := 1 to Length(Ueb) do begin Result[p] := Ueb[j]; Inc(p) end;
            //MessageBox ( 0, PChar(Inttostr(p)+'/'+Inttostr(Length(Result))+': '+Copy(Result, 1, p)), NIL, 0);
         end;
         SetLength(Result, p-1)
      end;
      csUTF8: begin
         Result := s;
         SetLength(Result, Length(s));
         p := 1;
         For i := 1 to Length(s) div 2 do begin
            W := Ord(s[i*2-1]) * 256 + Ord(s[i*2]);
            If W < $80 then begin
               Ueb := Chr(W and $7F)
            end else
            If W < $800 then begin
               Ueb :=  Chr($C0 + ((W and $03C0) shr 6))
                      + Chr($80 + (W and $003F))
            end else begin
               Ueb :=  Chr($E0 + ((W and $F000) shr 12))
                      + Chr($80 + (W and $0FC0) shr 6)
                      + Chr($80 + (W and $003F))
            end;
            If p + Length(Ueb) >= Length(Result) then SetLength(Result, p + 100);
            For j := 1 to Length(Ueb) do begin Result[p] := Ueb[j]; Inc(p) end
         end;
         SetLength(Result, p-1)
      end;
      csCodepage: begin
         SetLength(Result, Length(s) div 2);
         For i := 1 to Length(s) div 2 do begin
            W := Ord(s[i*2-1])*256 + Ord(s[i*2]);
            If W < $80
               then B := W
               else B := RUTFMap[W];
            if B = 0 then begin
               B := Ord('?');
               MissingChar := true
            end;
            Result[i] := Chr(B)
         end
      end;
   end
end;

{ TCSInfo }

procedure TCSInfo.Clear;
Var i: Integer;
begin
   InternalName := ''; HeaderName := ''; MIME := ''; Desc := '';
   For i := Low(UTFMap) to High(UTFMap) do UTFMap[i] := 0
end;

Procedure TCSInfo.Load (Const MyName: String);
Var p, i: Integer; s: String;
begin
   Clear;
   Typ := csCodepage;
   With GetIni do try
      s := ReadString ('Define', MyName, '');
      If Copy(s, 1, 3) <> 'V1#' then Exit;
      Delete(s, 1, 3);
      p := Pos('#', s);
      If p = 0 then Exit;
      Desc := Copy(s, 1, p-1);
      Delete(s, 1, p);
      MIME := s;
      s := ReadString ('ToUTF', MyName, '');
      For i := Low(UTFMap) to High(UTFMap) do If Length(s) >= 4 then begin
         UTFMap[i] := Hex2Dez(Copy(s, 1, 4));
         Delete(s, 1, 4)
      end;
      For i := Low(RUTFMap) to High(RUTFMap) do RUTFMap[i] := 0;
      For i := Low(UTFMap) to High(UTFMap) do If UTFMap[i] > 0 then RUTFMap[UTFMap[i]] := i;
      InternalName := MyName;
      HeaderName := Copy(MIME, 1, Pos(',', MIME+',')-1)
   finally Free end
end;

Procedure TCSInfo.Save;
Var s: String; i: Integer;
begin
   s := '';
   For i := Low(UTFMap) to High(UTFMap) do s := s + To4Hex(UTFMap[i]);
   With GetIni do try
      WriteString ('ToUTF', InternalName, s);
   finally Free end
end;

initialization
   fCSInfos := NIL
finalization
   If Assigned(fCSInfos) then fCSInfos.free
end.
