/*
   UNSAFE for DOS
   Version 1.0 (beta 4)
   Copyright (C) Jodeart Mindworks 1997, 1998, 1999, 2000 2001 2002
*/

/* Size of the user-supplied key in bytes (128 bits); not referenced in this
   part of the file - presumably used by callers */
#define IDEAKEYSIZE 16
/* Size of one cipher block in bytes; ideaCipher() works on byte[8] buffers */
#define IDEABLOCKSIZE 8
/* Number of full rounds run by ideaCipher() before the output transformation */
#define IDEAROUNDS 8
/* Number of 16-bit subkeys in an expanded key: six per round plus four for
   the final output transformation */
#define IDEAKEYLEN (6*IDEAROUNDS+4)

/*
 * Multiplication, modulo (2**16)+1
 * Note that this code is structured on the assumption that
 * untaken branches are cheaper than taken branches, and the
 * compiler doesn't schedule branches.
 */

CONST static word16 mul(register word16 a, register word16 b)
{
 register word32 p;

 p=(word32)a*b;
 if(p)
 {
  b=p;
  a=p>>16;
  return((b-a)+(b<a));
 }
 else if(a)return(1-a);
      else return(1-b);
}

/*
 * Multiplicative inverse of x modulo 65537, found with Euclid's algorithm.
 * The loop body performs two Euclid steps per pass so the roles of the
 * two remainders (x, rem) and the two coefficients (coefA, coefB) never
 * have to be swapped; the sign of the coefficient is accounted for by
 * returning either coefA directly or 1-coefB.
 */
CONST static word16 mulInv(word16 x)
{
 word16 coefA,coefB;
 word16 quot,rem;

 if(x<2)return(x);    /* 0 and 1 are their own inverses */

 coefB=0x10001L/x;    /* x >= 2 here, so the quotient fits in 16 bits */
 rem=0x10001L%x;
 if(rem!=1)
 {
  coefA=1;
  for(;;)
  {
   /* first half-step: reduce x by rem */
   quot=x/rem;
   x%=rem;
   coefA+=quot*coefB;
   if(x==1)return(coefA);

   /* second half-step: reduce rem by x */
   quot=rem/x;
   rem%=x;
   coefB+=quot*coefA;
   if(rem==1)break;
  }
 }
 return(1-coefB);
}

/*
 * Expand a 128-bit user key into the working encryption key EK, which must
 * have room for IDEAKEYLEN 16-bit subkeys.
 * The first eight subkeys are the user key read as big-endian 16-bit words.
 * Every later subkey is built from two neighbouring words of the previous
 * group of eight (indices taken modulo 8 within that group): the first
 * shifted left by 9 and the second shifted right by 7.
 */
static void ideaExpandKey(byte const *userkey, word16 * EK)
{
 int word,slot;
 word16 *group;

 for(word=0;word<8;word++)
  EK[word]=(userkey[2*word]<<8)+userkey[2*word+1];

 group=EK;   /* start of the eight words currently being read from */
 slot=0;
 for(;word<IDEAKEYLEN;word++)
 {
  slot++;
  group[slot+7]=(group[slot&7]<<9)|(group[(slot+1)&7]>>7);
  if(slot==8)
  {
   /* a complete new group has been written: it becomes the source */
   group+=8;
   slot=0;
  }
 }
}

/*
 * Compute the IDEA decryption key DK from an expanded encryption key EK.
 * EK and DK may be the same array: the result is assembled in a local
 * buffer, copied to DK in one go, and the local buffer is then burned.
 *
 * The encryption subkeys are consumed front to back while the decryption
 * subkeys are produced back to front.  Multiplicative subkeys are replaced
 * by their mulInv() value, additive subkeys are negated, and the two
 * subkeys in the middle of each round are copied unchanged.
 */
static void ideaInvertKey(word16 const *EK, word16 DK[IDEAKEYLEN])
{
 int round;
 word16 scratch[IDEAKEYLEN];
 word16 *dst=scratch+IDEAKEYLEN;
 word16 const *src=EK;

 /* First four encryption subkeys become the last four decryption subkeys */
 dst-=4;
 dst[0]=mulInv(src[0]);
 dst[1]=-src[1];
 dst[2]=-src[2];
 dst[3]=mulInv(src[3]);
 src+=4;

 /* Inner rounds: the two negated additive subkeys trade places */
 for(round=1;round<IDEAROUNDS;round++)
 {
  dst-=6;
  dst[4]=src[0];
  dst[5]=src[1];
  dst[0]=mulInv(src[2]);
  dst[1]=-src[4];
  dst[2]=-src[3];
  dst[3]=mulInv(src[5]);
  src+=6;
 }

 /* Last group: same as above, but the additive subkeys keep their order */
 dst-=6;
 dst[4]=src[0];
 dst[5]=src[1];
 dst[0]=mulInv(src[2]);
 dst[1]=-src[3];
 dst[2]=-src[4];
 dst[3]=mulInv(src[5]);

 memcpy(DK, scratch, sizeof(scratch)); /* Copy and destroy temp copy */
 burn(scratch);
}

/*
 * MUL(x,y) computes x = x*y, modulo 0x10001, where an operand value of 0
 * stands for 0x10000.
 *
 * x is modified and is expanded several times, so it must be a
 * side-effect-free lvalue.  y is expanded exactly once in every variant,
 * so an expression with side effects such as *key++ is acceptable; its
 * value must be strictly less than 65536.
 *
 * Except in the SMALL_CACHE variant, the caller must have two temporaries
 * in scope: t16 (16 bits) and t32 (32 bits).
 *
 * All three variants are equivalent - see which is faster on your machine.
 */
#ifdef SMALL_CACHE
 /* Smallest code: defer to the out-of-line mul() function */
 #define MUL(x,y)(x=mul(x,y))
#else
 #ifdef AVOID_JUMPS
  /*
   * Branch-free form: both operands are decremented first, and
   * (x-1)*(y-1)+(x-1)+(y-1) equals x*y-1, so no zero test is written out.
   * The low and high halves of that value are then folded together and
   * the 1 is added back.
   */
  #define MUL(x,y)(x=(x-1),t16=((y)-1),\
          t32=(word32)x*t16+x+t16,x=t32,\
          t16=t32>>16,x=(x-t16)+(x<t16)+1)
 #else
  /*
   * Default form, using conditional expressions:
   *   y != 0 and x != 0 : x = low half - high half of the 32-bit product,
   *                       plus 1 if that subtraction borrows
   *   y != 0 and x == 0 : x = 1 - y
   *   y == 0            : x = 1 - x
   */
  #define MUL(x,y)\
          ((t16=(y))?\
          (x=(x))?\
          t32=(word32)x*t16,\
          x=(t32),\
          t16=t32>>16,\
          x=(x-t16)+(x<t16)\
          :\
          (x=1-t16)\
          :\
          (x=1-x))
 #endif
#endif

/*
 * IDEA encryption/decryption algorithm: transform one 8-byte block.
 *
 *   inbuf  - the 8 input bytes
 *   outbuf - receives the 8 output bytes; may be the same buffer as inbuf,
 *            because the whole input is loaded before anything is stored
 *   key    - expanded key of IDEAKEYLEN subkeys; whether the call encrypts
 *            or decrypts depends only on which key schedule is passed
 *            (see ideaExpandKey and ideaInvertKey)
 *
 * The block is treated as four big-endian 16-bit words.  The bytes are
 * loaded and stored individually instead of through a word16 pointer cast:
 * the cast discarded const, required inbuf/outbuf to be 16-bit aligned and
 * broke the C aliasing rules.  Byte-wise access gives the same result as
 * the former code did with HIGHFIRST set correctly for the host, without
 * depending on that setting.
 */
static void ideaCipher(byte const inbuf[8],byte outbuf[8],word16 const *key)
{
 register word16 x1,x2,x3,x4,s2,s3;
 #ifndef SMALL_CACHE
  register word16 t16; /* Temporaries needed by MUL macro */
  register word32 t32;
 #endif
 int r=IDEAROUNDS;

 /* Load four big-endian words; the word16 cast keeps the shift unsigned
    even where int is only 16 bits wide */
 x1=(word16)(((word16)inbuf[0]<<8)|inbuf[1]);
 x2=(word16)(((word16)inbuf[2]<<8)|inbuf[3]);
 x3=(word16)(((word16)inbuf[4]<<8)|inbuf[5]);
 x4=(word16)(((word16)inbuf[6]<<8)|inbuf[7]);

 /* Each round consumes six subkeys */
 do
 {
  MUL(x1,*key++);
  x2+=*key++;
  x3+=*key++;
  MUL(x4,*key++);
  s3=x3;          /* saved for the cross-over at the end of the round */
  x3^=x1;
  MUL(x3,*key++);
  s2=x2;          /* saved for the cross-over at the end of the round */
  x2^=x4;
  x2+=x3;
  MUL(x2,*key++);
  x3+=x2;
  x1^=x2;
  x4^=x3;
  x2^=s3;
  x3^=s2;
 }while(--r);

 /* Output transformation: the last four subkeys; x2 and x3 trade places */
 MUL(x1, *key++);
 x3+=*key++;
 x2+=*key++;
 MUL(x4,*key);

 /* Store as big-endian words in the order x1, x3, x2, x4 */
 outbuf[0]=(byte)((x1>>8)&0xFF);
 outbuf[1]=(byte)(x1&0xFF);
 outbuf[2]=(byte)((x3>>8)&0xFF);
 outbuf[3]=(byte)(x3&0xFF);
 outbuf[4]=(byte)((x2>>8)&0xFF);
 outbuf[5]=(byte)(x2&0xFF);
 outbuf[6]=(byte)((x4>>8)&0xFF);
 outbuf[7]=(byte)(x4&0xFF);
}
