/**
 * @file      tge.c
 * @brief     tiny 3D Graphics Engine
 * @date      2011-03-11
 *
 * @copyright
 * Copyright 2010-2011 Japan Aerospace Exploration Agency
 *
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include "tge.h"
#include "zbuf.h"
#include "float_util.h"

/* Number of 4x4 matrices each matrix stack can hold (the stack arrays in
 * struct TGEContext are sized as <count>*16 doubles). */
#define TGE_MODEL_MATRIX_STACK_SIZE        32
#define TGE_PROJECTION_MATRIX_STACK_SIZE    8

/* Projection kinds; presumably the values stored in TGEProjection.type
 * (the code that assigns them is outside this part of the file). */
#define TGE_FRUSTUM    0x8001
#define TGE_ORTHO      0x8002

/* Small arithmetic helpers.
 * NOTE: TGE_MAX, TGE_MIN and TGE_BETWEEN evaluate their arguments more than
 * once, so never pass expressions with side effects.
 * TGE_BETWEEN(n,a,b) clamps n into [a,b] (assuming a <= b).
 * The angle conversions rely on M_PI, which <math.h> provides only as an
 * extension on some toolchains. */
#define TGE_MAX(a,b)        (((a)<(b)) ? (b) : (a))
#define TGE_MIN(a,b)        (((a)>(b)) ? (b) : (a))
#define TGE_DEG2RAD(x)      ((x)*M_PI/180.0)
#define TGE_RAD2DEG(x)      ((x)*180.0/M_PI)
#define TGE_BETWEEN(n,a,b)  TGE_MIN(TGE_MAX((n),(a)),(b))

// tgeDrawTriangle debugging:
// uncomment the define below to enable debug output on stderr.
//#define TGE_DRAW_TRIANGLE_DEBUG

/*
 * Function-pointer types for the array type-conversion getters declared
 * below (tge<Type>ArrayGet{fv,dv,uiv,iv}).  A getter receives the client
 * array (data), an element index, the array stride and the component count
 * (size), and writes the converted components to value.  The array
 * descriptors in struct TGEContext store one of these per array.
 */
typedef void (* tgeGetfv)(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEfloat* value);
typedef void (* tgeGetdv)(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEdouble* value);
typedef void (* tgeGetuiv)(const TGEvoid* data, TGEint index,
                           TGEint stride, TGEsizei size, TGEuint* value);
typedef void (* tgeGetiv)(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEint* value);

/*
 * Projection configuration: the projection kind plus the six clipping-volume
 * bounds.  A copy is saved/restored alongside the projection matrix by
 * tgePushMatrix()/tgePopMatrix().
 */
typedef struct _TGEProjection {
  TGEenum type;              /* presumably TGE_FRUSTUM or TGE_ORTHO -- set outside this part of the file */
  TGEdouble left, right;
  TGEdouble bottom, top;
  TGEdouble near, far;
} TGEProjection;

/*
 * Internal vertex format: the per-vertex attributes together with the
 * values derived from them while drawing.
 */
typedef struct _TGEVertex {
  TGEdouble vertex[3];       /* vertex position */
  TGEdouble normal[3];       /* normal vector */
  TGEubyte color[4];         /* original color (RGBA) */
  TGEdouble texCoord[2];     /* texture coordinate */
  TGEdouble multVertex[3];   /* transformed position (screen) */
  TGEdouble lightAffect;     /* lighting influence */
  TGEubyte compColor[4];     /* effective color (RGBA) */
  TGEboolean calced;         /* presumably set once the derived fields are computed -- TODO confirm in tgeCalcVertex */
} TGEVertex;

/*
 * Internal layout of the opaque TGEContext handle: the complete state of
 * one rendering context.  Public functions operate on the current context
 * (_ge), which tgeMakeCurrent() selects.
 */
struct TGEContext {
  TGEenum error;                      /* last error code recorded by an API call */
  canvas* screen;                     /* render target bound by tgeMakeCurrent() */
  /* Capability flags toggled by tgeEnable()/tgeDisable() and
   * tgeEnableClientState()/tgeDisableClientState(). */
  TGEboolean enableDepthTest;
  TGEboolean enableTexture;
  TGEboolean enableCullFace;
  TGEboolean enableLight;
  TGEboolean enableAutoNormal;
  TGEboolean enableRescaleNormal;
  TGEboolean enableColorArray;
  TGEboolean enableNormalArray;
  TGEboolean enableTexCoordArray;
  TGEboolean enableVertexArray;
  TGEboolean enableBlend;
  TGEboolean enablePolygonOffsetFill;
  TGEboolean enablePolygonOffsetLine;
  TGEboolean enablePolygonOffsetPoint;
  /* Blend factors as passed to tgeBlendFunc(). */
  TGEenum blendSrcFactor;
  TGEenum blendDstFactor;
  TGEclampf clearColor[4];            /* RGBA used by tgeClear() */
  TGEclampf color[4];                 /* current RGBA set by tgeColor4*() */
  TGEfloat lightPosition[3];          /* light position as transformed by tgeLightfv() */
  canvas* texture;                    /* texture bound by tgeTexImage2D(); not freed by tgeDestroyContext() */
  TGEdouble projectionToScreenScale;
  /* Matrix stacks: each entry is 16 doubles; modelMatrix/projectionMatrix
   * point at the current top entry inside the corresponding stack. */
  TGEdouble* modelMatrix;
  TGEdouble modelMatrixStack[TGE_MODEL_MATRIX_STACK_SIZE*16];
  TGEint modelMatrixStackLevel;
  TGEdouble* projectionMatrix;
  TGEdouble projectionMatrixStack[TGE_PROJECTION_MATRIX_STACK_SIZE*16];
  TGEProjection projectionStack[TGE_PROJECTION_MATRIX_STACK_SIZE];  /* saved projection settings, parallel to projectionMatrixStack */
  TGEint projectionMatrixStackLevel;
  TGEdouble screenMatrix[16];         /* refreshed by tgeMakeScreenMatrix() */
  TGEProjection projection;           /* current projection settings */
  TGEenum cullFace;                   /* face selected by tgeCullFace() */
  TGEenum matrixMode;                 /* TGE_MODELVIEW or TGE_PROJECTION */
  /* tgeVertexPointer */
  struct {
    TGEint size;
    TGEenum type;
    TGEsizei stride;
    const void* data;
    tgeGetdv getValuedv;
  } vertexArray;
  /* tgeNormalPointer */
  struct {
    TGEenum type;
    TGEsizei stride;
    const void* data;
    tgeGetdv getValuedv;
  } normalArray;
  /* tgeColorPointer */
  struct {
    TGEint size;
    TGEenum type;
    TGEsizei stride;
    const void* data;
    tgeGetuiv getValueuiv;
    tgeGetdv getValuedv;
  } colorArray;
  /* tgeTexCoordPointer */
  struct {
    TGEint size;
    TGEenum type;
    TGEsizei stride;
    const void* data;
    tgeGetdv getValuedv;
  } texCoordArray;
  zbuf* depthBuffer;                  /* created in tgeMakeCurrent(), freed in tgeDestroyContext() */
  TGEboolean depthMask;
  TGEfloat polygonOffsetFactor;
  TGEfloat polygonOffsetUnits;
};
/* Convert the opaque public handle into the internal structure. */
#define TGE_CAST(ctx)     ((struct TGEContext *)(ctx))

/* Pack 8-bit channels into a COLORREF: red in bits 16-23, green in bits
 * 8-15, blue in bits 0-7; MakeARGB additionally places alpha in bits 24-31.
 * Each channel argument is masked to its low 8 bits. */
#define RGB_TO_COLORREF(r,g,b)   ((COLORREF)((((r)&0xff)<<16)|(((g)&0xff)<<8)|((b)&0xff)))
#define MakeARGB(a,r,g,b)        ((COLORREF)((((uint32_t)(a)&0xff)<<24) | \
                                             (((uint32_t)(r)&0xff)<<16) | \
                                             (((uint32_t)(g)&0xff)<< 8) | \
                                             ((uint32_t)(b)&0xff)))

/* Current context used by every public API call; set by tgeMakeCurrent()
 * and reset to NULL when that context is destroyed. */
static struct TGEContext* _ge = NULL;



void tgeTransposeMatrixd(const TGEdouble* m, TGEdouble* mout);
void tgeDumpMatrixd(TGEint r, TGEint c, const TGEdouble* va);
void tgeMultMatrixInternald(const TGEdouble* ma, const TGEdouble* mb, TGEdouble* mc);
void tgeMultVectorMatrixd(const TGEdouble* va, const TGEdouble* ma, TGEdouble* vb);
TGEboolean tgeiVertexIsInnerTriangled(const TGEdouble* va, const TGEdouble* vb, const TGEdouble* vc, const TGEdouble* v);

// Array type conversion functions

void tgeByteArrayGetfv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEfloat* value);
void tgeUByteArrayGetfv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEfloat* value);
void tgeShortArrayGetfv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEfloat* value);
void tgeUShortArrayGetfv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEfloat* value);
void tgeIntArrayGetfv(const TGEvoid* data, TGEint index,
                      TGEint stride, TGEsizei size, TGEfloat* value);
void tgeUIntArrayGetfv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEfloat* value);
void tgeFloatArrayGetfv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEfloat* value);
void tgeDoubleArrayGetfv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEfloat* value);
void tgeByteArrayGetdv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEdouble* value);
void tgeUByteArrayGetdv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEdouble* value);
void tgeShortArrayGetdv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEdouble* value);
void tgeUShortArrayGetdv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEdouble* value);
void tgeIntArrayGetdv(const TGEvoid* data, TGEint index,
                      TGEint stride, TGEsizei size, TGEdouble* value);
void tgeUIntArrayGetdv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEdouble* value);
void tgeFloatArrayGetdv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEdouble* value);
void tgeDoubleArrayGetdv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEdouble* value);
void tgeByteArrayGetiv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEint* value);
void tgeUByteArrayGetiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEint* value);
void tgeShortArrayGetiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEint* value);
void tgeUShortArrayGetiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEint* value);
void tgeIntArrayGetiv(const TGEvoid* data, TGEint index,
                      TGEint stride, TGEsizei size, TGEint* value);
void tgeUIntArrayGetiv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEint* value);
void tgeFloatArrayGetiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEint* value);
void tgeDoubleArrayGetiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEint* value);
void tgeByteArrayGetuiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEuint* value);
void tgeUByteArrayGetuiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEuint* value);
void tgeShortArrayGetuiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEuint* value);
void tgeUShortArrayGetuiv(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEuint* value);
void tgeIntArrayGetuiv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEuint* value);
void tgeUIntArrayGetuiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEuint* value);
void tgeFloatArrayGetuiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEuint* value);
void tgeDoubleArrayGetuiv(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEuint* value);

// Other private functions
static void tgeCalcVertex(TGEVertex* v);
static void tgeDrawVertex(TGEVertex* v);
static void tgeDrawLine(TGEVertex* va, TGEVertex* vb);
static void tgeDrawTriangle(TGEVertex* va, TGEVertex* vb, TGEVertex* vc);
static TGEenum tgeFaceTest(const TGEVertex* va, const TGEVertex* vb, const TGEVertex* vc);
static void tgeDrawArrays_points(TGEint first, TGEsizei count);
static void tgeDrawArrays_line_strip(TGEint first, TGEsizei count);
static void tgeDrawArrays_line_loop(TGEint first, TGEsizei count);
static void tgeDrawArrays_lines(TGEint first, TGEsizei count);
static void tgeDrawArrays_triangle_strip(TGEint first, TGEsizei count);
static void tgeDrawArrays_triangle_fan(TGEint first, TGEsizei count);
static void tgeDrawArrays_triangles(TGEint first, TGEsizei count);
static void tgeDrawElements_points(TGEsizei count, TGEuint* indices);
static void tgeDrawElements_line_strip(TGEsizei count, TGEuint* indices);
static void tgeDrawElements_line_loop(TGEsizei count, TGEuint* indices);
static void tgeDrawElements_lines(TGEsizei count, TGEuint* indices);
static void tgeDrawElements_triangle_strip(TGEsizei count, const TGEuint* indices);
static void tgeDrawElements_triangle_fan(TGEsizei count, const TGEuint* indices);
static void tgeDrawElements_triangles(TGEsizei count, const TGEuint* indices);
static void tgeExtractArray(TGEint index, TGEVertex* tv);
static void tgeMakeScreenMatrix(void);
static TGEdouble tgeCalcLightAffectd(TGEdouble* vertex, TGEdouble* lightpos);
static TGEdouble* tgePushMatrixInternal(TGEdouble* stack, TGEsizei size, TGEint level);
static TGEdouble* tgePopMatrixInternal(TGEdouble* stack, TGEsizei size, TGEint level);


TGEContext* tgeCreateContext()
{
  struct TGEContext* ge;
  TGEclampf clearColor[] = {0.0, 0.0, 0.0, 1.0};
  TGEdouble m[16] = {
    1.0, 0.0, 0.0, 0.0,
    0.0, 1.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 0.0,
    0.0, 0.0, 0.0, 1.0
  };
  ge = malloc(sizeof(*ge));
  if (!ge) {
    return NULL;
  }
  ge->error = TGE_NO_ERROR;
  ge->texture = NULL;
  ge->screen = NULL;
  ge->depthBuffer = NULL;
  ge->depthMask = TGE_TRUE;
  ge->enableDepthTest = TGE_FALSE;
  ge->enableTexture = TGE_FALSE;
  ge->enableCullFace = TGE_FALSE;
  ge->enableLight = TGE_FALSE;
  ge->enableAutoNormal = TGE_FALSE;
  ge->enableRescaleNormal = TGE_FALSE;
  ge->enableBlend = TGE_FALSE;
  ge->enablePolygonOffsetFill = TGE_FALSE;
  ge->enablePolygonOffsetLine = TGE_FALSE;
  ge->enablePolygonOffsetPoint = TGE_FALSE;
  ge->enableColorArray = TGE_FALSE;
  ge->enableNormalArray = TGE_FALSE;
  ge->enableTexCoordArray = TGE_FALSE;
  ge->enableVertexArray = TGE_FALSE;
  ge->matrixMode = TGE_MODELVIEW;
  ge->modelMatrixStackLevel = 0;
  ge->projectionMatrixStackLevel = 0;
  ge->modelMatrix = ge->modelMatrixStack;
  ge->projectionMatrix = ge->projectionMatrixStack;
  ge->blendSrcFactor = TGE_ONE;
  ge->blendDstFactor = TGE_ZERO;
  ge->polygonOffsetFactor = 0;
  ge->polygonOffsetUnits = 0;
  memcpy(ge->clearColor, clearColor, sizeof(clearColor));
  memcpy(ge->projectionMatrix, m, sizeof(m));
  memcpy(ge->modelMatrix, m, sizeof(m));
  return (TGEContext *)ge;
}

/**
 * Release a context created by tgeCreateContext().
 *
 * When the context is the current one, the triangle renderer is shut down
 * with tri_end() and the current-context pointer is cleared.  The depth
 * buffer, if one was created, is freed together with the context itself.
 * Passing NULL is a no-op.
 *
 * @param self  context to destroy (may be NULL).
 */
void tgeDestroyContext(TGEContext* self)
{
  struct TGEContext* ctx = TGE_CAST(self);

  if (NULL == ctx) {
    return;
  }
  if (ctx == _ge) {
    tri_end();
    _ge = NULL;
  }
  if (NULL != ctx->depthBuffer) {
    zbuf_free(ctx->depthBuffer);
  }
  free(ctx);
}

/**
 * Bind a context to a target canvas and make it the current context.
 *
 * A depth buffer matching the canvas size is created for the context, the
 * triangle renderer is started on the canvas, and the context's depth-test
 * and texture settings are pushed to the renderer.
 *
 * Compared with the previous implementation this version
 *  - rejects a NULL screen or context instead of dereferencing it,
 *  - frees the depth buffer left over from an earlier tgeMakeCurrent() call
 *    on the same context instead of leaking it, and
 *  - leaves all state untouched when the depth buffer cannot be created
 *    (assumes zbuf_create() reports failure by returning NULL -- TODO
 *    confirm against zbuf.h).
 *
 * @param screen  canvas to render into.
 * @param ctx     context to activate.
 * @return TGE_TRUE on success, TGE_FALSE on invalid arguments or when the
 *         depth buffer could not be created.
 */
TGEboolean tgeMakeCurrent(canvas* screen, TGEContext* ctx)
{
  struct TGEContext* ge = TGE_CAST(ctx);
  zbuf* depth;

  if (!ge || !screen) {
    return TGE_FALSE;
  }
  /* Create the new buffer first so a failure leaves the old state intact. */
  depth = zbuf_create(screen->width,
                      screen->height,
                      ZBUFConditionLess,
                      -1.0, 1.0);
  if (!depth) {
    return TGE_FALSE;
  }
  if (ge->depthBuffer) {
    /* The renderer is re-pointed at the new buffer by tri_begin() below. */
    zbuf_free(ge->depthBuffer);
  }
  ge->screen = screen;
  ge->depthBuffer = depth;
  _ge = ge;
  tri_begin(_ge->screen, _ge->depthBuffer, NULL);
  tri_set_depth_test(_ge->enableDepthTest);
  if (_ge->enableTexture && _ge->texture) {
    tri_set_texture(_ge->texture);
  }
  return TGE_TRUE;
}

void tgePushMatrix(void)
{
  TGEdouble* m;
  switch (_ge->matrixMode) {
  case TGE_MODELVIEW:
    m = tgePushMatrixInternal(_ge->modelMatrixStack,
                              TGE_MODEL_MATRIX_STACK_SIZE,
                              _ge->modelMatrixStackLevel);
    if (!m) return;
    _ge->modelMatrix = m;
    _ge->modelMatrixStackLevel++;
    break;
  case TGE_PROJECTION:
    m = tgePushMatrixInternal(_ge->projectionMatrixStack,
                              TGE_PROJECTION_MATRIX_STACK_SIZE,
                              _ge->projectionMatrixStackLevel);
    if (!m) return;
    _ge->projectionStack[_ge->projectionMatrixStackLevel] = _ge->projection;
    _ge->projectionMatrix = m;
    _ge->projectionMatrixStackLevel++;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
}

/*
 * Copy the matrix at stack slot `level` into slot `level + 1`.
 *
 * stack : base of a matrix stack holding `size` matrices of 16 doubles each
 * size  : capacity of the stack, in matrices
 * level : index of the current top matrix
 *
 * Returns a pointer to the new top matrix, or NULL after recording
 * TGE_STACK_OVERFLOW in the current context when no free slot is left.
 */
static TGEdouble* tgePushMatrixInternal(TGEdouble* stack, TGEsizei size, TGEint level)
{
  const TGEint next = level + 1;
  TGEdouble* dst;

  if (next >= size) {
    _ge->error = TGE_STACK_OVERFLOW;
    return NULL;
  }
  dst = stack + next*16;
  /* Source and destination are adjacent slots, so they never overlap. */
  memcpy(dst, stack + level*16, 16 * sizeof(*dst));
  return dst;
}

void tgePopMatrix(void)
{
  TGEdouble* m;
  switch (_ge->matrixMode) {
  case TGE_MODELVIEW:
    m = tgePopMatrixInternal(_ge->modelMatrixStack,
                             TGE_MODEL_MATRIX_STACK_SIZE,
                             _ge->modelMatrixStackLevel);
    if (!m) return;
    _ge->modelMatrix = m;
    _ge->modelMatrixStackLevel--;
    break;
  case TGE_PROJECTION:
    m = tgePopMatrixInternal(_ge->projectionMatrixStack,
                             TGE_PROJECTION_MATRIX_STACK_SIZE,
                             _ge->projectionMatrixStackLevel);
    if (!m) return;
    _ge->projection = _ge->projectionStack[_ge->projectionMatrixStackLevel-1];
    _ge->projectionMatrix = m;
    _ge->projectionMatrixStackLevel--;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  tgeMakeScreenMatrix();
}

/*
 * Locate the matrix one slot below `level` in a matrix stack.
 *
 * stack : base of a matrix stack of 16-double entries
 * size  : capacity of the stack (not used by this helper)
 * level : index of the current top matrix
 *
 * Returns a pointer to the matrix at `level - 1`, or NULL after recording
 * TGE_STACK_UNDERFLOW in the current context when `level` is 0.
 */
static TGEdouble* tgePopMatrixInternal(TGEdouble* stack, TGEsizei size, TGEint level)
{
  (void)size;
  if (0 == level) {
    _ge->error = TGE_STACK_UNDERFLOW;
    return NULL;
  }
  return stack + (level - 1)*16;
}

/**
 * Set the texture of the current context.
 *
 * The canvas pointer is stored as-is; it is handed to the triangle renderer
 * immediately only when texturing is currently enabled.
 *
 * @param tex  canvas to use as texture.
 */
void tgeTexImage2D(canvas* tex)
{
  _ge->texture = tex;
  if (!_ge->enableTexture) {
    return;
  }
  tri_set_texture(tex);
}

/**
 * Enable a capability of the current context.
 *
 * Besides setting the context flag, texture, depth-test and blend changes
 * are forwarded to the triangle renderer (the texture only when one has
 * been set).  An unsupported capability records TGE_INVALID_ENUM.
 *
 * @param cap  capability to enable.
 */
void tgeEnable(TGEenum cap)
{
  TGEboolean* flag;

  /* Pick the flag that belongs to the capability... */
  switch (cap) {
  case TGE_CULL_FACE:            flag = &_ge->enableCullFace; break;
  case TGE_LIGHT0:               flag = &_ge->enableLight; break;
  case TGE_TEXTURE_2D:           flag = &_ge->enableTexture; break;
  case TGE_DEPTH_TEST:           flag = &_ge->enableDepthTest; break;
  case TGE_AUTO_NORMAL:          flag = &_ge->enableAutoNormal; break;
  case TGE_BLEND:                flag = &_ge->enableBlend; break;
  case TGE_POLYGON_OFFSET_FILL:  flag = &_ge->enablePolygonOffsetFill; break;
  case TGE_POLYGON_OFFSET_LINE:  flag = &_ge->enablePolygonOffsetLine; break;
  case TGE_POLYGON_OFFSET_POINT: flag = &_ge->enablePolygonOffsetPoint; break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  *flag = TGE_TRUE;

  /* ...then mirror the change into the renderer where it keeps its own state. */
  if (TGE_TEXTURE_2D == cap) {
    if (_ge->texture) {
      tri_set_texture(_ge->texture);
    }
  } else if (TGE_DEPTH_TEST == cap) {
    tri_set_depth_test(1);
  } else if (TGE_BLEND == cap) {
    tri_set_blend(1);
  }
}

/**
 * Disable a capability of the current context.
 *
 * Besides clearing the context flag, texture, depth-test and blend changes
 * are forwarded to the triangle renderer (the renderer texture is reset to
 * NULL).  An unsupported capability records TGE_INVALID_ENUM.
 *
 * @param cap  capability to disable.
 */
void tgeDisable(TGEenum cap)
{
  TGEboolean* flag;

  /* Pick the flag that belongs to the capability... */
  switch (cap) {
  case TGE_CULL_FACE:            flag = &_ge->enableCullFace; break;
  case TGE_LIGHT0:               flag = &_ge->enableLight; break;
  case TGE_TEXTURE_2D:           flag = &_ge->enableTexture; break;
  case TGE_DEPTH_TEST:           flag = &_ge->enableDepthTest; break;
  case TGE_AUTO_NORMAL:          flag = &_ge->enableAutoNormal; break;
  case TGE_BLEND:                flag = &_ge->enableBlend; break;
  case TGE_POLYGON_OFFSET_FILL:  flag = &_ge->enablePolygonOffsetFill; break;
  case TGE_POLYGON_OFFSET_LINE:  flag = &_ge->enablePolygonOffsetLine; break;
  case TGE_POLYGON_OFFSET_POINT: flag = &_ge->enablePolygonOffsetPoint; break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  *flag = TGE_FALSE;

  /* ...then mirror the change into the renderer where it keeps its own state. */
  if (TGE_TEXTURE_2D == cap) {
    tri_set_texture(NULL);
  } else if (TGE_DEPTH_TEST == cap) {
    tri_set_depth_test(0);
  } else if (TGE_BLEND == cap) {
    tri_set_blend(0);
  }
}

/**
 * Query a state value of the current context as boolean(s).
 *
 * Numeric state is converted the way glGetBooleanv does: TGE_FALSE when the
 * value is zero, TGE_TRUE otherwise.
 *
 * Fixes over the previous implementation:
 *  - TGE_POLYGON_OFFSET_FILL/LINE/POINT lacked `break` and fell through into
 *    TGE_PROJECTION_MATRIX, overwriting the result and writing 16 elements
 *    into a buffer the caller sized for one;
 *  - TGE_CURRENT_COLOR, TGE_POLYGON_OFFSET_FACTOR and
 *    TGE_POLYGON_OFFSET_UNITS had the zero test inverted;
 *  - TGE_CULL_FACE_MODE stored the raw enum in a boolean.
 *
 * @param pname   state to query; an unknown name records TGE_INVALID_ENUM
 *                and leaves params untouched.
 * @param params  output buffer: 4 elements for TGE_CURRENT_COLOR and
 *                TGE_VIEWPORT, 16 for TGE_PROJECTION_MATRIX, 1 otherwise.
 */
void tgeGetBooleanv(TGEenum pname, TGEboolean* params)
{
  TGEint i;
  switch (pname) {
  case TGE_ALPHA_TEST:
    *params = TGE_FALSE; break;
  case TGE_AUTO_NORMAL:
    *params = _ge->enableAutoNormal; break;
  case TGE_BLEND:
    *params = _ge->enableBlend; break;
  case TGE_COLOR_ARRAY:
    *params = _ge->enableColorArray; break;
  case TGE_COLOR_ARRAY_SIZE:
    *params = (0==_ge->colorArray.size) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_COLOR_ARRAY_STRIDE:
    *params = (0==_ge->colorArray.stride) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_COLOR_ARRAY_TYPE:
    *params = (0==_ge->colorArray.type) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_CULL_FACE:
    *params = _ge->enableCullFace; break;
  case TGE_CULL_FACE_MODE:
    *params = (0==_ge->cullFace) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_CURRENT_COLOR:
    for (i = 0; i < 4; i++) {
      /* A (nearly) zero component is FALSE, anything else TRUE. */
      params[i] = (isDoubleAlmostEquals(_ge->color[i], 0)) ? TGE_FALSE : TGE_TRUE;
    }
    break;
  case TGE_DEPTH_FUNC:
    *params = TGE_TRUE; break;
  case TGE_DEPTH_TEST:
    *params = _ge->enableDepthTest; break;
  case TGE_LIGHT0:
    *params = _ge->enableLight; break;
  case TGE_MATRIX_MODE:
    *params = TGE_TRUE; break;
  case TGE_MAX_LIGHTS:
    *params = TGE_TRUE; break;
  case TGE_MAX_PROJECTION_STACK_DEPTH:
    *params = TGE_TRUE; break;
  case TGE_NORMAL_ARRAY:
    *params = _ge->enableNormalArray; break;
  case TGE_NORMAL_ARRAY_STRIDE:
    *params = (0==_ge->normalArray.stride) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_NORMAL_ARRAY_TYPE:
    *params = (0==_ge->normalArray.type) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_POLYGON_OFFSET_FACTOR:
    *params = (0 == _ge->polygonOffsetFactor) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_POLYGON_OFFSET_UNITS:
    *params = (0 == _ge->polygonOffsetUnits) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_POLYGON_OFFSET_FILL:
    *params = _ge->enablePolygonOffsetFill; break;
  case TGE_POLYGON_OFFSET_LINE:
    *params = _ge->enablePolygonOffsetLine; break;
  case TGE_POLYGON_OFFSET_POINT:
    *params = _ge->enablePolygonOffsetPoint; break;
  case TGE_PROJECTION_MATRIX:
    for (i = 0; i < 16; i++)
      params[i] = (0==_ge->projectionMatrix[i]) ? TGE_FALSE : TGE_TRUE;
    break;
  case TGE_PROJECTION_STACK_DEPTH:
    *params = (0==_ge->projectionMatrixStackLevel) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_RESCALE_NORMAL:
    *params = _ge->enableRescaleNormal; break;
  case TGE_TEXTURE_2D:
    *params = _ge->enableTexture; break;
  case TGE_TEXTURE_COORD_ARRAY:
    *params = _ge->enableTexCoordArray; break;
  case TGE_TEXTURE_COORD_ARRAY_SIZE:
    *params = (0==_ge->texCoordArray.size) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_TEXTURE_COORD_ARRAY_STRIDE:
    *params = (0==_ge->texCoordArray.stride) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_TEXTURE_COORD_ARRAY_TYPE:
    *params = (0==_ge->texCoordArray.type) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_VERTEX_ARRAY:
    *params = _ge->enableVertexArray; break;
  case TGE_VERTEX_ARRAY_SIZE:
    *params = (0==_ge->vertexArray.size) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_VERTEX_ARRAY_STRIDE:
    *params = (0==_ge->vertexArray.stride) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_VERTEX_ARRAY_TYPE:
    *params = (0==_ge->vertexArray.type) ? TGE_FALSE : TGE_TRUE; break;
  case TGE_VIEWPORT:
    /* Origin (0,0) converts to FALSE, the non-zero extent to TRUE. */
    params[0] = TGE_FALSE;
    params[1] = TGE_FALSE;
    params[2] = TGE_TRUE;
    params[3] = TGE_TRUE;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM; break;
  }
}

/**
 * Query a state value of the current context as double(s).
 *
 * Fix over the previous implementation: TGE_POLYGON_OFFSET_FILL/LINE/POINT
 * lacked `break` and fell through into TGE_PROJECTION_MATRIX, which replaced
 * the requested flag with matrix data and wrote 16 elements into a buffer
 * the caller sized for one.
 *
 * @param pname   state to query; an unknown name records TGE_INVALID_ENUM
 *                and leaves params untouched.
 * @param params  output buffer: 4 elements for TGE_CURRENT_COLOR and
 *                TGE_VIEWPORT, 16 for TGE_PROJECTION_MATRIX, 1 otherwise.
 */
void tgeGetDoublev(TGEenum pname, TGEdouble* params)
{
  TGEint i;
  switch (pname) {
  case TGE_ALPHA_TEST:
    *params = TGE_FALSE; break;
  case TGE_AUTO_NORMAL:
    *params = _ge->enableAutoNormal; break;
  case TGE_BLEND:
    *params = _ge->enableBlend; break;
  case TGE_COLOR_ARRAY:
    *params = _ge->enableColorArray; break;
  case TGE_COLOR_ARRAY_SIZE:
    *params = _ge->colorArray.size; break;
  case TGE_COLOR_ARRAY_STRIDE:
    *params = _ge->colorArray.stride; break;
  case TGE_COLOR_ARRAY_TYPE:
    *params = _ge->colorArray.type; break;
  case TGE_CULL_FACE:
    *params = _ge->enableCullFace; break;
  case TGE_CULL_FACE_MODE:
    *params = _ge->cullFace; break;
  case TGE_CURRENT_COLOR:
    for (i = 0; i < 4; i++)
      params[i] = _ge->color[i];
    break;
  case TGE_DEPTH_FUNC:
    *params = TGE_TRUE; break;
  case TGE_DEPTH_TEST:
    *params = _ge->enableDepthTest; break;
  case TGE_LIGHT0:
    *params = _ge->enableLight; break;
  case TGE_MATRIX_MODE:
    *params = _ge->matrixMode; break;
  case TGE_MAX_LIGHTS:
    *params = 1; break;
  case TGE_MAX_PROJECTION_STACK_DEPTH:
    *params = TGE_PROJECTION_MATRIX_STACK_SIZE; break;
  case TGE_NORMAL_ARRAY:
    *params = _ge->enableNormalArray; break;
  case TGE_NORMAL_ARRAY_STRIDE:
    *params = _ge->normalArray.stride; break;
  case TGE_NORMAL_ARRAY_TYPE:
    *params = _ge->normalArray.type; break;
  case TGE_POLYGON_OFFSET_FACTOR:
    *params = _ge->polygonOffsetFactor; break;
  case TGE_POLYGON_OFFSET_UNITS:
    *params = _ge->polygonOffsetUnits; break;
  case TGE_POLYGON_OFFSET_FILL:
    *params = _ge->enablePolygonOffsetFill; break;
  case TGE_POLYGON_OFFSET_LINE:
    *params = _ge->enablePolygonOffsetLine; break;
  case TGE_POLYGON_OFFSET_POINT:
    *params = _ge->enablePolygonOffsetPoint; break;
  case TGE_PROJECTION_MATRIX:
    for (i = 0; i < 16; i++)
      params[i] = _ge->projectionMatrix[i];
    break;
  case TGE_PROJECTION_STACK_DEPTH:
    *params = _ge->projectionMatrixStackLevel; break;
  case TGE_RESCALE_NORMAL:
    *params = _ge->enableRescaleNormal; break;
  case TGE_TEXTURE_2D:
    *params = _ge->enableTexture; break;
  case TGE_TEXTURE_COORD_ARRAY:
    *params = _ge->enableTexCoordArray; break;
  case TGE_TEXTURE_COORD_ARRAY_SIZE:
    *params = _ge->texCoordArray.size; break;
  case TGE_TEXTURE_COORD_ARRAY_STRIDE:
    *params = _ge->texCoordArray.stride; break;
  case TGE_TEXTURE_COORD_ARRAY_TYPE:
    *params = _ge->texCoordArray.type; break;
  case TGE_VERTEX_ARRAY:
    *params = _ge->enableVertexArray; break;
  case TGE_VERTEX_ARRAY_SIZE:
    *params = _ge->vertexArray.size; break;
  case TGE_VERTEX_ARRAY_STRIDE:
    *params = _ge->vertexArray.stride; break;
  case TGE_VERTEX_ARRAY_TYPE:
    *params = _ge->vertexArray.type; break;
  case TGE_VIEWPORT:
    /* NOTE(review): reports the boolean constants rather than the real
     * x/y/width/height of the screen -- looks like a placeholder; confirm
     * before relying on it. */
    params[0] = TGE_FALSE;
    params[1] = TGE_FALSE;
    params[2] = TGE_TRUE;
    params[3] = TGE_TRUE;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM; break;
  }
}

/**
 * Query a state value of the current context as float(s).
 *
 * Fix over the previous implementation: TGE_POLYGON_OFFSET_FILL/LINE/POINT
 * lacked `break` and fell through into TGE_PROJECTION_MATRIX, which replaced
 * the requested flag with matrix data and wrote 16 elements into a buffer
 * the caller sized for one.
 *
 * @param pname   state to query; an unknown name records TGE_INVALID_ENUM
 *                and leaves params untouched.
 * @param params  output buffer: 4 elements for TGE_CURRENT_COLOR and
 *                TGE_VIEWPORT, 16 for TGE_PROJECTION_MATRIX, 1 otherwise.
 */
void tgeGetFloatv(TGEenum pname, TGEfloat* params)
{
  TGEint i;
  switch (pname) {
  case TGE_ALPHA_TEST:
    *params = TGE_FALSE; break;
  case TGE_AUTO_NORMAL:
    *params = _ge->enableAutoNormal; break;
  case TGE_BLEND:
    *params = _ge->enableBlend; break;
  case TGE_COLOR_ARRAY:
    *params = _ge->enableColorArray; break;
  case TGE_COLOR_ARRAY_SIZE:
    *params = _ge->colorArray.size; break;
  case TGE_COLOR_ARRAY_STRIDE:
    *params = _ge->colorArray.stride; break;
  case TGE_COLOR_ARRAY_TYPE:
    *params = _ge->colorArray.type; break;
  case TGE_CULL_FACE:
    *params = _ge->enableCullFace; break;
  case TGE_CULL_FACE_MODE:
    *params = _ge->cullFace; break;
  case TGE_CURRENT_COLOR:
    for (i = 0; i < 4; i++)
      params[i] = _ge->color[i];
    break;
  case TGE_DEPTH_FUNC:
    *params = TGE_TRUE; break;
  case TGE_DEPTH_TEST:
    *params = _ge->enableDepthTest; break;
  case TGE_LIGHT0:
    *params = _ge->enableLight; break;
  case TGE_MATRIX_MODE:
    *params = _ge->matrixMode; break;
  case TGE_MAX_LIGHTS:
    *params = 1; break;
  case TGE_MAX_PROJECTION_STACK_DEPTH:
    *params = TGE_PROJECTION_MATRIX_STACK_SIZE; break;
  case TGE_NORMAL_ARRAY:
    *params = _ge->enableNormalArray; break;
  case TGE_NORMAL_ARRAY_STRIDE:
    *params = _ge->normalArray.stride; break;
  case TGE_NORMAL_ARRAY_TYPE:
    *params = _ge->normalArray.type; break;
  case TGE_POLYGON_OFFSET_FACTOR:
    *params = _ge->polygonOffsetFactor; break;
  case TGE_POLYGON_OFFSET_UNITS:
    *params = _ge->polygonOffsetUnits; break;
  case TGE_POLYGON_OFFSET_FILL:
    *params = _ge->enablePolygonOffsetFill; break;
  case TGE_POLYGON_OFFSET_LINE:
    *params = _ge->enablePolygonOffsetLine; break;
  case TGE_POLYGON_OFFSET_POINT:
    *params = _ge->enablePolygonOffsetPoint; break;
  case TGE_PROJECTION_MATRIX:
    for (i = 0; i < 16; i++)
      params[i] = _ge->projectionMatrix[i];
    break;
  case TGE_PROJECTION_STACK_DEPTH:
    *params = _ge->projectionMatrixStackLevel; break;
  case TGE_RESCALE_NORMAL:
    *params = _ge->enableRescaleNormal; break;
  case TGE_TEXTURE_2D:
    *params = _ge->enableTexture; break;
  case TGE_TEXTURE_COORD_ARRAY:
    *params = _ge->enableTexCoordArray; break;
  case TGE_TEXTURE_COORD_ARRAY_SIZE:
    *params = _ge->texCoordArray.size; break;
  case TGE_TEXTURE_COORD_ARRAY_STRIDE:
    *params = _ge->texCoordArray.stride; break;
  case TGE_TEXTURE_COORD_ARRAY_TYPE:
    *params = _ge->texCoordArray.type; break;
  case TGE_VERTEX_ARRAY:
    *params = _ge->enableVertexArray; break;
  case TGE_VERTEX_ARRAY_SIZE:
    *params = _ge->vertexArray.size; break;
  case TGE_VERTEX_ARRAY_STRIDE:
    *params = _ge->vertexArray.stride; break;
  case TGE_VERTEX_ARRAY_TYPE:
    *params = _ge->vertexArray.type; break;
  case TGE_VIEWPORT:
    /* NOTE(review): reports the boolean constants rather than the real
     * x/y/width/height of the screen -- looks like a placeholder; confirm
     * before relying on it. */
    params[0] = TGE_FALSE;
    params[1] = TGE_FALSE;
    params[2] = TGE_TRUE;
    params[3] = TGE_TRUE;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM; break;
  }
}

/**
 * Query a state value of the current context as integer(s).
 *
 * Color components are scaled to 0..255 and rounded down; projection matrix
 * elements are rounded down to the nearest integer.
 *
 * Fix over the previous implementation: TGE_POLYGON_OFFSET_FILL/LINE/POINT
 * lacked `break` and fell through into TGE_PROJECTION_MATRIX, which replaced
 * the requested flag with matrix data and wrote 16 elements into a buffer
 * the caller sized for one.
 *
 * @param pname   state to query; an unknown name records TGE_INVALID_ENUM
 *                and leaves params untouched.
 * @param params  output buffer: 4 elements for TGE_CURRENT_COLOR and
 *                TGE_VIEWPORT, 16 for TGE_PROJECTION_MATRIX, 1 otherwise.
 */
void tgeGetIntegerv(TGEenum pname, TGEint* params)
{
  TGEint i;
  switch (pname) {
  case TGE_ALPHA_TEST:
    *params = TGE_FALSE; break;
  case TGE_AUTO_NORMAL:
    *params = _ge->enableAutoNormal; break;
  case TGE_BLEND:
    *params = _ge->enableBlend; break;
  case TGE_COLOR_ARRAY:
    *params = _ge->enableColorArray; break;
  case TGE_COLOR_ARRAY_SIZE:
    *params = _ge->colorArray.size; break;
  case TGE_COLOR_ARRAY_STRIDE:
    *params = _ge->colorArray.stride; break;
  case TGE_COLOR_ARRAY_TYPE:
    *params = _ge->colorArray.type; break;
  case TGE_CULL_FACE:
    *params = _ge->enableCullFace; break;
  case TGE_CULL_FACE_MODE:
    *params = _ge->cullFace; break;
  case TGE_CURRENT_COLOR:
    for (i = 0; i < 4; i++)
      params[i] = (int)floor(_ge->color[i]*255.0);
    break;
  case TGE_DEPTH_FUNC:
    *params = TGE_TRUE; break;
  case TGE_DEPTH_TEST:
    *params = _ge->enableDepthTest; break;
  case TGE_LIGHT0:
    *params = _ge->enableLight; break;
  case TGE_MATRIX_MODE:
    *params = _ge->matrixMode; break;
  case TGE_MAX_LIGHTS:
    *params = 1; break;
  case TGE_MAX_PROJECTION_STACK_DEPTH:
    *params = TGE_PROJECTION_MATRIX_STACK_SIZE; break;
  case TGE_NORMAL_ARRAY:
    *params = _ge->enableNormalArray; break;
  case TGE_NORMAL_ARRAY_STRIDE:
    *params = _ge->normalArray.stride; break;
  case TGE_NORMAL_ARRAY_TYPE:
    *params = _ge->normalArray.type; break;
  case TGE_POLYGON_OFFSET_FACTOR:
    *params = _ge->polygonOffsetFactor; break;
  case TGE_POLYGON_OFFSET_UNITS:
    *params = _ge->polygonOffsetUnits; break;
  case TGE_POLYGON_OFFSET_FILL:
    *params = _ge->enablePolygonOffsetFill; break;
  case TGE_POLYGON_OFFSET_LINE:
    *params = _ge->enablePolygonOffsetLine; break;
  case TGE_POLYGON_OFFSET_POINT:
    *params = _ge->enablePolygonOffsetPoint; break;
  case TGE_PROJECTION_MATRIX:
    for (i = 0; i < 16; i++)
      params[i] = (int)floor(_ge->projectionMatrix[i]);
    break;
  case TGE_PROJECTION_STACK_DEPTH:
    *params = _ge->projectionMatrixStackLevel; break;
  case TGE_RESCALE_NORMAL:
    *params = _ge->enableRescaleNormal; break;
  case TGE_TEXTURE_2D:
    *params = _ge->enableTexture; break;
  case TGE_TEXTURE_COORD_ARRAY:
    *params = _ge->enableTexCoordArray; break;
  case TGE_TEXTURE_COORD_ARRAY_SIZE:
    *params = _ge->texCoordArray.size; break;
  case TGE_TEXTURE_COORD_ARRAY_STRIDE:
    *params = _ge->texCoordArray.stride; break;
  case TGE_TEXTURE_COORD_ARRAY_TYPE:
    *params = _ge->texCoordArray.type; break;
  case TGE_VERTEX_ARRAY:
    *params = _ge->enableVertexArray; break;
  case TGE_VERTEX_ARRAY_SIZE:
    *params = _ge->vertexArray.size; break;
  case TGE_VERTEX_ARRAY_STRIDE:
    *params = _ge->vertexArray.stride; break;
  case TGE_VERTEX_ARRAY_TYPE:
    *params = _ge->vertexArray.type; break;
  case TGE_VIEWPORT:
    /* NOTE(review): reports the boolean constants rather than the real
     * x/y/width/height of the screen -- looks like a placeholder; confirm
     * before relying on it. */
    params[0] = TGE_FALSE;
    params[1] = TGE_FALSE;
    params[2] = TGE_TRUE;
    params[3] = TGE_TRUE;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM; break;
  }
}

/*
 * Translate a TGE blend factor into the matching canvas blend constant.
 * Returns 1 and stores the constant in *out on success, 0 for an
 * unsupported factor (in which case *out is left untouched).
 */
static int tgeBlendFactorToCanvas(TGEenum factor, int* out)
{
  switch (factor) {
  case TGE_ZERO:                *out = CVBlendZero; return 1;
  case TGE_ONE:                 *out = CVBlendOne; return 1;
  case TGE_SRC_COLOR:           *out = CVBlendSrcColor; return 1;
  case TGE_ONE_MINUS_SRC_COLOR: *out = CVBlendOneMinusSrcColor; return 1;
  case TGE_DST_COLOR:           *out = CVBlendDstColor; return 1;
  case TGE_ONE_MINUS_DST_COLOR: *out = CVBlendOneMinusDstColor; return 1;
  case TGE_SRC_ALPHA:           *out = CVBlendSrcAlpha; return 1;
  case TGE_ONE_MINUS_SRC_ALPHA: *out = CVBlendOneMinusSrcAlpha; return 1;
  case TGE_DST_ALPHA:           *out = CVBlendDstAlpha; return 1;
  case TGE_ONE_MINUS_DST_ALPHA: *out = CVBlendOneMinusDstAlpha; return 1;
  default:
    return 0;
  }
}

/**
 * Select the source and destination blend factors.
 *
 * Both factors are validated before anything is changed; an unsupported
 * factor records TGE_INVALID_ENUM and leaves the blend state untouched.
 * On success the factors are stored in the current context and forwarded
 * to the triangle renderer.
 *
 * @param sfactor  source blend factor.
 * @param dfactor  destination blend factor.
 */
void tgeBlendFunc(TGEenum sfactor, TGEenum dfactor)
{
  int srcBlend, dstBlend;

  if (!tgeBlendFactorToCanvas(sfactor, &srcBlend) ||
      !tgeBlendFactorToCanvas(dfactor, &dstBlend)) {
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  _ge->blendSrcFactor = sfactor;
  _ge->blendDstFactor = dfactor;
  tri_set_blend_func(srcBlend, dstBlend);
}

/**
 * Set a light parameter.  Only TGE_LIGHT0 with TGE_POSITION is supported;
 * anything else records TGE_INVALID_ENUM.
 *
 * The position (x, y, z, with w fixed to 1) is multiplied by the inverse of
 * the current screen matrix and the first three components of the result
 * are stored as the light position of the current context.
 *
 * @param light   light to modify (must be TGE_LIGHT0).
 * @param pname   parameter to set (must be TGE_POSITION).
 * @param params  at least three floats: x, y, z.
 */
void tgeLightfv(TGEenum light, TGEenum pname, const TGEfloat* params)
{
  TGEdouble inverse[16];
  TGEdouble pos[4];
  int axis;

  if (TGE_LIGHT0 != light || TGE_POSITION != pname) {
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  for (axis = 0; axis < 3; axis++) {
    pos[axis] = params[axis];
  }
  pos[3] = 1.0;
  tgeInvertMatrixd(_ge->screenMatrix, inverse);
  /* Input and output intentionally share the same buffer, as before. */
  tgeMultVectorMatrixd(pos, inverse, pos);
  _ge->lightPosition[0] = pos[0];
  _ge->lightPosition[1] = pos[1];
  _ge->lightPosition[2] = pos[2];
}

/**
 * Select which faces are culled when face culling is enabled.
 *
 * @param face  TGE_FRONT, TGE_BACK or TGE_FRONT_AND_BACK; any other value
 *              records TGE_INVALID_ENUM and leaves the setting unchanged.
 */
void tgeCullFace(TGEenum face)
{
  switch (face) {
  case TGE_FRONT:
  case TGE_BACK:
  case TGE_FRONT_AND_BACK:
    _ge->cullFace = face;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    break;
  }
}

/**
 * Set the color used by tgeClear() for the color buffer.
 * The components are stored exactly as given.
 */
void tgeClearColor(TGEclampf red, TGEclampf green, TGEclampf blue, TGEclampf alpha)
{
  TGEclampf* rgba = _ge->clearColor;

  rgba[0] = red;
  rgba[1] = green;
  rgba[2] = blue;
  rgba[3] = alpha;
}

/**
 * Set the current color from four floating point components.
 *
 * The values are stored as given; no clamping is applied here.
 */
void tgeColor4f(TGEclampf red, TGEclampf green, TGEclampf blue, TGEclampf alpha)
{
  const TGEclampf rgba[4] = { red, green, blue, alpha };
  TGEint k;

  for (k = 0; k < 4; k++) {
    _ge->color[k] = rgba[k];
  }
}

/**
 * Set the current color from four integer components.
 *
 * Each component is divided by 255 and forwarded to tgeColor4f().
 */
void tgeColor4i(TGEint red, TGEint green, TGEint blue, TGEint alpha)
{
  const TGEclampf r = red/255.0f;
  const TGEclampf g = green/255.0f;
  const TGEclampf b = blue/255.0f;
  const TGEclampf a = alpha/255.0f;

  tgeColor4f(r, g, b, a);
}

void tgeColor4dv(const TGEdouble* v)
{
  tgeColor4f(v[0], v[1], v[2], v[3]);
}

void tgeColor4fv(const TGEfloat* v)
{
  tgeColor4f(v[0], v[1], v[2], v[3]);
}

void tgeColor4iv(const TGEint* v)
{
  tgeColor4f(v[0]/255.0f, v[1]/255.0f, v[2]/255.0f, v[3]/255.0f);
}

/**
 * Clear the buffers selected by mask.
 *
 * TGE_COLOR_BUFFER_BIT fills the whole screen with the clear color (only the
 * red, green and blue components are converted and passed on as RGB8);
 * TGE_DEPTH_BUFFER_BIT clears the depth buffer.
 *
 * @param mask  bitwise OR of TGE_COLOR_BUFFER_BIT and TGE_DEPTH_BUFFER_BIT
 */
void tgeClear(TGEbitfield mask)
{
  if (mask & TGE_COLOR_BUFFER_BIT) {
    const TGEfloat* src = _ge->clearColor;
    TGEubyte rgb[4];
    TGEint k;

    /* the fourth byte is not filled in; the rectangle is drawn as RGB8 */
    for (k = 0; k < 3; k++) {
      rgb[k] = (TGEubyte)(src[k]*255);
    }
    cv_draw_rect_ex(_ge->screen, 0, 0,
                    _ge->screen->width, _ge->screen->height,
                    CVDepthRGB8, &rgb, CVBlendNone);
  }

  if (mask & TGE_DEPTH_BUFFER_BIT) {
    zbuf_clear(_ge->depthBuffer);
  }
}

TGEenum tgeGetError(void)
{
  return _ge->error;
}

/**
 * Enable one of the client-side arrays.
 *
 * @param cap  TGE_COLOR_ARRAY, TGE_NORMAL_ARRAY, TGE_TEXTURE_COORD_ARRAY or
 *             TGE_VERTEX_ARRAY; any other value records TGE_INVALID_ENUM
 */
void tgeEnableClientState(TGEenum cap)
{
  if (cap == TGE_COLOR_ARRAY) {
    _ge->enableColorArray = TGE_TRUE;
  }
  else if (cap == TGE_NORMAL_ARRAY) {
    _ge->enableNormalArray = TGE_TRUE;
  }
  else if (cap == TGE_TEXTURE_COORD_ARRAY) {
    _ge->enableTexCoordArray = TGE_TRUE;
  }
  else if (cap == TGE_VERTEX_ARRAY) {
    _ge->enableVertexArray = TGE_TRUE;
  }
  else {
    _ge->error = TGE_INVALID_ENUM;
  }
}

/**
 * Disable one of the client-side arrays.
 *
 * @param cap  TGE_COLOR_ARRAY, TGE_NORMAL_ARRAY, TGE_TEXTURE_COORD_ARRAY or
 *             TGE_VERTEX_ARRAY; any other value records TGE_INVALID_ENUM
 */
void tgeDisableClientState(TGEenum cap)
{
  if (cap == TGE_COLOR_ARRAY) {
    _ge->enableColorArray = TGE_FALSE;
  }
  else if (cap == TGE_NORMAL_ARRAY) {
    _ge->enableNormalArray = TGE_FALSE;
  }
  else if (cap == TGE_TEXTURE_COORD_ARRAY) {
    _ge->enableTexCoordArray = TGE_FALSE;
  }
  else if (cap == TGE_VERTEX_ARRAY) {
    _ge->enableVertexArray = TGE_FALSE;
  }
  else {
    _ge->error = TGE_INVALID_ENUM;
  }
}

/**
 * Store the depth offset parameters applied when polygon offset is enabled.
 *
 * @param factor  value the depth is multiplied by
 * @param units   constant added to the scaled depth
 */
void tgePolygonOffset(TGEfloat factor, TGEfloat units)
{
  assert(_ge != NULL);

  _ge->polygonOffsetUnits = units;
  _ge->polygonOffsetFactor = factor;
}

/**
 * Record the depth mask flag and forward it to the triangle rasterizer.
 *
 * @param flag  new depth mask value
 */
void tgeDepthMask(TGEboolean flag)
{
  assert(_ge != NULL);

  /* keep the engine state and the rasterizer setting in step */
  _ge->depthMask = flag;
  tri_set_depth_mask(flag);
}

/**
 * Describe the client-side vertex position array.
 *
 * The size is validated because tgeExtractArray() hands it to the element
 * getter, which writes that many values into a small fixed-size local
 * buffer.
 *
 * @param size     number of components per vertex, 2 to 4 (as in OpenGL);
 *                 other values record TGE_INVALID_VALUE
 * @param type     element type; unsupported types record TGE_INVALID_ENUM
 * @param stride   stride value handed to the element getter
 * @param pointer  start of the array data (not copied)
 *
 * On error the previous array description is left unchanged.
 */
void tgeVertexPointer(TGEint size, TGEenum type, TGEsizei stride, const TGEvoid* pointer)
{
  if (size < 2 || size > 4) {
    _ge->error = TGE_INVALID_VALUE;
    return;
  }
  switch (type) {
  case TGE_BYTE:           _ge->vertexArray.getValuedv = tgeByteArrayGetdv; break;
  case TGE_UNSIGNED_BYTE:  _ge->vertexArray.getValuedv = tgeUByteArrayGetdv; break;
  case TGE_SHORT:          _ge->vertexArray.getValuedv = tgeShortArrayGetdv; break;
  case TGE_UNSIGNED_SHORT: _ge->vertexArray.getValuedv = tgeUShortArrayGetdv; break;
  case TGE_INT:            _ge->vertexArray.getValuedv = tgeIntArrayGetdv; break;
  case TGE_UNSIGNED_INT:   _ge->vertexArray.getValuedv = tgeUIntArrayGetdv; break;
  case TGE_FLOAT:          _ge->vertexArray.getValuedv = tgeFloatArrayGetdv; break;
  case TGE_DOUBLE:         _ge->vertexArray.getValuedv = tgeDoubleArrayGetdv; break;
  default: _ge->error = TGE_INVALID_ENUM; return;
  }
  _ge->vertexArray.size = size;
  _ge->vertexArray.type = type;
  _ge->vertexArray.stride = stride;
  _ge->vertexArray.data = pointer;
}

/**
 * Describe the client-side color array.
 *
 * Integer element types install an unsigned-integer getter, TGE_FLOAT and
 * TGE_DOUBLE install a double getter; tgeExtractArray() picks the getter
 * according to the stored type.  The size is validated because the getter
 * writes that many values into a four-element local buffer and components
 * 0..2 are always read.
 *
 * @param size     number of components per color, 3 or 4 (as in OpenGL);
 *                 other values record TGE_INVALID_VALUE
 * @param type     element type; unsupported types record TGE_INVALID_ENUM
 * @param stride   stride value handed to the element getter
 * @param pointer  start of the array data (not copied)
 *
 * On error the previous array description is left unchanged.
 */
void tgeColorPointer(TGEint size, TGEenum type, TGEsizei stride, const TGEvoid* pointer)
{
  if (size < 3 || size > 4) {
    _ge->error = TGE_INVALID_VALUE;
    return;
  }
  switch (type) {
  case TGE_BYTE:           _ge->colorArray.getValueuiv = tgeByteArrayGetuiv; break;
  case TGE_UNSIGNED_BYTE:  _ge->colorArray.getValueuiv = tgeUByteArrayGetuiv; break;
  case TGE_SHORT:          _ge->colorArray.getValueuiv = tgeShortArrayGetuiv; break;
  case TGE_UNSIGNED_SHORT: _ge->colorArray.getValueuiv = tgeUShortArrayGetuiv; break;
  case TGE_INT:            _ge->colorArray.getValueuiv = tgeIntArrayGetuiv; break;
  case TGE_UNSIGNED_INT:   _ge->colorArray.getValueuiv = tgeUIntArrayGetuiv; break;
  case TGE_FLOAT:          _ge->colorArray.getValuedv = tgeFloatArrayGetdv; break;
  case TGE_DOUBLE:         _ge->colorArray.getValuedv = tgeDoubleArrayGetdv; break;
  default: _ge->error = TGE_INVALID_ENUM; return;
  }
  _ge->colorArray.size = size;
  _ge->colorArray.type = type;
  _ge->colorArray.stride = stride;
  _ge->colorArray.data = pointer;
}

/**
 * Describe the client-side normal array.
 *
 * @param type     element type; unsupported types record TGE_INVALID_ENUM
 *                 and leave the previous description unchanged
 * @param stride   stride value handed to the element getter
 * @param pointer  start of the array data (not copied)
 */
void tgeNormalPointer(TGEenum type, TGEsizei stride, const TGEvoid* pointer)
{
  tgeGetdv getter;

  /* pick the converter that reads elements of the given type as doubles */
  if (type == TGE_BYTE)                getter = tgeByteArrayGetdv;
  else if (type == TGE_UNSIGNED_BYTE)  getter = tgeUByteArrayGetdv;
  else if (type == TGE_SHORT)          getter = tgeShortArrayGetdv;
  else if (type == TGE_UNSIGNED_SHORT) getter = tgeUShortArrayGetdv;
  else if (type == TGE_INT)            getter = tgeIntArrayGetdv;
  else if (type == TGE_UNSIGNED_INT)   getter = tgeUIntArrayGetdv;
  else if (type == TGE_FLOAT)          getter = tgeFloatArrayGetdv;
  else if (type == TGE_DOUBLE)         getter = tgeDoubleArrayGetdv;
  else {
    _ge->error = TGE_INVALID_ENUM;
    return;
  }

  _ge->normalArray.getValuedv = getter;
  _ge->normalArray.type = type;
  _ge->normalArray.stride = stride;
  _ge->normalArray.data = pointer;
}

/**
 * Describe the client-side texture coordinate array.
 *
 * The size is validated because tgeExtractArray() hands it to the element
 * getter, which writes that many values into a small fixed-size local
 * buffer.
 *
 * @param size     number of components per coordinate, 1 to 4 (as in
 *                 OpenGL); other values record TGE_INVALID_VALUE
 * @param type     element type; unsupported types record TGE_INVALID_ENUM
 * @param stride   stride value handed to the element getter
 * @param pointer  start of the array data (not copied)
 *
 * On error the previous array description is left unchanged.
 */
void tgeTexCoordPointer(TGEint size, TGEenum type, TGEsizei stride, const TGEvoid* pointer)
{
  if (size < 1 || size > 4) {
    _ge->error = TGE_INVALID_VALUE;
    return;
  }
  switch (type) {
  case TGE_BYTE:           _ge->texCoordArray.getValuedv = tgeByteArrayGetdv; break;
  case TGE_UNSIGNED_BYTE:  _ge->texCoordArray.getValuedv = tgeUByteArrayGetdv; break;
  case TGE_SHORT:          _ge->texCoordArray.getValuedv = tgeShortArrayGetdv; break;
  case TGE_UNSIGNED_SHORT: _ge->texCoordArray.getValuedv = tgeUShortArrayGetdv; break;
  case TGE_INT:            _ge->texCoordArray.getValuedv = tgeIntArrayGetdv; break;
  case TGE_UNSIGNED_INT:   _ge->texCoordArray.getValuedv = tgeUIntArrayGetdv; break;
  case TGE_FLOAT:          _ge->texCoordArray.getValuedv = tgeFloatArrayGetdv; break;
  case TGE_DOUBLE:         _ge->texCoordArray.getValuedv = tgeDoubleArrayGetdv; break;
  default: _ge->error = TGE_INVALID_ENUM; return;
  }
  _ge->texCoordArray.size = size;
  _ge->texCoordArray.type = type;
  _ge->texCoordArray.stride = stride;
  _ge->texCoordArray.data = pointer;
}

/**
 * Draw primitives from consecutive elements of the enabled arrays.
 *
 * @param mode   primitive type; unsupported modes record TGE_INVALID_ENUM
 * @param first  index of the first array element
 * @param count  number of elements to process
 */
void tgeDrawArrays(TGEenum mode, TGEint first, TGEsizei count)
{
  if (mode == TGE_POINTS) {
    tgeDrawArrays_points(first, count);
  }
  else if (mode == TGE_LINE_STRIP) {
    tgeDrawArrays_line_strip(first, count);
  }
  else if (mode == TGE_LINE_LOOP) {
    tgeDrawArrays_line_loop(first, count);
  }
  else if (mode == TGE_LINES) {
    tgeDrawArrays_lines(first, count);
  }
  else if (mode == TGE_TRIANGLE_STRIP) {
    tgeDrawArrays_triangle_strip(first, count);
  }
  else if (mode == TGE_TRIANGLE_FAN) {
    tgeDrawArrays_triangle_fan(first, count);
  }
  else if (mode == TGE_TRIANGLES) {
    tgeDrawArrays_triangles(first, count);
  }
  else {
    _ge->error = TGE_INVALID_ENUM;
  }
}

/**
 * Draw primitives whose vertices are selected through an index list.
 *
 * Indices of type TGE_UNSIGNED_INT are used in place.  Any other supported
 * type is first converted into a temporary TGEuint buffer with the matching
 * tge*ArrayGetuiv helper; the buffer is released before returning.
 *
 * @param mode     primitive type; unsupported modes record TGE_INVALID_ENUM
 * @param count    number of indices; a negative value records
 *                 TGE_INVALID_VALUE
 * @param type     element type of indices; unsupported types record
 *                 TGE_INVALID_ENUM and nothing is drawn
 * @param indices  index data
 *
 * If the temporary buffer cannot be allocated nothing is drawn.
 */
void tgeDrawElements(TGEenum mode, TGEsizei count, TGEenum type, const TGEvoid* indices)
{
  TGEuint* intp;
  TGEboolean needFree = TGE_FALSE;

  if (count < 0) {
    _ge->error = TGE_INVALID_VALUE;
    return;
  }

  if (type == TGE_UNSIGNED_INT) {
    intp = (TGEuint *)indices;
  }
  else {
    /* allocate at least one element so that a zero count is not mistaken
       for an allocation failure (malloc(0) may return NULL) */
    size_t elems = (count > 0) ? (size_t)count : 1;
    if (elems > (size_t)-1 / sizeof(TGEuint)) {
      return;           /* byte size would overflow */
    }
    intp = malloc(sizeof(TGEuint)*elems);
    if (NULL == intp) {
      return;           /* out of memory: skip drawing instead of crashing */
    }
    needFree = TGE_TRUE;
    switch (type) {
    case TGE_BYTE:    tgeByteArrayGetuiv(indices, 0, 0, count, intp); break;
    case TGE_UNSIGNED_BYTE:   tgeUByteArrayGetuiv(indices, 0, 0, count, intp); break;
    case TGE_SHORT:   tgeShortArrayGetuiv(indices, 0, 0, count, intp); break;
    case TGE_UNSIGNED_SHORT:  tgeUShortArrayGetuiv(indices, 0, 0, count, intp); break;
    case TGE_INT:     tgeIntArrayGetuiv(indices, 0, 0, count, intp); break;
    case TGE_FLOAT:   tgeFloatArrayGetuiv(indices, 0, 0, count, intp); break;
    case TGE_DOUBLE:  tgeDoubleArrayGetuiv(indices, 0, 0, count, intp); break;
    default:
      /* unknown index type: the buffer was never filled, do not draw */
      free(intp);
      _ge->error = TGE_INVALID_ENUM;
      return;
    }
  }

  switch (mode) {
  case TGE_POINTS:
    tgeDrawElements_points(count, intp);
    break;
  case TGE_LINE_STRIP:
    tgeDrawElements_line_strip(count, intp);
    break;
  case TGE_LINE_LOOP:
    tgeDrawElements_line_loop(count, intp);
    break;
  case TGE_LINES:
    tgeDrawElements_lines(count, intp);
    break;
  case TGE_TRIANGLE_STRIP:
    tgeDrawElements_triangle_strip(count, intp);
    break;
  case TGE_TRIANGLE_FAN:
    tgeDrawElements_triangle_fan(count, intp);
    break;
  case TGE_TRIANGLES:
    tgeDrawElements_triangles(count, intp);
    break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    break;
  }
  if (needFree) {
    free(intp);
  }
}

// tgeDrawElements helper functions (one per primitive mode)

/* Draw every indexed vertex as a single point. */
static void tgeDrawElements_points(TGEsizei count, TGEuint* indices)
{
  TGEVertex point;
  TGEsizei n = 0;

  while (n < count) {
    tgeExtractArray(indices[n], &point);
    tgeDrawVertex(&point);
    n++;
  }
}

/* Draw a connected line through the indexed vertices (open strip). */
static void tgeDrawElements_line_strip(TGEsizei count, TGEuint* indices)
{
  TGEVertex fetched, from, to;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(indices[n], &fetched);
    if (n == 0) {
      /* first vertex only starts the strip */
      from = fetched;
      continue;
    }
    to = fetched;
    tgeDrawLine(&from, &to);
    from = to;
  }
}

/*
 * Draw a connected line through the indexed vertices and close it with a
 * segment between the first and the last vertex.
 *
 * Nothing is drawn for an empty index list; without that guard the closing
 * segment would be built from vertices that were never filled in.
 */
static void tgeDrawElements_line_loop(TGEsizei count, TGEuint* indices)
{
  TGEVertex tv, tva, tvb;
  TGEVertex tvs;
  TGEsizei i;

  if (count < 1) {
    return;
  }
  for (i = 0; i < count; i++) {
    tgeExtractArray(indices[i], &tv);
    if (i == 0) {
      /* remember the first vertex for the closing segment */
      tvs = tva = tv;
      continue;
    }
    tvb = tv;
    tgeDrawLine(&tva, &tvb);
    tva = tvb;
  }
  tgeDrawLine(&tvs, &tva);
}

/*
 * Draw independent line segments: indices are consumed in pairs, a trailing
 * unpaired index draws nothing.
 */
static void tgeDrawElements_lines(TGEsizei count, TGEuint* indices)
{
  TGEVertex fetched, head, tail;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(indices[n], &fetched);
    if ((n%2) == 0) {
      head = fetched;
    }
    else {
      tail = fetched;
      tgeDrawLine(&head, &tail);
    }
  }
}

/*
 * Draw a triangle strip: from the third index on, every index forms a
 * triangle with the two vertices before it.  The vertices are always passed
 * to tgeDrawTriangle() in strip order.
 */
static void tgeDrawElements_triangle_strip(TGEsizei count, const TGEuint* indices)
{
  TGEVertex fetched, first, second, third;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(indices[n], &fetched);
    if (n == 0) {
      first = fetched;
    }
    else if (n == 1) {
      second = fetched;
    }
    else {
      third = fetched;
      tgeDrawTriangle(&first, &second, &third);
      /* slide the window by one vertex */
      first = second;
      second = third;
    }
  }
}

/*
 * Draw a triangle fan: the first index is the shared hub, and from the
 * third index on every index forms a triangle with the hub and the
 * previous vertex.
 */
static void tgeDrawElements_triangle_fan(TGEsizei count, const TGEuint* indices)
{
  TGEVertex fetched, hub, previous, current;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(indices[n], &fetched);
    if (n == 0) {
      hub = fetched;
    }
    else if (n == 1) {
      previous = fetched;
    }
    else {
      current = fetched;
      tgeDrawTriangle(&hub, &previous, &current);
      previous = current;
    }
  }
}

/*
 * Draw independent triangles: indices are consumed in groups of three, an
 * incomplete trailing group draws nothing.
 */
static void tgeDrawElements_triangles(TGEsizei count, const TGEuint* indices)
{
  TGEVertex fetched, first, second, third;
  TGEint n;

  for (n = 0; n < count; n++) {
    const TGEint slot = n%3;

    tgeExtractArray(indices[n], &fetched);
    if (slot == 0) {
      first = fetched;
    }
    else if (slot == 1) {
      second = fetched;
    }
    else {
      third = fetched;
      tgeDrawTriangle(&first, &second, &third);
    }
  }
}

// tgeDrawArrays helper functions (one per primitive mode)

/* Draw count consecutive array elements, starting at first, as points. */
static void tgeDrawArrays_points(TGEint first, TGEsizei count)
{
  TGEVertex point;
  TGEsizei n = 0;

  while (n < count) {
    tgeExtractArray(first+n, &point);
    tgeDrawVertex(&point);
    n++;
  }
}

/* Draw a connected line through count consecutive array elements. */
static void tgeDrawArrays_line_strip(TGEint first, TGEsizei count)
{
  TGEVertex fetched, from, to;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(first+n, &fetched);
    if (n == 0) {
      /* first vertex only starts the strip */
      from = fetched;
      continue;
    }
    to = fetched;
    tgeDrawLine(&from, &to);
    from = to;
  }
}

/*
 * Draw a connected line through count consecutive array elements and close
 * it with a segment between the first and the last vertex.
 *
 * Nothing is drawn for an empty range; without that guard the closing
 * segment would be built from vertices that were never filled in.
 */
static void tgeDrawArrays_line_loop(TGEint first, TGEsizei count)
{
  TGEVertex tv, tva, tvb;
  TGEVertex tvs;
  TGEsizei i;

  if (count < 1) {
    return;
  }
  for (i = 0; i < count; i++) {
    tgeExtractArray(first+i, &tv);
    if (i == 0) {
      /* remember the first vertex for the closing segment */
      tvs = tva = tv;
      continue;
    }
    tvb = tv;
    tgeDrawLine(&tva, &tvb);
    tva = tvb;
  }
  tgeDrawLine(&tvs, &tva);
}

/*
 * Draw independent line segments: array elements are consumed in pairs, a
 * trailing unpaired element draws nothing.
 */
static void tgeDrawArrays_lines(TGEint first, TGEsizei count)
{
  TGEVertex fetched, head, tail;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(first+n, &fetched);
    if ((n%2) == 0) {
      head = fetched;
    }
    else {
      tail = fetched;
      tgeDrawLine(&head, &tail);
    }
  }
}

/*
 * Draw a triangle strip: from the third element on, every element forms a
 * triangle with the two vertices before it.  The vertices are always passed
 * to tgeDrawTriangle() in strip order.
 */
static void tgeDrawArrays_triangle_strip(TGEint first, TGEsizei count)
{
  TGEVertex fetched, a, b, c;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(first+n, &fetched);
    if (n == 0) {
      a = fetched;
    }
    else if (n == 1) {
      b = fetched;
    }
    else {
      c = fetched;
      tgeDrawTriangle(&a, &b, &c);
      /* slide the window by one vertex */
      a = b;
      b = c;
    }
  }
}

/*
 * Draw a triangle fan: the first element is the shared hub, and from the
 * third element on every element forms a triangle with the hub and the
 * previous vertex.
 */
static void tgeDrawArrays_triangle_fan(TGEint first, TGEsizei count)
{
  TGEVertex fetched, hub, previous, current;
  TGEsizei n;

  for (n = 0; n < count; n++) {
    tgeExtractArray(first+n, &fetched);
    if (n == 0) {
      hub = fetched;
    }
    else if (n == 1) {
      previous = fetched;
    }
    else {
      current = fetched;
      tgeDrawTriangle(&hub, &previous, &current);
      previous = current;
    }
  }
}

/*
 * Draw independent triangles: array elements are consumed in groups of
 * three, an incomplete trailing group draws nothing.
 */
static void tgeDrawArrays_triangles(TGEint first, TGEsizei count)
{
  TGEVertex fetched, a, b, c;
  TGEint n;

  for (n = 0; n < count; n++) {
    const TGEint slot = n%3;

    tgeExtractArray(first+n, &fetched);
    if (slot == 0) {
      a = fetched;
    }
    else if (slot == 1) {
      b = fetched;
    }
    else {
      c = fetched;
      tgeDrawTriangle(&a, &b, &c);
    }
  }
}

/*
 * Fill the internal vertex tv from element index of the enabled client
 * arrays and mark it as not yet transformed (calced = TGE_FALSE).
 *
 *  - vertex:   copied from the vertex array when it is enabled (z = 0 for
 *              two-component arrays); otherwise tv->vertex is left as is
 *  - normal:   from the normal array, else (auto normal enabled) the
 *              normalized vertex position, else (0, 0, -1)
 *  - color:    from the color array (low 8 bits of integer types, value*255
 *              for float/double types; alpha is 0xFF for three-component
 *              arrays), else the current color
 *  - texCoord: only written while texturing is enabled; (0, 0) when the
 *              texture coordinate array is disabled
 *
 * The scratch buffers hold four components and start zeroed: the getters
 * write as many values as the configured array size, and the auto-normal
 * path reads v even when the vertex array is disabled.
 * NOTE(review): this assumes array sizes never exceed 4 - confirm that the
 * tge*Pointer setters guarantee it.
 */
static void tgeExtractArray(TGEint index, TGEVertex* tv)
{
  TGEdouble v[4] = {0.0, 0.0, 0.0, 0.0};
  TGEdouble n[4] = {0.0, 0.0, 0.0, 0.0};
  TGEdouble t[4] = {0.0, 0.0, 0.0, 0.0};
  TGEdouble c[4] = {0.0, 0.0, 0.0, 0.0};
  TGEuint ci[4] = {0, 0, 0, 0};
  TGEdouble dist;

  tv->calced = TGE_FALSE;
  // vertex
  if (_ge->enableVertexArray) {
    _ge->vertexArray.getValuedv(_ge->vertexArray.data, index,
                                _ge->vertexArray.stride,
                                _ge->vertexArray.size, v);
    if (_ge->vertexArray.size == 2) v[2] = 0;
    tv->vertex[0] = v[0];
    tv->vertex[1] = v[1];
    tv->vertex[2] = v[2];
  }
  // normal
  if (_ge->enableNormalArray) {
    _ge->normalArray.getValuedv(_ge->normalArray.data, index,
                                _ge->normalArray.stride, 3, n);
    tv->normal[0] = n[0];
    tv->normal[1] = n[1];
    tv->normal[2] = n[2];
  }
  else {
    dist = _ge->enableAutoNormal
      ? sqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]) : 0.0;
    if (dist > 0.0) {
      tv->normal[0] = v[0]/dist;
      tv->normal[1] = v[1]/dist;
      tv->normal[2] = v[2]/dist;
    }
    else {
      /* no auto normal, or a vertex at the origin that cannot be
         normalized: use the default normal */
      tv->normal[0] = 0;
      tv->normal[1] = 0;
      tv->normal[2] = -1.0;
    }
  }
  // color
  if (_ge->enableColorArray) {
    switch (_ge->colorArray.type) {
    case TGE_BYTE:
    case TGE_UNSIGNED_BYTE:
    case TGE_SHORT:
    case TGE_UNSIGNED_SHORT:
    case TGE_INT:
    case TGE_UNSIGNED_INT:
      _ge->colorArray.getValueuiv(_ge->colorArray.data, index,
                                  _ge->colorArray.stride,
                                  _ge->colorArray.size, ci);
      tv->color[0] = (ci[0] & 0xFF);
      tv->color[1] = (ci[1] & 0xFF);
      tv->color[2] = (ci[2] & 0xFF);
      tv->color[3] = (_ge->colorArray.size == 4) ? (ci[3] & 0xFF) : 0xFF;
      break;
    default:
      _ge->colorArray.getValuedv(_ge->colorArray.data, index,
                                 _ge->colorArray.stride,
                                 _ge->colorArray.size, c);
      tv->color[0] = (TGEubyte)floor(c[0] * 255.0);
      tv->color[1] = (TGEubyte)floor(c[1] * 255.0);
      tv->color[2] = (TGEubyte)floor(c[2] * 255.0);
      tv->color[3] = (_ge->colorArray.size == 4) ?
                     (TGEubyte)floor(c[3] * 255.0) : 0xFF;
      break;
    }
  }
  else {
    tv->color[0] = (TGEubyte)floor(_ge->color[0] * 255.0);
    tv->color[1] = (TGEubyte)floor(_ge->color[1] * 255.0);
    tv->color[2] = (TGEubyte)floor(_ge->color[2] * 255.0);
    tv->color[3] = (TGEubyte)floor(_ge->color[3] * 255.0);
  }
  // texture
  if (_ge->enableTexture) {
    if (_ge->enableTexCoordArray) {
      _ge->texCoordArray.getValuedv(_ge->texCoordArray.data, index,
                                    _ge->texCoordArray.stride,
                                    _ge->texCoordArray.size, t);
      tv->texCoord[0] = t[0];
      tv->texCoord[1] = t[1];
    }
    else {
      tv->texCoord[0] = 0;
      tv->texCoord[1] = 0;
    }
  }
}

/*
 * Transform a vertex to screen coordinates and compute its final color.
 *
 * The position is multiplied by the screen matrix and mapped to the
 * viewport according to the current projection (frustum: with perspective
 * divide, ortho: without); the z component is kept as the transformed z.
 * With lighting enabled the RGB color is scaled by the light factor from
 * tgeCalcLightAffectd(), alpha is copied unchanged; otherwise the color is
 * copied as is.  Results go to multVertex / compColor / lightAffect and
 * calced is set.
 *
 * If no known projection type is set, the screen x/y default to 0 instead
 * of being left uninitialized.
 */
static void tgeCalcVertex(TGEVertex* v)
{
  TGEdouble vl[] = {
    _ge->lightPosition[0],
    _ge->lightPosition[1],
    _ge->lightPosition[2]
  };
  TGEdouble vn[3];
  TGEuint cr, cg, cb;
  TGEdouble* vv = v->vertex;
  TGEProjection p = _ge->projection;
  TGEdouble vec[] = {vv[0], vv[1], vv[2], 1.0};
  TGEdouble vp[3] = {0.0, 0.0, 0.0};
  TGEsizei i;

  tgeMultVectorMatrixd(vec, _ge->screenMatrix, vec);
  // convert to Viewport
  if (_ge->projection.type == TGE_FRUSTUM) {
    vp[0] = (p.near / vec[2] * vec[0] - p.left) * _ge->screen->width / (p.right - p.left);
    vp[1] = (p.near / vec[2] * vec[1] - p.top) * _ge->screen->height / (p.bottom - p.top);
  }
  else if (_ge->projection.type == TGE_ORTHO) {
    vp[0] = (vec[0] - p.left) * _ge->screen->width / (p.right - p.left);
    vp[1] = (vec[1] - p.top) * _ge->screen->height / (p.bottom - p.top);
  }
  vp[2] = vec[2];

  if (_ge->enableLight) {
    vn[0] = v->normal[0];
    vn[1] = v->normal[1];
    vn[2] = v->normal[2];
    v->lightAffect = tgeCalcLightAffectd(vl, vn);
    cr = v->color[0];
    cg = v->color[1];
    cb = v->color[2];
    /* written so that a NaN factor also ends up as 0 before it is used in
       the unsigned multiplications below */
    if (!(v->lightAffect > 0.0)) v->lightAffect = 0.0;
    cr *= v->lightAffect;
    cg *= v->lightAffect;
    cb *= v->lightAffect;
    v->compColor[0] = cr & 0xFF;
    v->compColor[1] = cg & 0xFF;
    v->compColor[2] = cb & 0xFF;
    v->compColor[3] = v->color[3];
  }
  else {
    for (i = 0; i < 4; i++) v->compColor[i] = v->color[i];
  }
  v->multVertex[0] = vp[0];
  v->multVertex[1] = vp[1];
  v->multVertex[2] = vp[2];
  v->calced = TGE_TRUE;
}

/*
 * Compute a light factor from two 3-component vectors.
 *
 * Both vectors are passed through tgeVectorNormd(); the factor is the dot
 * product of the two results, with negative values replaced by 0.  If the
 * value tgeVectorNormd() returns for the first vector is 0 (presumably its
 * length - TODO confirm), the factor is 1.
 */
static TGEdouble tgeCalcLightAffectd(TGEdouble* vertex, TGEdouble* lightpos)
{
  TGEdouble unitVertex[3];
  TGEdouble unitLight[3];
  TGEdouble cosine;
  const TGEdouble length = tgeVectorNormd(3, vertex, unitVertex);

  tgeVectorNormd(3, lightpos, unitLight);
  if (length == 0.0) {
    return 1.0;
  }
  cosine = tgeVectorDotd(3, unitLight, unitVertex);
  return (cosine < 0.0) ? 0.0 : cosine;
}

/*
 * Draw a single vertex as one pixel.
 *
 * The vertex is transformed first if needed.  It is skipped when its screen
 * position lies outside the screen or its z lies outside [near, far].  With
 * depth testing enabled the pixel is only written when zbuf_set() (depth
 * mask on) or zbuf_test() (depth mask off) accepts it.  With blending
 * enabled the pixel is blended as RGBA8 using the current blend factors,
 * otherwise it is written as RGB8.
 *
 * The blend factors start as ONE / ZERO (OpenGL's initial blend function)
 * so that they are defined even if the stored factor matches no case.
 */
static void tgeDrawVertex(TGEVertex* v)
{
  TGEint x, y;
  TGEProjection* p = &_ge->projection;
  TGEint sfactor = CVBlendOne;
  TGEint dfactor = CVBlendZero;
  if (!v->calced) tgeCalcVertex(v);
  if (v->multVertex[0] < 0 || v->multVertex[0] >= _ge->screen->width ||
      v->multVertex[1] < 0 || v->multVertex[1] >= _ge->screen->height ||
      v->multVertex[2] < p->near || v->multVertex[2] > p->far) {
    return;
  }
  x = (TGEint)floor(v->multVertex[0]);
  y = (TGEint)floor(v->multVertex[1]);
  if (_ge->enableBlend) {
    switch (_ge->blendSrcFactor) {
    case TGE_ONE: sfactor = CVBlendOne; break;
    case TGE_ZERO: sfactor = CVBlendZero; break;
    case TGE_SRC_COLOR: sfactor = CVBlendSrcColor; break;
    case TGE_ONE_MINUS_SRC_COLOR: sfactor = CVBlendOneMinusSrcColor; break;
    case TGE_DST_COLOR: sfactor = CVBlendDstColor; break;
    case TGE_ONE_MINUS_DST_COLOR: sfactor = CVBlendOneMinusDstColor; break;
    case TGE_SRC_ALPHA: sfactor = CVBlendSrcAlpha; break;
    case TGE_ONE_MINUS_SRC_ALPHA: sfactor = CVBlendOneMinusSrcAlpha; break;
    case TGE_DST_ALPHA: sfactor = CVBlendDstAlpha; break;
    case TGE_ONE_MINUS_DST_ALPHA: sfactor = CVBlendOneMinusDstAlpha; break;
    default: break;     /* keep the initial factor */
    }
    switch (_ge->blendDstFactor) {
    case TGE_ONE: dfactor = CVBlendOne; break;
    case TGE_ZERO: dfactor = CVBlendZero; break;
    case TGE_SRC_COLOR: dfactor = CVBlendSrcColor; break;
    case TGE_ONE_MINUS_SRC_COLOR: dfactor = CVBlendOneMinusSrcColor; break;
    case TGE_DST_COLOR: dfactor = CVBlendDstColor; break;
    case TGE_ONE_MINUS_DST_COLOR: dfactor = CVBlendOneMinusDstColor; break;
    case TGE_SRC_ALPHA: dfactor = CVBlendSrcAlpha; break;
    case TGE_ONE_MINUS_SRC_ALPHA: dfactor = CVBlendOneMinusSrcAlpha; break;
    case TGE_DST_ALPHA: dfactor = CVBlendDstAlpha; break;
    case TGE_ONE_MINUS_DST_ALPHA: dfactor = CVBlendOneMinusDstAlpha; break;
    default: break;     /* keep the initial factor */
    }
  }
  if (!_ge->enableDepthTest ||
      ((_ge->depthMask) ? zbuf_set(_ge->depthBuffer, x, y, v->multVertex[2])
       : zbuf_test(_ge->depthBuffer, x, y, v->multVertex[2]))) {
    if (_ge->enableBlend) {
      cv_blend_pixel2_ex(_ge->screen, x, y, CVDepthRGBA8, &v->compColor,
                         sfactor, dfactor);
    }
    else {
      cv_set_pixel_ex(_ge->screen, x, y, CVDepthRGB8, &v->compColor);
    }
  }
}

static void tgeDrawLine(TGEVertex* va, TGEVertex* vb)
{
  TGEdouble sv[3];
  TGEdouble ev[3];
  TGEdouble dv[3];
  TGEdouble sc[4];
  TGEdouble ec[4];
  TGEdouble dc[4];
  TGEdouble tv[3];       /* temp */
  TGEdouble tc[4];       /* temp */
  TGEint w = _ge->screen->width;
  TGEint h = _ge->screen->height;
  TGEint svi[3];
  TGEint evi[3];
  TGEint dvi[3];
  TGEsizei i, j;
  TGEsizei count;
  TGEint x, y;
  TGEubyte c[4];
  TGEint sfactor;
  TGEint dfactor;

  if (!va->calced) tgeCalcVertex(va);
  if (!vb->calced) tgeCalcVertex(vb);
  /* clip x-axis */
  if (va->multVertex[0] < 0 &&
      vb->multVertex[0] < 0) {
    return;
  }
  if (va->multVertex[0] > w &&
      vb->multVertex[0] > w) {
    return;
  }
  /* clip y-axis */
  if (va->multVertex[1] < 0 &&
      vb->multVertex[1] < 0) {
    return;
  }
  if (va->multVertex[1] > h &&
      vb->multVertex[1] > h) {
    return;
  }
  /* clip z-axis */
  if (_ge->projection.far < va->multVertex[2] ||
      _ge->projection.far < vb->multVertex[2]) {
    return;
  }
  if (_ge->projection.near > va->multVertex[2] ||
      _ge->projection.near > vb->multVertex[2]) {
    return;
  }

  for (i = 0; i < 4; i++) {
    if (i < 3) {
      if (i == 2 && _ge->enablePolygonOffsetLine) {
        sv[i] = (va->multVertex[i] * _ge->polygonOffsetFactor +
                 1.0 * _ge->polygonOffsetUnits);
        ev[i] = (vb->multVertex[i] * _ge->polygonOffsetFactor +
                 1.0 * _ge->polygonOffsetUnits);
      }
      else {
        sv[i] = va->multVertex[i];
        ev[i] = vb->multVertex[i];
      }
      dv[i] = ev[i] - sv[i];
    }
    sc[i] = va->compColor[i]/255.0;
    ec[i] = vb->compColor[i]/255.0;
    dc[i] = ec[i] - sc[i];
  }

  for (i = 0; i < 3; i++) {
    svi[i] = (int)floor(sv[i]);
    evi[i] = (int)floor(ev[i]);
    dvi[i] = evi[i] - svi[i];
  }
  if (svi[0] == evi[0] && svi[1] == evi[0]) {
    // 1 dot
    if (sv[2] < ev[2]) {
      tgeDrawVertex(va);
    }
    else {
      tgeDrawVertex(vb);
    }
    return;
  }

  if (0 <= sv[0] && w-1 >= sv[0] && 0 <= ev[0] && w-1 >= ev[0] &&
      0 <= sv[1] && h-1 >= sv[1] && 0 <= ev[1] && h-1 >= ev[1]) {
    // both point in RECT
  }
  else {
    // sv.x
    if (0 > sv[0]) {
      tgeVectorMuld(3, dv, fabs(sv[0]/dv[0]), tv);
      tgeVectorMuld(4, dc, fabs(sv[0]/dv[0]), tc);
      tgeVectorAddd(3, sv, tv, sv);
      tgeVectorAddd(4, sc, tc, sc);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    else if (w-1 < sv[0]) {
      tgeVectorMuld(3, dv, fabs((sv[0]-(w-1))/dv[0]), tv);
      tgeVectorMuld(4, dc, fabs((sv[0]-(w-1))/dv[0]), tc);
      tgeVectorAddd(3, sv, tv, sv);
      tgeVectorAddd(4, sc, tc, sc);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    // sv.y
    if (0 > sv[1]) {
      tgeVectorMuld(3, dv, fabs(sv[1]/dv[1]), tv);
      tgeVectorMuld(4, dc, fabs(sv[1]/dv[1]), tc);
      tgeVectorAddd(3, sv, tv, sv);
      tgeVectorAddd(4, sc, tc, sc);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    else if (h-1 < sv[1]) {
      tgeVectorMuld(3, dv, fabs((sv[1]-(h-1))/dv[1]), tv);
      tgeVectorMuld(4, dc, fabs((sv[1]-(h-1))/dv[1]), tc);
      tgeVectorAddd(3, sv, tv, sv);
      tgeVectorAddd(4, sc, tc, sc);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    // ev.x
    if (0 > ev[0]) {
      tgeVectorMuld(3, dv, fabs(ev[0]/dv[0]), tv);
      tgeVectorMuld(4, dc, fabs(ev[0]/dv[0]), tc);
      tgeVectorSubd(3, ev, tv, ev);
      tgeVectorSubd(4, ec, tc, ec);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    else if (w-1 < ev[0]) {
      tgeVectorMuld(3, dv, fabs((ev[0]-(w-1))/dv[0]), tv);
      tgeVectorMuld(4, dc, fabs((ev[0]-(w-1))/dv[0]), tc);
      tgeVectorSubd(3, ev, tv, ev);
      tgeVectorSubd(4, ec, tc, ec);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    // ev.y
    if (0 > ev[1]) {
      tgeVectorMuld(3, dv, fabs(ev[1]/dv[1]), tv);
      tgeVectorDivd(4, dc, fabs(ev[1]/dv[1]), tc);
      tgeVectorSubd(3, ev, tv, ev);
      tgeVectorSubd(4, ec, tc, ec);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
    else if (h-1 < ev[1]) {
      tgeVectorMuld(3, dv, fabs((ev[1]-(h-1))/dv[1]), tv);
      tgeVectorMuld(4, dc, fabs((ev[1]-(h-1))/dv[1]), tc);
      tgeVectorSubd(3, ev, tv, ev);
      tgeVectorSubd(4, ec, tc, ec);
      tgeVectorSubd(3, ev, sv, dv);
      tgeVectorSubd(4, ec, sc, dc);
    }
  }

  if (0 > sv[0] || w-1 < sv[0] || 0 > ev[0] || w-1 < ev[0] ||
      0 > sv[1] || h-1 < sv[1] || 0 > ev[1] || h-1 < ev[1]) {
    return;
  }

  for (i = 0; i < 3; i++) {
    svi[i] = (int)floor(sv[i]);
    evi[i] = (int)floor(ev[i]);
    dvi[i] = evi[i] - svi[i];
  }

  if (svi[0] == evi[0] && svi[1] == evi[0]) {
    // 1 dot
    if (sv[2] < ev[2]) {
      tgeDrawVertex(va);
    }
    else {
      tgeDrawVertex(vb);
    }
    return;
  }
  else if (svi[0] == evi[0]) {
    // vertical line
    count = abs(evi[1] - svi[1]);
    tgeVectorDivd(4, dc, fabs(dv[1])-1.0, dc);
    tgeVectorDivd(3, dv, fabs(dv[1])-1.0, dv);
  }
  else if (svi[1] == evi[1]) {
    // horizontal line
    count = abs(evi[0] - svi[0]);
    tgeVectorDivd(4, dc, fabs(dv[0])-1.0, dc);
    tgeVectorDivd(3, dv, fabs(dv[0])-1.0, dv);
  }
  else if (1 < fabs((double)(evi[0]-svi[0])/(double)(evi[1]-svi[1]))) {
    // y = ax + b, |a| > 1
    count = abs(evi[0] - svi[0]);
    tgeVectorDivd(4, dc, fabs(dv[0])-1.0, dc);
    tgeVectorDivd(3, dv, fabs(dv[0])-1.0, dv);
  }
  else {
    // y = ax + b, |a| <= 1
    count = abs(evi[1] - svi[1]);
    tgeVectorDivd(4, dc, fabs(dv[1])-1.0, dc);
    tgeVectorDivd(3, dv, fabs(dv[1])-1.0, dv);
  }

  for (i = 0; i < 4; i++) {
    if (i < 3) {
      tv[i] = sv[i];
    }
    tc[i] = sc[i];
  }
  if (_ge->enableBlend) {
    switch (_ge->blendSrcFactor) {
    case TGE_ONE: sfactor = CVBlendOne; break;
    case TGE_ZERO: sfactor = CVBlendZero; break;
    case TGE_SRC_COLOR: sfactor = CVBlendSrcColor; break;
    case TGE_ONE_MINUS_SRC_COLOR: sfactor = CVBlendOneMinusSrcColor; break;
    case TGE_DST_COLOR: sfactor = CVBlendDstColor; break;
    case TGE_ONE_MINUS_DST_COLOR: sfactor = CVBlendOneMinusDstColor; break;
    case TGE_SRC_ALPHA: sfactor = CVBlendSrcAlpha; break;
    case TGE_ONE_MINUS_SRC_ALPHA: sfactor = CVBlendOneMinusSrcAlpha; break;
    case TGE_DST_ALPHA: sfactor = CVBlendDstAlpha; break;
    case TGE_ONE_MINUS_DST_ALPHA: sfactor = CVBlendOneMinusDstAlpha; break;
    }
    switch (_ge->blendDstFactor) {
    case TGE_ONE: dfactor = CVBlendOne; break;
    case TGE_ZERO: dfactor = CVBlendZero; break;
    case TGE_SRC_COLOR: dfactor = CVBlendSrcColor; break;
    case TGE_ONE_MINUS_SRC_COLOR: dfactor = CVBlendOneMinusSrcColor; break;
    case TGE_DST_COLOR: dfactor = CVBlendDstColor; break;
    case TGE_ONE_MINUS_DST_COLOR: dfactor = CVBlendOneMinusDstColor; break;
    case TGE_SRC_ALPHA: dfactor = CVBlendSrcAlpha; break;
    case TGE_ONE_MINUS_SRC_ALPHA: dfactor = CVBlendOneMinusSrcAlpha; break;
    case TGE_DST_ALPHA: dfactor = CVBlendDstAlpha; break;
    case TGE_ONE_MINUS_DST_ALPHA: dfactor = CVBlendOneMinusDstAlpha; break;
    }
  }
  for (i = 0; i < count; i++) {
    x = (int)floor(tv[0]);
    y = (int)floor(tv[1]);
    if (!_ge->enableDepthTest ||
        ((_ge->depthMask) ? zbuf_set(_ge->depthBuffer, x, y, tv[2])
         : zbuf_test(_ge->depthBuffer, x, y, tv[2]))) {
      c[0] = (int)floor(tc[0] * 255);
      c[1] = (int)floor(tc[1] * 255);
      c[2] = (int)floor(tc[2] * 255);
      c[3] = (int)floor(tc[3] * 255);
      if (_ge->enableBlend) {
        cv_blend_pixel2_ex(_ge->screen, x, y, CVDepthRGBA8, &c,
                           sfactor, dfactor);
      }
      else {
        cv_set_pixel_ex(_ge->screen, x, y, CVDepthRGBA8, &c);
      }
    }
    for (j = 0; j < 4; j++) {
      if (j < 3) {
        tv[j] += dv[j];
      }
      tc[j] += dc[j];
      tc[j] = TGE_BETWEEN(tc[j], 0.0, 1.0);
    }
  }
}

/*
 * Hand one triangle to the rasterizer (StlDrawTriangle).
 *
 * Nothing is drawn when
 *  - culling is enabled for TGE_FRONT_AND_BACK,
 *  - culling is enabled and tgeFaceTest() reports the culled face,
 *  - all three vertices lie beyond the same screen edge, or
 *  - any vertex lies outside [near, far].
 * Otherwise screen position, depth (modified by the polygon offset when
 * fill offset is enabled), color and texture coordinates of the three
 * vertices are copied into the rasterizer structure.
 */
static void tgeDrawTriangle(TGEVertex* va, TGEVertex* vb, TGEVertex* vc)
{
  STL_DRAWTRI tri;
  TGEVertex* corner[3];
  TGEint k;
  TGEint w = _ge->screen->width;
  TGEint h = _ge->screen->height;

  if (_ge->enableCullFace && _ge->cullFace == TGE_FRONT_AND_BACK) {
    return;             /* every triangle is culled */
  }

  if (!va->calced) tgeCalcVertex(va);
  if (!vb->calced) tgeCalcVertex(vb);
  if (!vc->calced) tgeCalcVertex(vc);

  /* cull test */
  if (_ge->enableCullFace) {
    TGEenum face = tgeFaceTest(va, vb, vc);
    if (_ge->cullFace == face) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
      fprintf(stderr, "skip: cull\n");
#endif
      return;
    }
  }

  /* left of the screen */
  if (va->multVertex[0] < 0 && vb->multVertex[0] < 0 && vc->multVertex[0] < 0) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
    fprintf(stderr, "skip: xaxis\n");
#endif
    return;
  }
  /* right of the screen */
  if (va->multVertex[0] > w && vb->multVertex[0] > w && vc->multVertex[0] > w) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
    fprintf(stderr, "skip: xaxis (2)\n");
#endif
    return;
  }
  /* above the screen */
  if (va->multVertex[1] < 0 && vb->multVertex[1] < 0 && vc->multVertex[1] < 0) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
    fprintf(stderr, "skip: yaxis\n");
#endif
    return;
  }
  /* below the screen */
  if (va->multVertex[1] > h && vb->multVertex[1] > h && vc->multVertex[1] > h) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
    fprintf(stderr, "skip: yaxis (2)\n");
#endif
    return;
  }
  /* any vertex behind the far plane */
  if (_ge->projection.far < va->multVertex[2] ||
      _ge->projection.far < vb->multVertex[2] ||
      _ge->projection.far < vc->multVertex[2]) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
    fprintf(stderr, "skip: zaxis (far)\n");
#endif
    return;
  }
  /* any vertex in front of the near plane */
  if (_ge->projection.near > va->multVertex[2] ||
      _ge->projection.near > vb->multVertex[2] ||
      _ge->projection.near > vc->multVertex[2]) {
#ifdef TGE_DRAW_TRIANGLE_DEBUG
    fprintf(stderr, "skip: zaxis (near)\n");
#endif
    return;
  }

  corner[0] = va;
  corner[1] = vb;
  corner[2] = vc;
  for (k = 0; k < 3; k++) {
    TGEVertex* cur = corner[k];

    tri.po[k].m_fX = cur->multVertex[0];
    tri.po[k].m_fY = cur->multVertex[1];
    if (_ge->enablePolygonOffsetFill) {
      tri.z[k] = (cur->multVertex[2] * _ge->polygonOffsetFactor +
                  1.0 * _ge->polygonOffsetUnits);
    }
    else {
      tri.z[k] = cur->multVertex[2];
    }
    tri.clrVtx[k] = MakeARGB(cur->compColor[3], cur->compColor[0],
                             cur->compColor[1], cur->compColor[2]);
    tri.fUU[k] = cur->texCoord[0];
    tri.fVV[k] = cur->texCoord[1];
  }
#ifdef TGE_DRAW_TRIANGLE_DEBUG
  fprintf(stderr, "drawTriangle: (%f, %f, %f) (%f, %f, %f) (%f, %f, %f)\n",
          va->multVertex[0],va->multVertex[1],va->multVertex[2],
          vb->multVertex[0],vb->multVertex[1],vb->multVertex[2],
          vc->multVertex[0],vc->multVertex[1],vc->multVertex[2]);
  fprintf(stderr, "color: (%08x, %08x, %08x)\n",
          tri.clrVtx[0], tri.clrVtx[1], tri.clrVtx[2]);
#endif
  StlDrawTriangle(&tri);
}

/**
 * Determine the front and back of the polygon.
 * @result TGE_FRONT: front, TGE_BACK: back
 */
static TGEenum tgeFaceTest(const TGEVertex* va, const TGEVertex* vb, const TGEVertex* vc)
{
  TGEdouble v1[2];
  TGEdouble v2[2];
  TGEdouble p;
  v1[0] = vb->multVertex[0] - va->multVertex[0];
  v1[1] = vb->multVertex[1] - va->multVertex[1];
  v2[0] = vc->multVertex[0] - va->multVertex[0];
  v2[1] = vc->multVertex[1] - va->multVertex[1];
  p = tgeVectorCrossd(v1, v2);
  return (0.0 < p) ? TGE_BACK : TGE_FRONT;
}

//////////////////////////////////////////////////////////////////////
// Projection matrix
//////////////////////////////////////////////////////////////////////

/**
 * Select the matrix that subsequent matrix operations apply to.
 *
 * @param mode  TGE_MODELVIEW or TGE_PROJECTION; any other value records
 *              TGE_INVALID_ENUM and keeps the previous mode
 */
void tgeMatrixMode(TGEenum mode)
{
  switch (mode) {
  case TGE_MODELVIEW:
  case TGE_PROJECTION:
    _ge->matrixMode = mode;
    break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    break;
  }
}

void tgeLoadIdentity(void)
{
  TGEdouble m[] = {
    1.0, 0.0, 0.0, 0.0,
    0.0, 1.0, 0.0, 0.0,
    0.0, 0.0, 1.0, 0.0,
    0.0, 0.0, 0.0, 1.0
  };
  tgeLoadMatrixd(m);
}

/**
 * Replace the current matrix (selected by tgeMatrixMode) with m and rebuild
 * the screen matrix.
 *
 * Nothing happens if the stored matrix mode is neither TGE_MODELVIEW nor
 * TGE_PROJECTION.
 *
 * @param m  16 matrix elements
 */
void tgeLoadMatrixd(const TGEdouble* m)
{
  TGEdouble* target;
  TGEint k = 0;

  if (_ge->matrixMode == TGE_MODELVIEW) {
    target = _ge->modelMatrix;
  }
  else if (_ge->matrixMode == TGE_PROJECTION) {
    target = _ge->projectionMatrix;
  }
  else {
    return;
  }

  while (k < 16) {
    target[k] = m[k];
    k++;
  }
  tgeMakeScreenMatrix();
}

/**
 * Overwrite the currently selected matrix with 16 float values (widened
 * to double) and rebuild the combined screen matrix.
 *
 * Does nothing when the matrix mode is neither TGE_MODELVIEW nor
 * TGE_PROJECTION.
 *
 * @param m 16 matrix entries, copied in storage order
 */
void tgeLoadMatrixf(const TGEfloat* m)
{
  TGEdouble* target;
  TGEint k;

  if (_ge->matrixMode == TGE_MODELVIEW) {
    target = _ge->modelMatrix;
  }
  else if (_ge->matrixMode == TGE_PROJECTION) {
    target = _ge->projectionMatrix;
  }
  else {
    return;
  }
  for (k = 0; k < 16; k++) {
    target[k] = m[k];
  }
  tgeMakeScreenMatrix();
}

/**
 * Configure a frustum (perspective) projection.
 *
 * Records the six planes in the engine's projection settings, forwards the
 * near/far range to the depth buffer and derives the projection-to-screen
 * scale factor from the screen size and the size of the projection plane.
 *
 * The call is rejected with TGE_INVALID_VALUE when left >= right,
 * top >= bottom, near <= 0, far <= 0 or near == far.
 *
 * @param left   left edge of the projection plane
 * @param right  right edge of the projection plane
 * @param bottom bottom edge of the projection plane (must exceed top)
 * @param top    top edge of the projection plane
 * @param near   distance of the near plane
 * @param far    distance of the far plane
 */
void tgeFrustum(TGEdouble left, TGEdouble right, TGEdouble bottom, TGEdouble top, TGEdouble near, TGEdouble far)
{
  const TGEdouble screenW = _ge->screen->width;
  const TGEdouble screenH = _ge->screen->height;
  TGEdouble planeW;
  TGEdouble planeH;

  if (left >= right || top >= bottom || near <= 0 || far <= 0 || near == far) {
    _ge->error = TGE_INVALID_VALUE;
#ifdef TGE_DEBUG
    fprintf(stderr, "tgeFrustum error: TGE_INVALID_VALUE\n");
#endif
    return;
  }

  _ge->projection.type   = TGE_FRUSTUM;
  _ge->projection.left   = left;
  _ge->projection.right  = right;
  _ge->projection.bottom = bottom;
  _ge->projection.top    = top;
  _ge->projection.near   = near;
  _ge->projection.far    = far;
  zbuf_set_range(_ge->depthBuffer, near, far);

  planeW = right - left;
  planeH = bottom - top;
  /* Scale by height only when the plane's aspect ratio exceeds the
     screen's; equal ratios and every other case scale by width. */
  if (screenW/screenH < planeW/planeH) {
    _ge->projectionToScreenScale = screenH/planeH;
  }
  else {
    _ge->projectionToScreenScale = screenW/planeW;
  }

#if 0
  if (1) {
    TGEdouble m[] = {
      2*near/(right-left), 0.0, 0.0, 0.0,
      0.0, 2*near/(top-bottom), 0.0, 0.0,
      (right+left)/(right-left), (top+bottom)/(top-bottom), -(far+near)/(far-near), -1.0,
      0.0, 0.0, -(2*far*near)/(far-near), 0.0
    };
    tgeMultMatrixd(m);
  }
#endif
}

/**
 * Configure an orthographic projection.
 *
 * Records the six planes in the engine's projection settings and forwards
 * the near/far range to the depth buffer. A degenerate volume
 * (left == right, top == bottom or near == far) is rejected with
 * TGE_INVALID_VALUE.
 *
 * @param left   left edge of the view volume
 * @param right  right edge of the view volume
 * @param bottom bottom edge of the view volume
 * @param top    top edge of the view volume
 * @param near   near plane
 * @param far    far plane
 */
void tgeOrtho(TGEdouble left, TGEdouble right, TGEdouble bottom, TGEdouble top, TGEdouble near, TGEdouble far)
{
  const int degenerate = (left == right || top == bottom || near == far);

  if (degenerate) {
    _ge->error = TGE_INVALID_VALUE;
    return;
  }

  _ge->projection.type   = TGE_ORTHO;
  _ge->projection.left   = left;
  _ge->projection.right  = right;
  _ge->projection.bottom = bottom;
  _ge->projection.top    = top;
  _ge->projection.near   = near;
  _ge->projection.far    = far;
  zbuf_set_range(_ge->depthBuffer, near, far);

#if 0
  if (1) {
    TGEdouble m[] = {
      2/(right-left), 0.0, 0.0, 0.0,
      0.0, 2/(top-bottom), 0.0, 0.0,
      0.0, 0.0, -2/(far-near), 0.0,
      (right+left)/(right-left), (top+bottom)/(top-bottom), (far+near)/(far-near), 1.0
    };
    tgeMultMatrixd(m);
  }
#endif
}

/**
 * Multiply the currently selected matrix by a translation matrix.
 *
 * @param x translation stored in entry 12
 * @param y translation stored in entry 13
 * @param z translation stored in entry 14
 */
void tgeTranslated(TGEdouble x, TGEdouble y, TGEdouble z)
{
  TGEdouble shift[16] = { 0.0 };

  shift[0] = shift[5] = shift[10] = shift[15] = 1.0;
  shift[12] = x;
  shift[13] = y;
  shift[14] = z;
  tgeMultMatrixd(shift);
}

/**
 * Multiply the currently selected matrix by a rotation about an axis.
 *
 * The axis is normalized before the rotation matrix is built. An axis
 * whose length is not greater than zero (zero vector, or a length that is
 * NaN) has no direction; the call then records TGE_INVALID_VALUE and leaves
 * the matrix untouched instead of multiplying NaN values into it.
 *
 * @param angle rotation angle in degrees
 * @param x     x component of the rotation axis
 * @param y     y component of the rotation axis
 * @param z     z component of the rotation axis
 */
void tgeRotated(TGEdouble angle, TGEdouble x, TGEdouble y, TGEdouble z)
{
  TGEdouble ra = TGE_DEG2RAD(angle);
  TGEdouble c  = cos(ra);
  TGEdouble s  = sin(ra);
  TGEdouble mc = 1.0 - c;
  TGEdouble v[3];
  TGEdouble len;

  v[0] = x;
  v[1] = y;
  v[2] = z;
  len = tgeVectorNormd(3, v, v);
  if (!(len > 0.0)) {
    /* degenerate axis: normalization divided by zero */
    _ge->error = TGE_INVALID_VALUE;
    return;
  }
  x = v[0]; y = v[1]; z = v[2];
  {
    TGEdouble m[] = {
      x*x*mc+c,   y*x*mc+z*s, x*z*mc-y*s, 0.0,
      x*y*mc-z*s, y*y*mc+c,   y*z*mc+x*s, 0.0,
      x*z*mc+y*s, y*z*mc-x*s, z*z*mc+c,   0.0,
      0.0, 0.0, 0.0, 1.0
    };
    tgeMultMatrixd(m);
  }
}

/**
 * Multiply the currently selected matrix by a scaling matrix.
 *
 * @param x scale factor placed on diagonal entry 0
 * @param y scale factor placed on diagonal entry 5
 * @param z scale factor placed on diagonal entry 10
 */
void tgeScaled(TGEdouble x, TGEdouble y, TGEdouble z)
{
  TGEdouble scale[16] = { 0.0 };

  scale[0]  = x;
  scale[5]  = y;
  scale[10] = z;
  scale[15] = 1.0;
  tgeMultMatrixd(scale);
}

/**
 * Multiply the currently selected matrix by m (current = current * m in
 * the sense of tgeMultMatrixInternald) and rebuild the screen matrix.
 *
 * Does nothing when the matrix mode is neither TGE_MODELVIEW nor
 * TGE_PROJECTION.
 *
 * @param m 16 matrix entries of the right-hand operand
 */
void tgeMultMatrixd(const TGEdouble* m)
{
  TGEdouble* target;

  if (_ge->matrixMode == TGE_MODELVIEW) {
    target = _ge->modelMatrix;
  }
  else if (_ge->matrixMode == TGE_PROJECTION) {
    target = _ge->projectionMatrix;
  }
  else {
    return;
  }
  tgeMultMatrixInternald(target, m, target);
  tgeMakeScreenMatrix();
}

static void tgeMakeScreenMatrix(void)
{
  tgeMultMatrixInternald(_ge->modelMatrix,
                         _ge->projectionMatrix,
                         _ge->screenMatrix);
}

//////////////////////////////////////////////////////////////////////
// vector/matrix utilities
//////////////////////////////////////////////////////////////////////

/**
 * 2D cross product (z component of the 3D cross product).
 *
 * @param va first vector, two components are read
 * @param vb second vector, two components are read
 * @result va[0]*vb[1] - va[1]*vb[0]
 */
TGEdouble tgeVectorCrossd(const TGEdouble* va, const TGEdouble* vb)
{
  const TGEdouble ax = va[0];
  const TGEdouble ay = va[1];
  const TGEdouble bx = vb[0];
  const TGEdouble by = vb[1];

  return ax*by - ay*bx;
}

/**
 * Dot product of two vectors.
 *
 * The loop index has the same type as size (as in tgeVectorAddd and its
 * siblings) so the loop bound is never compared across signedness.
 *
 * @param size number of components
 * @param va   first vector
 * @param vb   second vector
 * @result sum of va[i]*vb[i]; 0.0 when size is not positive
 */
TGEdouble tgeVectorDotd(TGEsizei size, const TGEdouble* va, const TGEdouble* vb)
{
  TGEdouble x = 0.0;
  TGEsizei i;
  for (i = 0; i < size; i++) x += va[i]*vb[i];
  return x;
}

/**
 * Normalize a vector to unit length.
 *
 * va and vb may refer to the same storage. A vector whose squared length
 * is exactly 1.0 is copied unchanged. A zero-length vector has no
 * direction: it is copied unchanged and 0.0 is returned, instead of
 * dividing by zero and filling vb with NaN.
 *
 * @param size number of components
 * @param va   input vector
 * @param vb   receives the normalized vector
 * @result length of va before normalization
 */
TGEdouble tgeVectorNormd(TGEsizei size, const TGEdouble* va, TGEdouble* vb)
{
  TGEdouble d = 0.0;
  TGEdouble d2;
  TGEsizei i;
  for (i = 0; i < size; i++) d += va[i]*va[i];
  if (d == 1.0 || d == 0.0) {
    /* sqrt(d) == d for both values, so d is already the length */
    for (i = 0; i < size; i++) vb[i] = va[i];
    return d;
  }
  d = sqrt(d);
  d2 = 1.0 / d;
  for (i = 0; i < size; i++) vb[i] = va[i] * d2;
  return d;
}

/**
 * Euclidean length of a vector.
 *
 * The loop index has the same type as size (as in tgeVectorAddd and its
 * siblings) so the loop bound is never compared across signedness.
 *
 * @param size number of components
 * @param v    input vector
 * @result square root of the sum of squared components
 */
TGEdouble tgeVectorDistd(TGEsizei size, const TGEdouble* v)
{
  TGEdouble d = 0.0;
  TGEsizei i;
  for (i = 0; i < size; i++) d += v[i]*v[i];
  return sqrt(d);
}

/**
 * Component-wise sum: vc = va + vb.
 *
 * @param size number of components
 * @param va   first operand
 * @param vb   second operand
 * @param vc   receives the result
 */
void tgeVectorAddd(TGEsizei size, const TGEdouble* va, const TGEdouble* vb, TGEdouble* vc)
{
  TGEsizei k = 0;
  while (k < size) {
    vc[k] = va[k] + vb[k];
    k++;
  }
}

/**
 * Component-wise difference: vc = va - vb.
 *
 * @param size number of components
 * @param va   minuend
 * @param vb   subtrahend
 * @param vc   receives the result
 */
void tgeVectorSubd(TGEsizei size, const TGEdouble* va, const TGEdouble* vb, TGEdouble* vc)
{
  TGEsizei k = 0;
  while (k < size) {
    vc[k] = va[k] - vb[k];
    k++;
  }
}

/**
 * Scale a vector: vb = va * f.
 *
 * @param size number of components
 * @param va   input vector
 * @param f    scale factor
 * @param vb   receives the result
 */
void tgeVectorMuld(TGEsizei size, const TGEdouble* va, TGEdouble f, TGEdouble* vb)
{
  TGEsizei k = 0;
  while (k < size) {
    vb[k] = va[k] * f;
    k++;
  }
}

/**
 * Divide a vector by a scalar: vb = va / f.
 *
 * @param size number of components
 * @param va   input vector
 * @param f    divisor (not checked against zero)
 * @param vb   receives the result
 */
void tgeVectorDivd(TGEsizei size, const TGEdouble* va, TGEdouble f, TGEdouble* vb)
{
  TGEsizei k = 0;
  while (k < size) {
    vb[k] = va[k] / f;
    k++;
  }
}

/**
 * 4x4 matrix product mc = ma * mb, treating each matrix as four
 * consecutive rows of four entries.
 *
 * The product is built in a temporary so mc may be the same storage as ma
 * or mb.
 *
 * @param ma left operand (16 entries)
 * @param mb right operand (16 entries)
 * @param mc receives the product (16 entries)
 */
void tgeMultMatrixInternald(const TGEdouble* ma, const TGEdouble* mb, TGEdouble* mc)
{
  TGEdouble product[16];
  TGEint row, col, k;

  for (row = 0; row < 4; row++) {
    const TGEdouble* a = ma + row*4;
    for (col = 0; col < 4; col++) {
      product[row*4+col] = a[0]*mb[col]   + a[1]*mb[4+col]
                         + a[2]*mb[8+col] + a[3]*mb[12+col];
    }
  }
  for (k = 0; k < 16; k++) {
    mc[k] = product[k];
  }
}

/**
 * Transpose a 4x4 matrix.
 *
 * The result is built in a temporary so mout may be the same storage as m.
 *
 * @param m    input matrix (16 entries)
 * @param mout receives the transposed matrix (16 entries)
 */
void tgeTransposeMatrixd(const TGEdouble* m, TGEdouble* mout)
{
  TGEdouble flipped[16];
  TGEint row, col, k;

  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      flipped[row*4+col] = m[col*4+row];
    }
  }
  for (k = 0; k < 16; k++) {
    mout[k] = flipped[k];
  }
}

/**
 * Invert a 4x4 matrix by Gauss-Jordan elimination with partial pivoting.
 *
 * The matrix is copied into an augmented [m | identity] system first, so
 * mout may be the same storage as m. For every column the row with the
 * largest absolute entry is swapped into the pivot position; without that
 * swap an invertible matrix with a zero on the running diagonal (for
 * example a permutation matrix) would be divided by zero. When no swap is
 * needed the arithmetic is the plain elimination in row order.
 *
 * A singular matrix is not reported: its pivot is zero and the division
 * yields non-finite entries in mout.
 *
 * @param m    matrix to invert (16 entries)
 * @param mout receives the inverse (16 entries)
 */
void tgeInvertMatrixd(const TGEdouble* m, TGEdouble* mout)
{
  TGEdouble aug[4][8];
  TGEdouble scaled[8];
  TGEdouble tmp;
  TGEint row, col, k, best;

  /* build the augmented system [m | I] */
  for (row = 0; row < 4; row++) {
    for (k = 0; k < 4; k++) {
      aug[row][k]   = m[row*4+k];
      aug[row][k+4] = (row == k) ? 1.0 : 0.0;
    }
  }

  for (col = 0; col < 4; col++) {
    /* partial pivoting: pick the largest entry at or below the diagonal */
    best = col;
    for (row = col+1; row < 4; row++) {
      if (fabs(aug[row][col]) > fabs(aug[best][col])) best = row;
    }
    if (best != col) {
      for (k = 0; k < 8; k++) {
        tmp = aug[col][k];
        aug[col][k] = aug[best][k];
        aug[best][k] = tmp;
      }
    }

    /* scale the pivot row so the pivot becomes 1 */
    tgeVectorMuld(8, aug[col], 1.0 / aug[col][col], aug[col]);

    /* clear this column in every other row */
    for (row = 0; row < 4; row++) {
      if (row == col) continue;
      tgeVectorMuld(8, aug[col], aug[row][col], scaled);
      tgeVectorSubd(8, aug[row], scaled, aug[row]);
    }
  }

  /* the right half of the augmented system now holds the inverse */
  for (row = 0; row < 4; row++) {
    for (k = 0; k < 4; k++) {
      mout[row*4+k] = aug[row][k+4];
    }
  }
}

/**
 * Print a matrix to standard output, one row per line, enclosed in '|'
 * with entries separated by ", ".
 *
 * @param rs number of rows
 * @param cs number of columns
 * @param va rs*cs entries, stored row after row
 */
void tgeDumpMatrixd(TGEint rs, TGEint cs, const TGEdouble* va)
{
  TGEint row;
  TGEint col;

  for (row = 0; row < rs; row++) {
    printf("|");
    for (col = 0; col < cs; col++) {
      if (0 < col) {
        printf(", ");
      }
      printf("%8f", va[row*cs+col]);
    }
    printf("|\n");
  }
}

/**
 * Multiply a 4-component row vector by a 4x4 matrix: vb = va * ma.
 *
 * The result is built in a temporary so vb may be the same storage as va.
 *
 * @param va input vector (4 components)
 * @param ma matrix (16 entries)
 * @param vb receives the product (4 components)
 */
void tgeMultVectorMatrixd(const TGEdouble* va, const TGEdouble* ma, TGEdouble* vb)
{
  TGEdouble out[4];
  TGEint col;

  for (col = 0; col < 4; col++) {
    out[col] = va[0]*ma[col]   + va[1]*ma[4+col]
             + va[2]*ma[8+col] + va[3]*ma[12+col];
  }
  for (col = 0; col < 4; col++) {
    vb[col] = out[col];
  }
}


//////////////////////////////////////////////////////////////////////
// value type converter
//////////////////////////////////////////////////////////////////////

// convert to TGEfloat

/**
 * Read one group of TGEbyte components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra source elements skipped after each group
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeByteArrayGetfv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEfloat* value)
{
  const TGEbyte* src = (const TGEbyte*)data + index * (size + stride);
  int k = 0;
  while (k < size) {
    value[k] = src[k];
    k++;
  }
}
/**
 * Read one group of TGEubyte components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra source elements skipped after each group
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUByteArrayGetfv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEfloat* value)
{
  const TGEubyte* src = (const TGEubyte*)data + index * (size + stride);
  int k = 0;
  while (k < size) {
    value[k] = src[k];
    k++;
  }
}
/**
 * Read one group of TGEshort components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeShortArrayGetfv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEfloat* value)
{
  const unsigned char* base = data;
  const TGEshort* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEshort*)(base + index * (sizeof(TGEshort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEushort components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUShortArrayGetfv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEfloat* value)
{
  const unsigned char* base = data;
  const TGEushort* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEushort*)(base + index * (sizeof(TGEushort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEint components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeIntArrayGetfv(const TGEvoid* data, TGEint index,
                      TGEint stride, TGEsizei size, TGEfloat* value)
{
  const unsigned char* base = data;
  const TGEint* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEint*)(base + index * (sizeof(TGEint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEuint components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUIntArrayGetfv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEfloat* value)
{
  const unsigned char* base = data;
  const TGEuint* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEuint*)(base + index * (sizeof(TGEuint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEfloat components and copy them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size components
 */
void tgeFloatArrayGetfv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEfloat* value)
{
  const unsigned char* base = data;
  const TGEfloat* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEfloat*)(base + index * (sizeof(TGEfloat)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEdouble components and store them as TGEfloat.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeDoubleArrayGetfv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEfloat* value)
{
  const unsigned char* base = data;
  const TGEdouble* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEdouble*)(base + index * (sizeof(TGEdouble)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}

// convert to TGEdouble

/**
 * Read one group of TGEbyte components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeByteArrayGetdv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEbyte* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEbyte*)(base + index * (sizeof(TGEbyte)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEubyte components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUByteArrayGetdv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEubyte* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEubyte*)(base + index * (sizeof(TGEubyte)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEshort components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeShortArrayGetdv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEshort* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEshort*)(base + index * (sizeof(TGEshort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEushort components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUShortArrayGetdv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEushort* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEushort*)(base + index * (sizeof(TGEushort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEint components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeIntArrayGetdv(const TGEvoid* data, TGEint index,
                      TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEint* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEint*)(base + index * (sizeof(TGEint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEuint components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUIntArrayGetdv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEuint* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEuint*)(base + index * (sizeof(TGEuint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEfloat components and store them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeFloatArrayGetdv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEfloat* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEfloat*)(base + index * (sizeof(TGEfloat)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEdouble components and copy them as TGEdouble.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size components
 */
void tgeDoubleArrayGetdv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEdouble* value)
{
  const unsigned char* base = data;
  const TGEdouble* p;
  int i;
  /* Byte offsets are applied to an unsigned char pointer: arithmetic on a
     void pointer is a compiler extension, not ISO C. */
  p = (const TGEdouble*)(base + index * (sizeof(TGEdouble)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}


// convert to TGEint

/**
 * Read one group of TGEbyte components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra source elements skipped after each group
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeByteArrayGetiv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEint* value)
{
  const TGEbyte* src = (const TGEbyte*)data + index * (size + stride);
  int k = 0;
  while (k < size) {
    value[k] = src[k];
    k++;
  }
}
/**
 * Read one group of TGEubyte components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra source elements skipped after each group
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUByteArrayGetiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEint* value)
{
  const TGEubyte* src = (const TGEubyte*)data + index * (size + stride);
  int k = 0;
  while (k < size) {
    value[k] = src[k];
    k++;
  }
}
/**
 * Read one group of TGEshort components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeShortArrayGetiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEint* value)
{
  const unsigned char* base = data;
  const TGEshort* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEshort*)(base + index * (sizeof(TGEshort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEushort components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUShortArrayGetiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEint* value)
{
  const unsigned char* base = data;
  const TGEushort* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEushort*)(base + index * (sizeof(TGEushort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEint components and copy them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size components
 */
void tgeIntArrayGetiv(const TGEvoid* data, TGEint index,
                      TGEint stride, TGEsizei size, TGEint* value)
{
  const unsigned char* base = data;
  const TGEint* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEint*)(base + index * (sizeof(TGEint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEuint components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUIntArrayGetiv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEint* value)
{
  const unsigned char* base = data;
  const TGEuint* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEuint*)(base + index * (sizeof(TGEuint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEfloat components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeFloatArrayGetiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEint* value)
{
  const unsigned char* base = data;
  const TGEfloat* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEfloat*)(base + index * (sizeof(TGEfloat)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEdouble components and store them as TGEint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeDoubleArrayGetiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEint* value)
{
  const unsigned char* base = data;
  const TGEdouble* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEdouble*)(base + index * (sizeof(TGEdouble)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}


// convert to TGEuint

/**
 * Read one group of TGEbyte components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra source elements skipped after each group
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeByteArrayGetuiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEuint* value)
{
  const TGEbyte* src = (const TGEbyte*)data + index * (size + stride);
  int k = 0;
  while (k < size) {
    value[k] = src[k];
    k++;
  }
}
/**
 * Read one group of TGEubyte components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra source elements skipped after each group
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUByteArrayGetuiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEuint* value)
{
  const TGEubyte* src = (const TGEubyte*)data + index * (size + stride);
  int k = 0;
  while (k < size) {
    value[k] = src[k];
    k++;
  }
}
/**
 * Read one group of TGEshort components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeShortArrayGetuiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEuint* value)
{
  const unsigned char* base = data;
  const TGEshort* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEshort*)(base + index * (sizeof(TGEshort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEushort components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeUShortArrayGetuiv(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEuint* value)
{
  const unsigned char* base = data;
  const TGEushort* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEushort*)(base + index * (sizeof(TGEushort)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEint components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeIntArrayGetuiv(const TGEvoid* data, TGEint index,
                       TGEint stride, TGEsizei size, TGEuint* value)
{
  const unsigned char* base = data;
  const TGEint* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEint*)(base + index * (sizeof(TGEint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEuint components and copy them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size components
 */
void tgeUIntArrayGetuiv(const TGEvoid* data, TGEint index,
                        TGEint stride, TGEsizei size, TGEuint* value)
{
  const unsigned char* base = data;
  const TGEuint* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEuint*)(base + index * (sizeof(TGEuint)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEfloat components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeFloatArrayGetuiv(const TGEvoid* data, TGEint index,
                         TGEint stride, TGEsizei size, TGEuint* value)
{
  const unsigned char* base = data;
  const TGEfloat* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEfloat*)(base + index * (sizeof(TGEfloat)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}
/**
 * Read one group of TGEdouble components and store them as TGEuint.
 *
 * @param data   start of the source array
 * @param index  zero-based group to read
 * @param stride extra bytes between the end of one group and the next
 * @param size   number of components per group
 * @param value  receives size converted components
 */
void tgeDoubleArrayGetuiv(const TGEvoid* data, TGEint index,
                          TGEint stride, TGEsizei size, TGEuint* value)
{
  const unsigned char* base = data;
  const TGEdouble* p;
  int i;
  /* Offset in bytes so stride has the same unit as in the *Getdv
     converters, rather than being scaled by the element size. */
  p = (const TGEdouble*)(base + index * (sizeof(TGEdouble)*size + stride));
  for (i = 0; i < size; i++) value[i] = p[i];
}


//
// TGEI
//

/**
 * Test whether a 2D point lies strictly inside a triangle.
 *
 * The three cross products of the vectors from each corner to the point
 * must all share the same strict sign; a point on an edge or corner (any
 * cross product equal to zero) counts as outside.
 *
 * @param va first corner (two components are read)
 * @param vb second corner (two components are read)
 * @param vc third corner (two components are read)
 * @param v  point to test (two components are read)
 * @result TGE_TRUE when v is strictly inside, TGE_FALSE otherwise
 */
TGEboolean tgeiVertexIsInnerTriangled(const TGEdouble* va, const TGEdouble* vb, const TGEdouble* vc, const TGEdouble* v)
{
  TGEdouble fromA[2];
  TGEdouble fromB[2];
  TGEdouble fromC[2];
  TGEdouble ab;
  TGEdouble bc;
  TGEdouble ca;

  tgeVectorSubd(2, v, va, fromA);
  tgeVectorSubd(2, v, vb, fromB);
  tgeVectorSubd(2, v, vc, fromC);
  ab = tgeVectorCrossd(fromA, fromB);
  bc = tgeVectorCrossd(fromB, fromC);
  ca = tgeVectorCrossd(fromC, fromA);

  if (0 < ab && 0 < bc && 0 < ca) {
    return TGE_TRUE;
  }
  if (0 > ab && 0 > bc && 0 > ca) {
    return TGE_TRUE;
  }
  return TGE_FALSE;
}

/**
 * Transform one vertex with tgeCalcVertex and write the resulting
 * multVertex components back in the caller's element type.
 *
 * Lighting and texturing are switched off for the duration of the
 * calculation and restored afterwards.
 *
 * @param size number of components in iv and ov; must be 2 or 3 because the
 *             internal buffers hold three components. Any other value
 *             records TGE_INVALID_VALUE and nothing is written.
 * @param type element type of iv and ov: TGE_INT, TGE_UNSIGNED_INT,
 *             TGE_FLOAT or TGE_DOUBLE; anything else records
 *             TGE_INVALID_ENUM and nothing is written
 * @param iv   input vertex, size components of the given type
 * @param ov   receives size components of the given type
 */
void tgeiVertexToViewport(TGEsizei size, TGEenum type, const TGEvoid* iv, TGEvoid* ov)
{
  TGEVertex tv;
  TGEdouble v[3];
  TGEint i;
  TGEboolean eLight = _ge->enableLight;
  TGEboolean eTex    = _ge->enableTexture;

  if (size < 2 || 3 < size) {
    /* v[] and multVertex[] only hold three components */
    _ge->error = TGE_INVALID_VALUE;
    return;
  }

  /* start from a fully defined vertex: only some fields are set below */
  memset(&tv, 0, sizeof(tv));
  tv.calced = TGE_FALSE;
  switch (type) {
  case TGE_INT:            tgeIntArrayGetdv(iv, 0, 0, size, v);    break;
  case TGE_UNSIGNED_INT:   tgeUIntArrayGetdv(iv, 0, 0, size, v);   break;
  case TGE_FLOAT:          tgeFloatArrayGetdv(iv, 0, 0, size, v);  break;
  case TGE_DOUBLE:         tgeDoubleArrayGetdv(iv, 0, 0, size, v); break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  if (size == 2) {
    v[2] = 0.0;
  }
  for (i = 0; i < 3; i++) {
    tv.vertex[i] = v[i];
    tv.color[i] = (int)floor(_ge->color[i] * 255.0);
  }

  _ge->enableLight = TGE_FALSE;
  _ge->enableTexture = TGE_FALSE;
  tgeCalcVertex(&tv);
  _ge->enableLight = eLight;
  _ge->enableTexture = eTex;

  switch (type) {
  case TGE_INT:            tgeDoubleArrayGetiv(tv.multVertex, 0, 0, size, ov);    break;
  case TGE_UNSIGNED_INT:   tgeDoubleArrayGetuiv(tv.multVertex, 0, 0, size, ov);   break;
  case TGE_FLOAT:          tgeDoubleArrayGetfv(tv.multVertex, 0, 0, size, ov);  break;
  case TGE_DOUBLE:         tgeDoubleArrayGetdv(tv.multVertex, 0, 0, size, ov);  break;
  default:                 break;   /* unreachable: type was validated above */
  }
}

/**
 * Allocate a zero-filled tessellator object.
 *
 * @result the new tessellator, or NULL when the allocation fails; release
 *         it with tgeiDeleteTess
 */
TGEItessellator* tgeiNewTess(void)
{
  TGEItessellator* tess = calloc(1, sizeof(*tess));

  return tess;
}

/**
 * Triangulate a 2D polygon by ear clipping and store the triangle indices
 * in the tessellator.
 *
 * The polygon's winding is taken from the corner at the vertex farthest
 * from the origin. Vertices are then visited in ring order; a vertex whose
 * corner turns the same way as that reference corner and whose triangle
 * contains no other remaining vertex is emitted as a triangle and removed
 * from the ring, until three vertices remain.
 *
 * Any result from a previous call is released first. Errors are reported
 * through the engine error state:
 *  - TGE_INVALID_ENUM  type is not TGE_INT, TGE_UNSIGNED_INT, TGE_FLOAT or
 *                      TGE_DOUBLE (no index list is stored)
 *  - TGE_INVALID_VALUE size is smaller than 3 (no index list is stored), or
 *                      no vertex could be clipped during a full pass over
 *                      the remaining ring (degenerate or inconsistent
 *                      polygon); in the latter case the triangles found so
 *                      far are kept
 *  - TGE_OUT_OF_MEMORY a work buffer could not be allocated
 *
 * @param self   tessellator receiving the index list
 * @param type   element type of the vertex array
 * @param stride extra bytes between consecutive vertices
 * @param size   number of vertices in the polygon
 * @param vertex vertex array; the first two components of each vertex are
 *               used. The pointer is stored in self, not copied.
 */
void tgeiTessVertexPointer(TGEItessellator* self, TGEenum type, TGEint stride, TGEint size, const TGEvoid* vertex)
{
  TGEint dim = 2;       /* fixed */
  TGEint i;
  TGEdouble v[3];
  TGEdouble va[3];
  TGEdouble vb[3];
  TGEdouble vc[3];
  TGEint index, pi, ni;
  TGEdouble dist, maxdist;
  tgeGetdv getdvFunc;
  TGEdouble cp, cp2;
  struct {
    TGEint pi;
    TGEint ni;
  }* ptflag;
  TGEsizei rest;
  TGEsizei stalled;
  TGEint last;
  TGEboolean outside;
  TGEint* triangles;
  TGEint* t;
  TGEint count;
  TGEdouble pa[3];
  TGEdouble pb[3];

  /* drop the result of a previous call */
  free(self->index);
  self->index = NULL;
  self->nindex = 0;

  self->size = size;
  self->type = type;
  self->data = vertex;

  /* validate the arguments before anything is allocated */
  switch (type) {
  case TGE_INT:            getdvFunc = tgeIntArrayGetdv;    break;
  case TGE_UNSIGNED_INT:   getdvFunc = tgeUIntArrayGetdv;   break;
  case TGE_FLOAT:          getdvFunc = tgeFloatArrayGetdv;  break;
  case TGE_DOUBLE:         getdvFunc = tgeDoubleArrayGetdv; break;
  default:
    _ge->error = TGE_INVALID_ENUM;
    return;
  }
  if (size < 3) {
    /* fewer than three vertices cannot form a triangle */
    _ge->error = TGE_INVALID_VALUE;
    return;
  }

  ptflag = malloc(sizeof(*ptflag) * size);
  if (!ptflag) {
    _ge->error = TGE_OUT_OF_MEMORY;
    return;
  }
  /* a polygon with size vertices yields at most size-2 triangles */
  triangles = malloc(sizeof(*triangles)*(size-2)*3);
  if (!triangles) {
    free(ptflag);
    _ge->error = TGE_OUT_OF_MEMORY;
    return;
  }
  /* link the vertices into a doubly linked ring */
  for (i = 0; i < size; i++) {
    ptflag[i].ni = (i == size-1) ? 0 : i+1;
    ptflag[i].pi = (i == 0) ? size-1 : i-1;
  }

  /* find most distant point */
  index = 0;
  maxdist = 0.0;
  for (i = 0; i < size; i++) {
    getdvFunc(vertex, i, stride, dim, va);
    dist = tgeVectorDistd(2, va);
    if (maxdist < dist) {
      maxdist = dist;
      index = i;
    }
  }

  /* get triangle rotation direction */
  pi = ptflag[index].pi;
  ni = ptflag[index].ni;
  getdvFunc(vertex, pi, stride, dim, va);
  getdvFunc(vertex, index, stride, dim, vb);
  getdvFunc(vertex, ni, stride, dim, vc);
  tgeVectorSubd(2, vb, va, pa);
  tgeVectorSubd(2, vc, va, pb);
  cp = tgeVectorCrossd(pa, pb);

  /* divide to triangles */
  count = 0;
  t = triangles;
  rest = size;
  stalled = 0;
  /* stalled counts vertices visited since the last clip; once it reaches
     rest, the whole ring was inspected without progress and continuing
     would loop forever */
  while (3 < rest && stalled < rest) {
    stalled++;
    pi = ptflag[index].pi;
    ni = ptflag[index].ni;
    getdvFunc(vertex, pi, stride, dim, va);
    getdvFunc(vertex, index, stride, dim, vb);
    getdvFunc(vertex, ni, stride, dim, vc);
    last = ptflag[index].pi;
    i = ptflag[ptflag[index].ni].ni;
    outside = TGE_TRUE;
    tgeVectorSubd(2, vb, va, pa);
    tgeVectorSubd(2, vc, va, pb);
    cp2 = tgeVectorCrossd(pa, pb);
    /* only a corner turning the same way as the reference corner can be
       clipped */
    if ((cp < 0 && cp2 < 0) || (cp > 0 && cp2 > 0)) {
      /* no other remaining vertex may lie inside the candidate triangle */
      for (; i != last; i = ptflag[i].ni) {
        getdvFunc(vertex, i, stride, dim, v);
        if (tgeiVertexIsInnerTriangled(va, vb, vc, v)) {
          outside = TGE_FALSE;
          break;
        }
      }
      if (outside) {
        if (0 < cp) {
          t[0] = pi;
          t[1] = index;
          t[2] = ni;
        }
        else {
          t[0] = ni;
          t[1] = index;
          t[2] = pi;
        }
        /* unlink the clipped vertex from the ring */
        ptflag[ni].pi = pi;
        ptflag[pi].ni = ni;
        rest--;
        count++;
        t += 3;
        stalled = 0;
      }
    }
    index = ni;
  }

  if (3 < rest) {
    /* no clippable vertex is left: keep what was found and report it */
    _ge->error = TGE_INVALID_VALUE;
  }
  else {
    /* the three remaining vertices form the last triangle */
    pi = ptflag[index].pi;
    ni = ptflag[index].ni;
    if (0 < cp) {
      t[0] = pi;
      t[1] = index;
      t[2] = ni;
    }
    else {
      t[0] = ni;
      t[1] = index;
      t[2] = pi;
    }
    count++;
  }
  self->index = triangles;
  self->nindex = count*3;
  free(ptflag);
}

/**
 * Return the index list stored in the tessellator.
 *
 * @param self tessellator to query
 * @param size receives the number of stored indices
 * @result the stored index array (owned by the tessellator), or NULL when
 *         none is stored
 */
const TGEint* tgeiTessIndex(TGEItessellator* self, TGEsizei* size)
{
  const TGEint* indices;

  *size = self->nindex;
  indices = self->index;
  return indices;
}

/**
 * Release a tessellator together with its index list.
 *
 * @param self tessellator to release; NULL is accepted and ignored
 */
void tgeiDeleteTess(TGEItessellator* self)
{
  if (!self) {
    return;
  }
  /* free() accepts NULL, so an empty index list needs no special case */
  free(self->index);
  free(self);
}
