Subversion Repositories eduke32

Rev

Rev 7787 | Rev 7843 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

/**************************************************************************************************
"POLYMOST" code originally written by Ken Silverman
Ken Silverman's official web site: http://www.advsys.net/ken

"POLYMOST2" changes Copyright (c) 2018, Alex Dawson
**************************************************************************************************/



#ifdef USE_OPENGL

#include "build.h"
#include "common.h"
#include "engine_priv.h"
#include "kplib.h"
#include "mdsprite.h"
#include "polymost.h"
#include "tilepacker.h"

extern char textfont[2048], smalltextfont[2048];

int32_t rendmode=0;
int32_t usemodels=1;
int32_t usehightile=1;

typedef struct { float x, cy[2], fy[2]; int32_t tag; int16_t n, p, ctag, ftag; } vsptyp;
#define VSPMAX 2048 //<- careful!
static vsptyp vsp[VSPMAX];
static int32_t gtag, viewportNodeCount;
static float xbl, xbr, xbt, xbb;
int32_t domost_rejectcount;
#ifdef YAX_ENABLE
typedef struct { float x, cy[2]; int32_t tag; int16_t n, p, ctag; } yax_vsptyp;
static yax_vsptyp yax_vsp[YAX_MAXBUNCHES*2][VSPMAX];
typedef struct { float x0, x1, cy[2], fy[2]; } yax_hole_t;
static yax_hole_t yax_holecf[2][VSPMAX];
static int32_t yax_holencf[2];
static int32_t yax_drawcf = -1;
#endif

static float dxb1[MAXWALLSB], dxb2[MAXWALLSB];

//POGOTODO: the SCISDIST could be set to 0 now to allow close objects to render properly,
//          but there's a nasty rendering bug that needs to be dug into when setting SCISDIST lower than 1
#define SCISDIST 1.f  //close plane clipping distance

#define SOFTROTMAT 0

float shadescale = 1.0f;
int32_t shadescale_unbounded = 0;

int32_t r_polymostDebug = 0;
int32_t r_enablepolymost2 = 0;
int32_t r_usenewshading = 4;
int32_t r_npotwallmode = 2;

static float gviewxrange;
static float ghoriz, ghoriz2;
double gxyaspect;
float gyxscale, ghalfx, grhalfxdown10, grhalfxdown10x, ghalfy;
float gcosang, gsinang, gcosang2, gsinang2;
float gchang, gshang, gctang, gstang, gvisibility;
float gtang = 0.f;

static vec3d_t xtex, ytex, otex, xtex2, ytex2, otex2;

float fcosglobalang, fsinglobalang;
float fxdim, fydim, fydimen, fviewingrange;

float fsearchx, fsearchy, fsearchz;
int psectnum, pwallnum, pbottomwall, pisbottomwall, psearchstat, doeditorcheck = 0;

static int32_t drawpoly_srepeat = 0, drawpoly_trepeat = 0;
// Size factor for the fallback vertex array below (MAX_DRAWPOLY_VERTS*5 floats).
#define MAX_DRAWPOLY_VERTS 8
// Converts a byte count into a GLintptr by offsetting a null GLubyte pointer.
#define BUFFER_OFFSET(bytes) (GLintptr) ((GLubyte*) NULL + (bytes))
// these cvars are never used directly in rendering -- only when glinit() is called/renderer reset
// We do this because we don't want to accidentally overshoot our existing buffer's bounds
uint32_t r_persistentStreamBuffer = 1;
// Working copy of r_persistentStreamBuffer; polymost_glinit() refreshes it from the cvar.
uint32_t persistentStreamBuffer = r_persistentStreamBuffer;
int32_t r_drawpolyVertsBufferLength = 30000;
// Working copy of r_drawpolyVertsBufferLength; polymost_glinit() refreshes it from the cvar.
int32_t drawpolyVertsBufferLength = r_drawpolyVertsBufferLength;
// Buffer object bound to GL_ARRAY_BUFFER by polymost_resetVertexPointers().
static GLuint drawpolyVertsID = 0;
// Both of the following are reset to 0 by polymost_glinit().
static GLint drawpolyVertsOffset = 0;
static int32_t drawpolyVertsSubBufferIndex = 0;
// Cleared by polymost_glinit() when the persistent buffer is (re)created.
// NOTE(review): presumably one fence per sub-buffer of the triple-buffered stream -- confirm.
static GLsync drawpolyVertsSync[3] = { 0 };
// Fallback client-side vertex storage; drawpolyVerts points here by default.
static float defaultDrawpolyVertsArray[MAX_DRAWPOLY_VERTS*5];
static float* drawpolyVerts = defaultDrawpolyVertsArray;

// Table of selectable texture filter modes, indexed by gltexfiltermode (see bind_2d_texture(),
// which reads the .min and .mag members of the selected entry).
// Each row is { name, filter, filter }.
// NOTE(review): the second column looks like the minification filter and the third like the
// magnification filter -- confirm against the struct declaration.
struct glfiltermodes glfiltermodes[NUMGLFILTERMODES] =
{
    {"GL_NEAREST",GL_NEAREST,GL_NEAREST},
    {"GL_LINEAR",GL_LINEAR,GL_LINEAR},
    {"GL_NEAREST_MIPMAP_NEAREST",GL_NEAREST_MIPMAP_NEAREST,GL_NEAREST},
    {"GL_LINEAR_MIPMAP_NEAREST",GL_LINEAR_MIPMAP_NEAREST,GL_LINEAR},
    {"GL_NEAREST_MIPMAP_LINEAR",GL_NEAREST_MIPMAP_LINEAR,GL_NEAREST},
    {"GL_LINEAR_MIPMAP_LINEAR",GL_LINEAR_MIPMAP_LINEAR,GL_LINEAR}
};

int32_t glanisotropy = 0;            // 0 = maximum supported by card
int32_t gltexfiltermode = TEXFILTER_OFF;

#ifdef EDUKE32_GLES
int32_t glusetexcompr = 2;
int32_t glusetexcache = 0, glusememcache = 0;
#else
int32_t glusetexcompr = 1;
int32_t glusetexcache = 2, glusememcache = 1;
int32_t r_polygonmode = 0;     // 0:GL_FILL,1:GL_LINE,2:GL_POINT //FUK
static int32_t lastglpolygonmode = 0; //FUK
#endif
#ifdef USE_GLEXT
int32_t glmultisample = 0, glnvmultisamplehint = 0;
int32_t r_detailmapping = 1;
int32_t r_glowmapping = 1;
#endif

int32_t gltexmaxsize = 0;      // 0 means autodetection on first run
int32_t gltexmiplevel = 0;              // discards this many mipmap levels
int32_t glprojectionhacks = 1;
static GLuint polymosttext = 0;
int32_t glrendmode = REND_POLYMOST;
int32_t r_shadeinterpolate = 1;

// This variable, and 'shadeforfullbrightpass' control the drawing of
// fullbright tiles.  Also see 'fullbrightloadingpass'.

int32_t r_fullbrights = 1;
int32_t r_vertexarrays = 1;
#ifdef USE_GLEXT
//POGOTODO: we no longer support rendering without VBOs -- update any outdated pre-GL2 code that renders without VBOs
int32_t r_vbos = 1;
int32_t r_vbocount = 64;
#endif
int32_t r_animsmoothing = 1;
int32_t r_downsize = 0;
int32_t r_downsizevar = -1;

int32_t r_yshearing = 0;

// used for fogcalc
static float fogresult, fogresult2;
coltypef fogcol, fogtable[MAXPALOOKUPS];

// Shadow copies of GL state maintained by this file:
//  - currentShaderProgramID is written by useShaderProgram()
//  - currentActiveTexture is written by polymost_activeTexture()
//  - currentTextureID is written by polymost_bindTexture() while GL_TEXTURE0 is the active unit
static uint32_t currentShaderProgramID = 0;
static GLenum currentActiveTexture = 0;
static uint32_t currentTextureID = 0;

static GLuint quadVertsID = 0;
// Program selected by polymost_resetProgram() when r_enablepolymost2 is set.
static GLuint polymost2BasicShaderProgramID = 0;
// NOTE(review): presumably the uniform locations of the polymost2 program; -1 until looked up -- confirm.
static GLint texSamplerLoc = -1;
static GLint fullBrightSamplerLoc = -1;
static GLint projMatrixLoc = -1;
static GLint mvMatrixLoc = -1;
static GLint texOffsetLoc = -1;
static GLint texScaleLoc = -1;
static GLint tintLoc = -1;
static GLint alphaLoc = -1;
static GLint fogRangeLoc = -1;
static GLint fogColorLoc = -1;

// Divisor used by polymost_setPalswap()/polymost_setPalswapSize() to turn texel counts
// into normalized coordinates.
#define PALSWAP_TEXTURE_SIZE 2048
// -1 means "never set" (see polymost_glinit()); gltexapplyprops() sets it to !gltexfiltermode.
int32_t r_useindexedcolortextures = -1;
static GLuint tilesheetTexIDs[MAXTILESHEETS];
// Divisor applied to packed tile rectangles in polymost_bindPth().
static GLint tilesheetSize = 0;
static vec2f_t tilesheetHalfTexelSize = { 0.f, 0.f };
static int32_t lastbasepal = -1;
// paletteTextureIDs[curbasepal] is bound to GL_TEXTURE2 and palswapTextureID to GL_TEXTURE1
// by polymost_resetProgram().
static GLuint paletteTextureIDs[MAXBASEPALS];
static GLuint palswapTextureID = 0;
extern char const *polymost1Frag;
extern char const *polymost1Vert;
// Program currently selected for polymost1 rendering (set by polymost_setCurrentShaderProgram()).
static GLuint polymost1CurrentShaderProgramID = 0;
static GLuint polymost1BasicShaderProgramID = 0;
// Program switched to by polymost_useDetailMapping()/polymost_useGlowMapping() when enabling.
static GLuint polymost1ExtendedShaderProgramID = 0;
// The *Loc variables below hold uniform locations and are refreshed by
// polymost_setCurrentShaderProgram(); they stay -1 until then. Each non-Loc variable is the
// CPU-side copy of the value, re-uploaded by polymost_setCurrentShaderProgram() and updated
// by the matching polymost_set*/polymost_use* function.
static GLint polymost1TexSamplerLoc = -1;
static GLint polymost1PalSwapSamplerLoc = -1;
static GLint polymost1PaletteSamplerLoc = -1;
static GLint polymost1DetailSamplerLoc = -1;
static GLint polymost1GlowSamplerLoc = -1;
static GLint polymost1TexturePosSizeLoc = -1;
static vec4f_t polymost1TexturePosSize = { 0.f, 0.f, 1.f, 1.f };
static GLint polymost1HalfTexelSizeLoc = -1;
static vec2f_t polymost1HalfTexelSize = { 0.f, 0.f };
static GLint polymost1PalswapPosLoc = -1;
static vec2f_t polymost1PalswapPos = { 0.f, 0.f };
static GLint polymost1PalswapSizeLoc = -1;
// Full palswap size; used by polymost_setPalswap() to compute the position.
static vec2f_t polymost1PalswapSize = { 0.f, 0.f };
// (width-1, height-1) scaled by 1/PALSWAP_TEXTURE_SIZE; this is the value uploaded to u_palswapSize.
static vec2f_t polymost1PalswapInnerSize = { 0.f, 0.f };
static GLint polymost1ClampLoc = -1;
static float polymost1Clamp = 0.f;
static GLint polymost1ShadeLoc = -1;
static float polymost1Shade = 0.f;
static GLint polymost1NumShadesLoc = -1;
static float polymost1NumShades = 64.f;
static GLint polymost1VisFactorLoc = -1;
static float polymost1VisFactor = 128.f;
static GLint polymost1FogEnabledLoc = -1;
static float polymost1FogEnabled = 1.f;
static GLint polymost1UseColorOnlyLoc = -1;
static float polymost1UseColorOnly = 0.f;
static GLint polymost1UsePaletteLoc = -1;
static float polymost1UsePalette = 1.f;
static GLint polymost1UseDetailMappingLoc = -1;
static float polymost1UseDetailMapping = 0.f;
static GLint polymost1UseGlowMappingLoc = -1;
static float polymost1UseGlowMapping = 0.f;
static GLint polymost1NPOTEmulationLoc = -1;
static float polymost1NPOTEmulation = 0.f;
static GLint polymost1NPOTEmulationFactorLoc = -1;
static float polymost1NPOTEmulationFactor = 1.f;
static GLint polymost1NPOTEmulationXOffsetLoc = -1;
static float polymost1NPOTEmulationXOffset = 0.f;
static GLint polymost1RotMatrixLoc = -1;
// Starts out as the 4x4 identity matrix.
static float polymost1RotMatrix[16] = { 1.f, 0.f, 0.f, 0.f,
                                        0.f, 1.f, 0.f, 0.f,
                                        0.f, 0.f, 1.f, 0.f,
                                        0.f, 0.f, 0.f, 1.f };
static GLint polymost1ShadeInterpolateLoc = -1;
static float polymost1ShadeInterpolate = 1.f;

// Returns the alpha value for a translucency mode.
//  maskprops: DAMETH_TRANS1 or DAMETH_TRANS2 select an entry of the blend table;
//             every other value yields 1.0 (no translucency).
//  blend:     index into glblend[].
static inline float float_trans(uint32_t maskprops, uint8_t blend)
{
    if (maskprops != DAMETH_TRANS1 && maskprops != DAMETH_TRANS2)
        return 1.0f;

    // def[] is indexed by the translucency mode minus 2
    return glblend[blend].def[maskprops-2].alpha;
}

char ptempbuf[MAXWALLSB<<1];

// polymost ART sky control
int32_t r_parallaxskyclamping = 1;
int32_t r_parallaxskypanning = 1;

#define MIN_CACHETIME_PRINT 10

// this was faster in MSVC but slower with GCC... currently unknown on ARM where both
// the FPU and possibly the optimization path in the compiler need improvement
#if 0
// Disabled variant: computes the absolute value by clearing the top bit of the float's
// 32-bit representation. The helpers reinterpret the bits through pointer casts.
static inline int32_t __float_as_int(float f) { return *(int32_t *) &f; }
static inline float __int_as_float(int32_t d) { return *(float *) &d; }
static inline float Bfabsf(float f) { return __int_as_float(__float_as_int(f)&0x7fffffff); }
#else
// Active variant: Bfabsf is simply the C library's fabsf.
#define Bfabsf fabsf
#endif

int32_t mdtims, omdtims;
uint8_t alphahackarray[MAXTILES];
int32_t drawingskybox = 0;
int32_t hicprecaching = 0;

hitdata_t polymost_hitdata;

void polymost_outputGLDebugMessage(uint8_t severity, const char* format, ...)
{
    static char msg[8192];
    va_list vArgs;

    if (!glinfo.debugoutput ||
        r_polymostDebug < severity)
    {
        return;
    }

    va_start(vArgs, format);
    Bvsnprintf(msg, sizeof(msg), format, vArgs);
    va_end(vArgs);

    glDebugMessageInsertARB(GL_DEBUG_SOURCE_APPLICATION_ARB,
                            GL_DEBUG_TYPE_OTHER_ARB,
                            0,
                            GL_DEBUG_SEVERITY_HIGH_ARB+severity-1,
                            -1,
                            msg);
}

#if 0
// Disabled helper: looks up the cached texture for (dapicnum, dapalnum) in the texcache hash
// bucket selected by the low bits of dapicnum and reports whether it is flagged PTH_HASALPHA.
// Returns 1 when no matching cache entry exists.
static inline int32_t gltexmayhavealpha(int32_t dapicnum, int32_t dapalnum)
{
    const int32_t j = (dapicnum&(GLTEXCACHEADSIZ-1));
    pthtyp *pth;

    for (pth=texcache.list[j]; pth; pth=pth->next)
        if (pth->picnum == dapicnum && pth->palnum == dapalnum)
            return ((pth->flags&PTH_HASALPHA) != 0);

    return 1;
}
#endif

void gltexinvalidate(int32_t dapicnum, int32_t dapalnum, int32_t dameth)
{
    const int32_t pic = (dapicnum&(GLTEXCACHEADSIZ-1));

    for (pthtyp *pth=texcache.list[pic]; pth; pth=pth->next)
        if (pth->picnum == dapicnum && pth->palnum == dapalnum &&
            (pth->flags & PTH_CLAMPED) == TO_PTH_CLAMPED(dameth))
        {
            pth->flags |= PTH_INVALIDATED;
            if (pth->flags & PTH_HASFULLBRIGHT)
                pth->ofb->flags |= PTH_INVALIDATED;
        }
}

//Make all textures "dirty" so they reload, but not re-allocate
//This should be much faster than polymost_glreset()
//Use this for palette effects ... but not ones that change every frame!
void gltexinvalidatetype(int32_t type)
{
    for (bssize_t j=0; j<=GLTEXCACHEADSIZ-1; j++)
    {
        for (pthtyp *pth=texcache.list[j]; pth; pth=pth->next)
        {
            if (type == INVALIDATE_ALL ||
                (type == INVALIDATE_ALL_NON_INDEXED && !(pth->flags & PTH_INDEXED)) ||
                (type == INVALIDATE_ART && pth->hicr == NULL) ||
                (type == INVALIDATE_ART_NON_INDEXED && pth->hicr == NULL && !(pth->flags & PTH_INDEXED)))
            {
                pth->flags |= PTH_INVALIDATED;
                if (pth->flags & PTH_HASFULLBRIGHT)
                    pth->ofb->flags |= PTH_INVALIDATED;
            }
        }
    }

    clearskins(type);

#ifdef DEBUGGINGAIDS
    OSD_Printf("gltexinvalidateall()\n");
#endif
}

// Binds a 2D texture and applies filtering parameters to it.
//  texture: GL texture name to bind to GL_TEXTURE_2D.
//  filter:  index into glfiltermodes[], or -1 to use the current gltexfiltermode.
// With USE_GLEXT, glanisotropy is also applied when glinfo.maxanisotropy is above 1.
static void bind_2d_texture(GLuint texture, int filter)
{
    int const mode = (filter == -1) ? gltexfiltermode : filter;

    glBindTexture(GL_TEXTURE_2D, texture);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, glfiltermodes[mode].mag);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, glfiltermodes[mode].min);

#ifdef USE_GLEXT
    if (glinfo.maxanisotropy > 1.f)
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, glanisotropy);
    }
#endif
}

void gltexapplyprops(void)
{
    if (videoGetRenderMode() == REND_CLASSIC)
        return;

    if (glinfo.maxanisotropy > 1.f)
    {
        if (glanisotropy <= 0 || glanisotropy > glinfo.maxanisotropy)
            glanisotropy = (int32_t)glinfo.maxanisotropy;
    }

    gltexfiltermode = clamp(gltexfiltermode, 0, NUMGLFILTERMODES-1);
    r_useindexedcolortextures = !gltexfiltermode;

    for (bssize_t i=0; i<=GLTEXCACHEADSIZ-1; i++)
    {
        for (pthtyp *pth=texcache.list[i]; pth; pth=pth->next)
        {
            if (pth->flags & PTH_INDEXED)
            {
                //POGO: indexed textures should not be filtered
                continue;
            }

            int32_t const filter = (pth->flags & PTH_FORCEFILTER) ? TEXFILTER_ON : -1;

            bind_2d_texture(pth->glpic, filter);

            if (r_fullbrights && pth->flags & PTH_HASFULLBRIGHT)
                bind_2d_texture(pth->ofb->glpic, filter);
        }
    }

    for (bssize_t i=0; i<nextmodelid; i++)
    {
        md2model_t *m = (md2model_t *)models[i];

        if (m->mdnum < 2)
            continue;

        for (bssize_t j = 0; j < m->numskins * HICTINT_MEMORY_COMBINATIONS; j++)
        {
            if (!m->texid[j])
                continue;
            bind_2d_texture(m->texid[j], -1);
        }

        for (mdskinmap_t *sk = m->skinmap; sk; sk = sk->next)
            for (bssize_t j = 0; j < HICTINT_MEMORY_COMBINATIONS; j++)
            {
                if (!sk->texid[j])
                    continue;
                bind_2d_texture(sk->texid[j], (sk->flags & HICR_FORCEFILTER) ? TEXFILTER_ON : -1);
            }
    }
}

//--------------------------------------------------------------------------------------------------

float glox1, gloy1, glox2, gloy2, gloyxscale, gloxyaspect, glohoriz2, glotang;

//Use this for both initialization and uninitialization of OpenGL.
static int32_t gltexcacnum = -1;

// In-place 4x4 matrix multiply: m0 = m0 * m1.
// Element (r, c) of each matrix lives at index r*4+c. The product is accumulated in a
// scratch array so m0 can still be read while it is being computed, then copied over m0.
// Returns m0.
static float* multiplyMatrix4f(float m0[4*4], const float m1[4*4])
{
    float product[4*4];

    for (int row = 0; row < 4; ++row)
    {
        const float *const lhs = &m0[row*4];

        for (int col = 0; col < 4; ++col)
        {
            // dot product of row `row` of m0 with column `col` of m1
            product[row*4+col] = lhs[0]*m1[col] + lhs[1]*m1[col+4] + lhs[2]*m1[col+8] + lhs[3]*m1[col+12];
        }
    }

    Bmemcpy(m0, product, sizeof(product));

    return m0;
}

// Fills in a transformation matrix from a position, an offset, an angle and the current
// global view state (gcosang/gsinang, gcosang2/gsinang2, gchang/gshang, gctang/gstang).
//  a0:     position; passed by value, so the adjustments made below stay local.
//  offset: offset->x is added to a0.x and offset->y to a0.z before the translation is applied.
//  f:      NOTE(review): the incoming value is never read -- it is overwritten before first use.
//  mat:    output, 16 floats. Only indices 0-2, 4-6, 8-10 and 12-14 are written;
//          indices 3, 7, 11 and 15 are left untouched.
//  angle:  used to index sintable[] (masked to 0..2047).
static void calcmat(vec3f_t a0, const vec2f_t *offset, float f, float mat[16], int16_t angle)
{
    float g;
    float k0, k1, k2, k3, k4, k5, k6, k7;

    // remember the original x/y before the offset is applied
    k0 = a0.y;
    k1 = a0.x;
    a0.x += offset->x;
    a0.z += offset->y;
    f = gcosang2*gshang;
    g = gsinang2*gshang;
    // sintable lookups at angle+1024 and angle+512, each scaled by 1/16384
    k4 = (float)sintable[(angle+1024)&2047] * (1.f/16384.f);
    k5 = (float)sintable[(angle+512)&2047] * (1.f/16384.f);
    k2 = k0*(1-k4)+k1*k5;
    k3 = k1*(1-k4)-k0*k5;
    // matrix elements 0, 4, 8, 12
    k6 = f*gstang - gsinang*gctang; k7 = g*gstang + gcosang*gctang;
    mat[0] = k4*k6 + k5*k7; mat[4] = gchang*gstang; mat[ 8] = k4*k7 - k5*k6; mat[12] = k2*k6 + k3*k7;
    // matrix elements 1, 5, 9, 13
    k6 = f*gctang + gsinang*gstang; k7 = g*gctang - gcosang*gstang;
    mat[1] = k4*k6 + k5*k7; mat[5] = gchang*gctang; mat[ 9] = k4*k7 - k5*k6; mat[13] = k2*k6 + k3*k7;
    // matrix elements 2, 6, 10, 14
    k6 =           gcosang2*gchang; k7 =           gsinang2*gchang;
    mat[2] = k4*k6 + k5*k7; mat[6] =-gshang;        mat[10] = k4*k7 - k5*k6; mat[14] = k2*k6 + k3*k7;

    // fold the (offset-adjusted) position into elements 12-14
    mat[12] = (mat[12] + a0.y*mat[0]) + (a0.z*mat[4] + a0.x*mat[ 8]);
    mat[13] = (mat[13] + a0.y*mat[1]) + (a0.z*mat[5] + a0.x*mat[ 9]);
    mat[14] = (mat[14] + a0.y*mat[2]) + (a0.z*mat[6] + a0.x*mat[10]);
}

static GLuint polymost2_compileShader(GLenum shaderType, const char* const source, int * pLength = nullptr)
{
    GLuint shaderID = glCreateShader(shaderType);
    if (shaderID == 0)
    {
        return 0;
    }

    glShaderSource(shaderID,
                   1,
                   &source,
                   pLength);
    glCompileShader(shaderID);

    GLint compileStatus;
    glGetShaderiv(shaderID, GL_COMPILE_STATUS, &compileStatus);
    if (!compileStatus)
    {
        GLint logLength;
        glGetShaderiv(shaderID, GL_INFO_LOG_LENGTH, &logLength);
        OSD_Printf("Compile Status: %u\n", compileStatus);
        if (logLength > 0)
        {
            char *infoLog = (char*)Xmalloc(logLength);
            glGetShaderInfoLog(shaderID, logLength, &logLength, infoLog);
            OSD_Printf("Log:\n%s\n", infoLog);
            free(infoLog);
        }
    }

    return shaderID;
}

// Convenience overload taking the source length by value; forwards to the pointer-length
// overload with the address of a local copy.
static GLuint polymost2_compileShader(GLenum shaderType, const char* const source, int length)
{
    int sourceLength = length;

    return polymost2_compileShader(shaderType, source, &sourceLength);
}

void polymost_glreset()
{
    for (bssize_t i=0; i<=MAXPALOOKUPS-1; i++)
    {
        fogtable[i].r = palookupfog[i].r * (1.f/255.f);
        fogtable[i].g = palookupfog[i].g * (1.f/255.f);
        fogtable[i].b = palookupfog[i].b * (1.f/255.f);
        fogtable[i].a = 0;
    }

    //Reset if this is -1 (meaning 1st texture call ever), or > 0 (textures in memory)
    if (gltexcacnum < 0)
    {
        gltexcacnum = 0;

        //Hack for polymost_dorotatesprite calls before 1st polymost_drawrooms()
        gcosang = gcosang2 = 16384.f/262144.f;
        gsinang = gsinang2 = 0.f;
    }
    else
    {
        for (bssize_t i = 0; i <= GLTEXCACHEADSIZ-1; i++)
        {
            for (pthtyp *pth = texcache.list[i]; pth;)
            {
                pthtyp *const next = pth->next;

                if (pth->flags & PTH_HASFULLBRIGHT)
                {
                    glDeleteTextures(1, &pth->ofb->glpic);
                    Xfree(pth->ofb);
                }

                glDeleteTextures(1, &pth->glpic);
                Xfree(pth);
                pth = next;
            }

            texcache.list[i] = NULL;
        }

        clearskins(INVALIDATE_ALL);
    }

    if (polymosttext)
        glDeleteTextures(1,&polymosttext);
    polymosttext=0;

#ifdef USE_GLEXT
    md_freevbos();
#endif

    Bmemset(texcache.list,0,sizeof(texcache.list));
    glox1 = -1;

    texcache_freeptrs();
    texcache_syncmemcache();

#ifdef DEBUGGINGAIDS
    OSD_Printf("polymost_glreset()\n");
#endif
}

#if defined EDUKE32_GLES
static void Polymost_DetermineTextureFormatSupport(void);
#endif

// reset vertex pointers to polymost default
void polymost_resetVertexPointers()
{
    polymost_outputGLDebugMessage(3, "polymost_resetVertexPointers()");

    glBindBuffer(GL_ARRAY_BUFFER, drawpolyVertsID);

    glVertexPointer(3, GL_FLOAT, 5*sizeof(float), 0);
    glTexCoordPointer(2, GL_FLOAT, 5*sizeof(float), (GLvoid*) (3*sizeof(float)));

#ifdef USE_GLEXT
    if (r_detailmapping)
    {
        glClientActiveTexture(GL_TEXTURE3);
        glTexCoordPointer(2, GL_FLOAT, 5*sizeof(float), (GLvoid*) (3*sizeof(float)));
    }
    if (r_glowmapping)
    {
        glClientActiveTexture(GL_TEXTURE4);
        glTexCoordPointer(2, GL_FLOAT, 5*sizeof(float), (GLvoid*) (3*sizeof(float)));
    }
    glClientActiveTexture(GL_TEXTURE0);
#endif

    polymost_resetProgram();
}

void polymost_disableProgram()
{
    if (videoGetRenderMode() != REND_POLYMOST)
        return;

    polymost_outputGLDebugMessage(3, "polymost_disableProgram()");

    useShaderProgram(0);
}

void polymost_resetProgram()
{
    if (videoGetRenderMode() != REND_POLYMOST)
        return;

    polymost_outputGLDebugMessage(3, "polymost_resetProgram()");

    if (r_enablepolymost2)
        useShaderProgram(polymost2BasicShaderProgramID);
    else
        useShaderProgram(polymost1CurrentShaderProgramID);

    // ensure that palswapTexture and paletteTexture[curbasepal] is bound
    glActiveTexture(GL_TEXTURE1);
    glBindTexture(GL_TEXTURE_2D, palswapTextureID);
    glActiveTexture(GL_TEXTURE2);
    glBindTexture(GL_TEXTURE_2D, paletteTextureIDs[curbasepal]);
    glActiveTexture(GL_TEXTURE0);
}

static void polymost_setCurrentShaderProgram(uint32_t programID)
{
    polymost_outputGLDebugMessage(3, "polymost_setCurrentShaderProgram(programID:%u)", programID);

    polymost1CurrentShaderProgramID = programID;
    useShaderProgram(programID);

    //update the uniform locations
    polymost1TexSamplerLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "s_texture");
    polymost1PalSwapSamplerLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "s_palswap");
    polymost1PaletteSamplerLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "s_palette");
    polymost1DetailSamplerLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "s_detail");
    polymost1GlowSamplerLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "s_glow");
    polymost1TexturePosSizeLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_texturePosSize");
    polymost1HalfTexelSizeLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_halfTexelSize");
    polymost1PalswapPosLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_palswapPos");
    polymost1PalswapSizeLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_palswapSize");
    polymost1ClampLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_clamp");
    polymost1ShadeLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_shade");
    polymost1NumShadesLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_numShades");
    polymost1VisFactorLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_visFactor");
    polymost1FogEnabledLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_fogEnabled");
    polymost1UsePaletteLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_usePalette");
    polymost1UseColorOnlyLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_useColorOnly");
    polymost1UseDetailMappingLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_useDetailMapping");
    polymost1UseGlowMappingLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_useGlowMapping");
    polymost1NPOTEmulationLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_npotEmulation");
    polymost1NPOTEmulationFactorLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_npotEmulationFactor");
    polymost1NPOTEmulationXOffsetLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_npotEmulationXOffset");
    polymost1RotMatrixLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_rotMatrix");
    polymost1ShadeInterpolateLoc = glGetUniformLocation(polymost1CurrentShaderProgramID, "u_shadeInterpolate");

    //set the uniforms to the current values
    glUniform4f(polymost1TexturePosSizeLoc, polymost1TexturePosSize.x, polymost1TexturePosSize.y, polymost1TexturePosSize.z, polymost1TexturePosSize.w);
    glUniform2f(polymost1HalfTexelSizeLoc, polymost1HalfTexelSize.x, polymost1HalfTexelSize.y);
    glUniform2f(polymost1PalswapPosLoc, polymost1PalswapPos.x, polymost1PalswapPos.y);
    glUniform2f(polymost1PalswapSizeLoc, polymost1PalswapInnerSize.x, polymost1PalswapInnerSize.y);
    glUniform1f(polymost1ClampLoc, polymost1Clamp);
    glUniform1f(polymost1ShadeLoc, polymost1Shade);
    glUniform1f(polymost1NumShadesLoc, polymost1NumShades);
    glUniform1f(polymost1VisFactorLoc, polymost1VisFactor);
    glUniform1f(polymost1FogEnabledLoc, polymost1FogEnabled);
    glUniform1f(polymost1UseColorOnlyLoc, polymost1UseColorOnly);
    glUniform1f(polymost1UsePaletteLoc, polymost1UsePalette);
    glUniform1f(polymost1UseDetailMappingLoc, polymost1UseDetailMapping);
    glUniform1f(polymost1UseGlowMappingLoc, polymost1UseGlowMapping);
    glUniform1f(polymost1NPOTEmulationLoc, polymost1NPOTEmulation);
    glUniform1f(polymost1NPOTEmulationFactorLoc, polymost1NPOTEmulationFactor);
    glUniform1f(polymost1NPOTEmulationXOffsetLoc, polymost1NPOTEmulationXOffset);
    glUniformMatrix4fv(polymost1RotMatrixLoc, 1, false, polymost1RotMatrix);
    glUniform1f(polymost1ShadeInterpolateLoc, polymost1ShadeInterpolate);
}

// Stores texturePosSize and uploads it to u_texturePosSize.
// Ignored unless the polymost1 program is the active shader program.
void polymost_setTexturePosSize(vec4f_t const &texturePosSize)
{
    if (currentShaderProgramID == polymost1CurrentShaderProgramID)
    {
        polymost1TexturePosSize = texturePosSize;

        vec4f_t const &v = polymost1TexturePosSize;
        glUniform4f(polymost1TexturePosSizeLoc, v.x, v.y, v.z, v.w);
    }
}

// Stores halfTexelSize and uploads it to u_halfTexelSize.
// Ignored unless the polymost1 program is the active shader program, and skipped when the
// value equals the cached one.
void polymost_setHalfTexelSize(vec2f_t const &halfTexelSize)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID)
        return;

    bool const unchanged = halfTexelSize.x == polymost1HalfTexelSize.x &&
                           halfTexelSize.y == polymost1HalfTexelSize.y;
    if (unchanged)
        return;

    polymost1HalfTexelSize = halfTexelSize;
    glUniform2f(polymost1HalfTexelSizeLoc, polymost1HalfTexelSize.x, polymost1HalfTexelSize.y);
}

static void polymost_setPalswap(uint32_t index)
{
    static uint32_t lastPalswapIndex;

    if (currentShaderProgramID != polymost1CurrentShaderProgramID || index == lastPalswapIndex)
        return;

    lastPalswapIndex = index;
    polymost1PalswapPos.x = index*polymost1PalswapSize.x;
    polymost1PalswapPos.y = floorf(polymost1PalswapPos.x);
    polymost1PalswapPos = { polymost1PalswapPos.x - polymost1PalswapPos.y + (0.5f/PALSWAP_TEXTURE_SIZE),
                            polymost1PalswapPos.y * polymost1PalswapSize.y + (0.5f/PALSWAP_TEXTURE_SIZE) };
    glUniform2f(polymost1PalswapPosLoc, polymost1PalswapPos.x, polymost1PalswapPos.y);
}

static void polymost_setPalswapSize(uint32_t width, uint32_t height)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID)
        return;

    polymost1PalswapSize = { width*(1.f/PALSWAP_TEXTURE_SIZE),
                             height*(1.f/PALSWAP_TEXTURE_SIZE) };

    polymost1PalswapInnerSize = { (width-1)*(1.f/PALSWAP_TEXTURE_SIZE),
                                  (height-1)*(1.f/PALSWAP_TEXTURE_SIZE) };

    glUniform2f(polymost1PalswapSizeLoc, polymost1PalswapInnerSize.x, polymost1PalswapInnerSize.y);
}

char polymost_getClamp()
{
    return polymost1Clamp;
}

void polymost_setClamp(char clamp)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID ||
        clamp == polymost1Clamp)
        return;

    polymost1Clamp = clamp;
    glUniform1f(polymost1ClampLoc, polymost1Clamp);
}

static void polymost_setShade(int32_t shade)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID)
        return;

    if (globalflags & GLOBAL_NO_GL_TILESHADES)
        shade = 0;

    static int32_t lastShade;
    static int32_t lastNumShades;

    if (shade != lastShade)
    {
        lastShade = shade;
        polymost1Shade = shade;
        glUniform1f(polymost1ShadeLoc, polymost1Shade);
    }

    if (numshades != lastNumShades)
    {
        lastNumShades = numshades;
        polymost1NumShades = numshades;
        glUniform1f(polymost1NumShadesLoc, polymost1NumShades);
    }
}

void polymost_setVisibility(float visibility)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID)
        return;

    float visFactor = visibility * fviewingrange * (1.f / (64.f * 65536.f));
    if (visFactor == polymost1VisFactor)
        return;

    polymost1VisFactor = visFactor;
    glUniform1f(polymost1VisFactorLoc, polymost1VisFactor);
}

void polymost_setFogEnabled(char fogEnabled)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID || fogEnabled == polymost1FogEnabled)
        return;

    polymost1FogEnabled = fogEnabled;
    glUniform1f(polymost1FogEnabledLoc, polymost1FogEnabled);
}

void polymost_useColorOnly(char useColorOnly)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID || useColorOnly == polymost1UseColorOnly)
        return;

    polymost1UseColorOnly = useColorOnly;
    glUniform1f(polymost1UseColorOnlyLoc, polymost1UseColorOnly);
}

void polymost_usePaletteIndexing(char usePaletteIndexing)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID || usePaletteIndexing == polymost1UsePalette)
        return;

    polymost1UsePalette = usePaletteIndexing;
    glUniform1f(polymost1UsePaletteLoc, polymost1UsePalette);
}

void polymost_useDetailMapping(char useDetailMapping)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID || useDetailMapping == polymost1UseDetailMapping)
        return;

    if (useDetailMapping)
        polymost_setCurrentShaderProgram(polymost1ExtendedShaderProgramID);

    polymost1UseDetailMapping = useDetailMapping;
    glUniform1f(polymost1UseDetailMappingLoc, polymost1UseDetailMapping);
}

void polymost_useGlowMapping(char useGlowMapping)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID || useGlowMapping == polymost1UseGlowMapping)
        return;

    if (useGlowMapping)
        polymost_setCurrentShaderProgram(polymost1ExtendedShaderProgramID);

    polymost1UseGlowMapping = useGlowMapping;
    glUniform1f(polymost1UseGlowMappingLoc, polymost1UseGlowMapping);
}

void polymost_npotEmulation(char npotEmulation, float factor, float xOffset)
{
    if (currentShaderProgramID != polymost1CurrentShaderProgramID || npotEmulation == polymost1NPOTEmulation)
        return;

    polymost1NPOTEmulation = npotEmulation;
    glUniform1f(polymost1NPOTEmulationLoc, polymost1NPOTEmulation);
    polymost1NPOTEmulationFactor = factor;
    glUniform1f(polymost1NPOTEmulationFactorLoc, polymost1NPOTEmulationFactor);
    polymost1NPOTEmulationXOffset = xOffset;
    glUniform1f(polymost1NPOTEmulationXOffsetLoc, polymost1NPOTEmulationXOffset);
}

void polymost_shadeInterpolate(int32_t shadeInterpolate)
{
    if (currentShaderProgramID == polymost1CurrentShaderProgramID)
    {
        polymost1ShadeInterpolate = shadeInterpolate;
        glUniform1f(polymost1ShadeInterpolateLoc, polymost1ShadeInterpolate);
    }
}

// Selects the active texture unit and remembers it in currentActiveTexture so that
// polymost_bindTexture() can tell which unit a bind applies to.
void polymost_activeTexture(GLenum texture)
{
    glad_glActiveTexture(texture);
    currentActiveTexture = texture;
}

//POGOTODO: replace this and polymost_activeTexture with proper draw call organization
//
// Binds textureID to target, skipping the GL call when the bind is known to be redundant.
// A bind is only considered redundant when all of the following hold: the texture equals the
// one last bound on GL_TEXTURE0, it is not 0, GL_TEXTURE0 is the active unit and the render
// mode is REND_POLYMOST. The cached texture ID is only updated for binds on GL_TEXTURE0.
void polymost_bindTexture(GLenum target, uint32_t textureID)
{
    bool const redundant = currentTextureID == textureID &&
                           textureID != 0 &&
                           currentActiveTexture == GL_TEXTURE0 &&
                           videoGetRenderMode() == REND_POLYMOST;
    if (redundant)
        return;

    glad_glBindTexture(target, textureID);

    if (currentActiveTexture == GL_TEXTURE0)
        currentTextureID = textureID;
}

static void polymost_bindPth(pthtyp const * const pPth)
{
    Bassert(pPth);

    vec4f_t texturePosSize = { 0.f, 0.f, 1.f, 1.f };
    vec2f_t halfTexelSize = { 0.f, 0.f };
    if ((pPth->flags & PTH_INDEXED) &&
        !(pPth->flags & PTH_HIGHTILE))
    {
        Tile tile;
        char tileIsPacked = tilepacker_getTile(waloff[pPth->picnum] ? pPth->picnum+1 : 0, &tile);
        //POGO: check the width and height to ensure that the tile hasn't been changed for a user tile that has different dimensions
        if (tileIsPacked &&
            (!waloff[pPth->picnum] ||
             (tile.rect.width == (uint32_t) tilesiz[pPth->picnum].y &&
              tile.rect.height == (uint32_t) tilesiz[pPth->picnum].x)))
        {
            texturePosSize = { tile.rect.u/(float) tilesheetSize,
                               tile.rect.v/(float) tilesheetSize,
                               tile.rect.width/(float) tilesheetSize,
                               tile.rect.height/(float) tilesheetSize };
            halfTexelSize = tilesheetHalfTexelSize;
        }
    }
    polymost_setTexturePosSize(texturePosSize);
    polymost_setHalfTexelSize(halfTexelSize);
    glBindTexture(GL_TEXTURE_2D, pPth->glpic);
}

void useShaderProgram(uint32_t shaderID)
{
    glUseProgram(shaderID);
    currentShaderProgramID = shaderID;
}

// one-time initialization of OpenGL for polymost
void polymost_glinit()
{
    glHint(GL_FOG_HINT, GL_NICEST);
    glFogi(GL_FOG_MODE, (r_usenewshading < 2) ? GL_EXP2 : GL_LINEAR);
    glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);

    glPixelStorei(GL_PACK_ALIGNMENT, 1);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

    if (glinfo.depthclamp)
        glEnable(GL_DEPTH_CLAMP);

    //glHint(GL_LINE_SMOOTH_HINT, GL_NICEST);
    //glEnable(GL_LINE_SMOOTH);

    if (r_useindexedcolortextures == -1)
    {
        //POGO: r_useindexedcolortextures has never been set, so force it to be enabled
        gltexfiltermode = 0;
    }

#ifdef USE_GLEXT
    if (glmultisample > 0 && glinfo.multisample)
    {
        if (glinfo.nvmultisamplehint)
            glHint(GL_MULTISAMPLE_FILTER_HINT_NV, glnvmultisamplehint ? GL_NICEST:GL_FASTEST);
        glEnable(GL_MULTISAMPLE);
    }

    if (r_persistentStreamBuffer && ((!glinfo.bufferstorage) || (!glinfo.sync)))
    {
        OSD_Printf("Your OpenGL implementation doesn't support the required extensions for persistent stream buffers. Disabling...\n");
        r_persistentStreamBuffer = 0;
    }
#endif

    //POGOTODO: require a max texture size >= 2048

    persistentStreamBuffer = r_persistentStreamBuffer;
    drawpolyVertsBufferLength = r_drawpolyVertsBufferLength;

    drawpolyVertsOffset = 0;
    drawpolyVertsSubBufferIndex = 0;

    GLuint ids[2];
    glGenBuffers(2, ids);
    drawpolyVertsID = ids[0];
    glBindBuffer(GL_ARRAY_BUFFER, drawpolyVertsID);
    if (persistentStreamBuffer)
    {
        // reset the sync objects, as old ones we had from any last GL context are gone now
        Bmemset(drawpolyVertsSync, 0, sizeof(drawpolyVertsSync));

        GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
        // we want to triple-buffer to avoid having to wait for the buffer to become available again,
        // so triple the buffer size we expect to use
        glBufferStorage(GL_ARRAY_BUFFER, 3*drawpolyVertsBufferLength*sizeof(float)*5, NULL, flags);
        drawpolyVerts = (float*) glMapBufferRange(GL_ARRAY_BUFFER, 0, 3*drawpolyVertsBufferLength*sizeof(float)*5, flags);
    }
    else
    {
        drawpolyVerts = defaultDrawpolyVertsArray;
        glBufferData(GL_ARRAY_BUFFER, drawpolyVertsBufferLength*sizeof(float)*5, NULL, GL_STREAM_DRAW);
    }
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    currentTextureID = 0;

    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &tilesheetSize);
#ifdef _MSC_VER
    if (tilesheetSize > 8192)
        tilesheetSize = 8192;
#endif
    tilesheetHalfTexelSize = { 0.5f/tilesheetSize, 0.5f/tilesheetSize };
    vec2_t maxTexDimensions = { tilesheetSize, tilesheetSize };
    char allPacked = false;
    static int numTilesheets = 0;
    //POGO: only pack the tilesheets once
    if (numTilesheets == 0)
    {
        // add a blank texture for tileUID 0
        tilepacker_addTile(0, 2, 2);
        for (int picnum = 0; picnum < MAXTILES; ++picnum)
        {
            tilepacker_addTile(picnum+1, (uint32_t) tilesiz[picnum].y, (uint32_t) tilesiz[picnum].x);
        }

        do
        {
            tilepacker_initTilesheet(numTilesheets, tilesheetSize, tilesheetSize);
            allPacked = tilepacker_pack(numTilesheets);
            ++numTilesheets;
        } while (!allPacked && numTilesheets < MAXTILESHEETS);
    }
    for (int i = 0; i < numTilesheets; ++i)
    {
        glGenTextures(1, tilesheetTexIDs+i);
        glBindTexture(GL_TEXTURE_2D, tilesheetTexIDs[i]);
        uploadtextureindexed(true, {0, 0}, maxTexDimensions, (intptr_t) NULL);
    }

    const char blankTex[] = {255, 255,
                             255, 255};
    Tile blankTile;
    tilepacker_getTile(0, &blankTile);
    glBindTexture(GL_TEXTURE_2D, tilesheetTexIDs[blankTile.tilesheetID]);
    uploadtextureindexed(false, {(int32_t) blankTile.rect.u, (int32_t) blankTile.rect.v}, {2, 2}, (intptr_t) blankTex);

    quadVertsID = ids[1];
    glBindBuffer(GL_ARRAY_BUFFER, quadVertsID);
    const float quadVerts[] =
        {
            -0.5f, 1.0f, 0.0f, 0.0f, 1.0f, //top-left
            -0.5f, 0.0f, 0.0f, 0.0f, 0.0f, //bottom-left
             0.5f, 1.0f, 0.0f, 1.0f, 1.0f, //top-right
             0.5f, 0.0f, 0.0f, 1.0f, 0.0f  //bottom-right
        };
    glBufferData(GL_ARRAY_BUFFER, sizeof(quadVerts), quadVerts, GL_STATIC_DRAW);

    //specify format/arrangement for vertex positions:
    glVertexAttribPointer(0, 3, GL_FLOAT, false, sizeof(float) * 5, 0);
    //specify format/arrangement for vertex texture coords:
    glVertexAttribPointer(1, 2, GL_FLOAT, false, sizeof(float) * 5, (const void*) (sizeof(float) * 3));

    glBindBuffer(GL_ARRAY_BUFFER, 0);

    const char* const POLYMOST2_BASIC_VERTEX_SHADER_CODE =
        "#version 110\n\
        \n\
        // input\n\
        attribute vec3 i_vertPos;\n\
        attribute vec2 i_texCoord;\n\
        uniform mat4 u_mvMatrix;\n\
        uniform mat4 u_projMatrix;\n\
        uniform vec2 u_texOffset;\n\
        uniform vec2 u_texScale;\n\
        \n\
        // output\n\
        varying vec2 v_texCoord;\n\
        varying float v_distance;\n\
        \n\
        void main()\n\
        {\n\
           vec4 eyeCoordPosition = u_mvMatrix * vec4(i_vertPos, 1.0);\n\
           gl_Position = u_projMatrix * eyeCoordPosition;\n\
           \n\
           eyeCoordPosition.xyz /= eyeCoordPosition.w;\n\
           \n\
           v_texCoord = i_texCoord * u_texScale + u_texOffset;\n\
           v_distance = eyeCoordPosition.z;\n\
        }\n"
;
    const char* const POLYMOST2_BASIC_FRAGMENT_SHADER_CODE =
        "#version 110\n\
        \n\
        varying vec2 v_texCoord;\n\
        uniform sampler2D s_texture;\n\
        uniform sampler2D s_fullBright;\n\
        \n\
        uniform vec4 u_tint;\n\
        uniform float u_alpha;\n\
        \n\
        varying float v_distance;\n\
        uniform vec2 u_fogRange;\n\
        uniform vec4 u_fogColor;\n\
        \n\
        const float c_zero = 0.0;\n\
        const float c_one  = 1.0;\n\
        \n\
        void main()\n\
        {\n\
            vec4 color = texture2D(s_texture, v_texCoord);\n\
            vec4 fullBrightColor = texture2D(s_fullBright, v_texCoord);\n\
            \n\
            float fogFactor = clamp((u_fogRange.y-v_distance)/(u_fogRange.y-u_fogRange.x), c_zero, c_one);\n\
            \n\
            color.rgb = mix(u_fogColor.rgb, color.rgb, fogFactor);\n\
            color.rgb *= u_tint.rgb * u_tint.a * color.a;\n\
            color.rgb = mix(color.rgb, fullBrightColor.rgb, fullBrightColor.a);\n\
            \n\
            color.a *= u_alpha;\n\
            \n\
            gl_FragColor = color;\n\
        }\n"
;

    polymost2BasicShaderProgramID = glCreateProgram();
    GLuint polymost2BasicVertexShaderID = polymost2_compileShader(GL_VERTEX_SHADER, POLYMOST2_BASIC_VERTEX_SHADER_CODE);
    GLuint polymost2BasicFragmentShaderID = polymost2_compileShader(GL_FRAGMENT_SHADER, POLYMOST2_BASIC_FRAGMENT_SHADER_CODE);
    glBindAttribLocation(polymost2BasicShaderProgramID, 0, "i_vertPos");
    glBindAttribLocation(polymost2BasicShaderProgramID, 1, "i_texCoord");
    glAttachShader(polymost2BasicShaderProgramID, polymost2BasicVertexShaderID);
    glAttachShader(polymost2BasicShaderProgramID, polymost2BasicFragmentShaderID);
    glLinkProgram(polymost2BasicShaderProgramID);

    // Get the attribute/uniform locations
    texSamplerLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "s_texture");
    fullBrightSamplerLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "s_fullBright");
    projMatrixLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_projMatrix");
    mvMatrixLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_mvMatrix");
    texOffsetLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_texOffset");
    texScaleLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_texScale");
    tintLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_tint");
    alphaLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_alpha");
    fogRangeLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_fogRange");
    fogColorLoc = glGetUniformLocation(polymost2BasicShaderProgramID, "u_fogColor");

    polymost1ExtendedShaderProgramID = glCreateProgram();
    GLuint polymost1BasicVertexShaderID = polymost2_compileShader(GL_VERTEX_SHADER, polymost1Vert);
    GLuint polymost1ExtendedFragmentShaderID = polymost2_compileShader(GL_FRAGMENT_SHADER, polymost1Frag);
    glAttachShader(polymost1ExtendedShaderProgramID, polymost1BasicVertexShaderID);
    glAttachShader(polymost1ExtendedShaderProgramID, polymost1ExtendedFragmentShaderID);
    glLinkProgram(polymost1ExtendedShaderProgramID);

    int polymost1BasicFragLen = strlen(polymost1Frag);
    char* polymost1BasicFrag = (char*) malloc(polymost1BasicFragLen);
    memcpy(polymost1BasicFrag, polymost1Frag, polymost1BasicFragLen);
    char* extDefineSubstr = strstr(polymost1BasicFrag, " #define POLYMOST1_EXTENDED");
    if (extDefineSubstr)
    {
        //Disable extensions for basic fragment shader
        extDefineSubstr[0] = '/';
        extDefineSubstr[1] = '/';
    }
    polymost1BasicShaderProgramID = glCreateProgram();
    GLuint polymost1BasicFragmentShaderID = polymost2_compileShader(GL_FRAGMENT_SHADER, polymost1BasicFrag, polymost1BasicFragLen);
    glAttachShader(polymost1BasicShaderProgramID, polymost1BasicVertexShaderID);
    glAttachShader(polymost1BasicShaderProgramID, polymost1BasicFragmentShaderID);
    glLinkProgram(polymost1BasicShaderProgramID);
    free(polymost1BasicFrag);
    polymost1BasicFrag = 0;

    // set defaults
    polymost_setCurrentShaderProgram(polymost1ExtendedShaderProgramID);
    glUniform1i(polymost1TexSamplerLoc, 0);
    glUniform1i(polymost1PalSwapSamplerLoc, 1);
    glUniform1i(polymost1PaletteSamplerLoc, 2);
    glUniform1i(polymost1DetailSamplerLoc, 3);
    glUniform1i(polymost1GlowSamplerLoc, 4);
    polymost_setPalswapSize(256, numshades+1);
    polymost_setCurrentShaderProgram(polymost1BasicShaderProgramID);
    glUniform1i(polymost1TexSamplerLoc, 0);
    glUniform1i(polymost1PalSwapSamplerLoc, 1);
    glUniform1i(polymost1PaletteSamplerLoc, 2);
    useShaderProgram(0);

    lastbasepal = -1;
    for (int basepalnum = 0; basepalnum < MAXBASEPALS; ++basepalnum)
    {
        paletteTextureIDs[basepalnum] = 0;
        uploadbasepalette(basepalnum);
    }
    palswapTextureID = 0;
    for (int palookupnum = 0; palookupnum < MAXPALOOKUPS; ++palookupnum)
    {
        uploadpalswap(palookupnum);
    }

    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_TEXTURE_COORD_ARRAY);

    polymost_resetVertexPointers();

    texcache_init();
    texcache_loadoffsets();
    texcache_openfiles();

    texcache_setupmemcache();
    texcache_checkgarbage();

#if defined EDUKE32_GLES
    Polymost_DetermineTextureFormatSupport();
#endif
}

// Resets the per-session polymost state that does not need a fresh GL context:
// invalidates the cached base palette index (lastbasepal = -1) and restores the
// vertex pointers via polymost_resetVertexPointers().
void polymost_init()
{
    lastbasepal = -1;
    polymost_resetVertexPointers();
}

////////// VISIBILITY FOG ROUTINES //////////

// only for r_usenewshading < 2 (not preferred)
static void fogcalc_old(int32_t shade, int32_t vis)
{
    float f;

    if (r_usenewshading == 1)
    {
        f = 0.9f * shade;
        f = (vis > 239) ? (float)(gvisibility * (vis - 240 + f)) :
                          (float)(gvisibility * (vis + 16 + f));
    }
    else
    {
        f = (shade < 0) ? shade * 3.5f : shade * .66f;
        f = (vis > 239) ? (float)(gvisibility * ((vis - 240 + f) / (klabs(vis - 256)))) :
                          (float)(gvisibility * (vis + 16 + f));
    }

    fogresult = clamp(f, 0.001f, 100.0f);
}

// For GL_LINEAR fog:
#define FOGDISTCONST 600
#define FULLVIS_BEGIN 2.9e30f
#define FULLVIS_END 3.0e30f

static inline void fogcalc(int32_t shade, int32_t vis, int32_t pal)
{
    fogcol = fogtable[pal];

    if (r_usenewshading < 2)
    {
        fogcalc_old(shade, vis);
        return;
    }

    float combvis = (float) globalvisibility * (uint8_t) (vis+16);

    if (combvis == 0.f)
    {
        if (r_usenewshading == 2 && shade > 0)
        {
            // beg = -D*shade, end = D*(NUMSHADES-1-shade)
            //  => end/beg = -(NUMSHADES-1-shade)/shade
            fogresult = -FULLVIS_BEGIN;
            fogresult2 = FULLVIS_BEGIN * (float)(numshades-1-shade) / shade;
        }
        else
        {
            fogresult  = FULLVIS_BEGIN;
            fogresult2 = FULLVIS_END;
        }
    }
    else if (r_usenewshading == 3 && shade >= numshades-1)
    {
        fogresult = -1;
        fogresult2 = 0;
    }
    else
    {
        combvis = 1.f/combvis;
        fogresult = (r_usenewshading == 3 && shade > 0) ? 0.f : -(FOGDISTCONST * shade) * combvis;
        fogresult2 = (FOGDISTCONST * (numshades-1-shade)) * combvis;
    }
}

#define GL_FOG_MAX 1.0e37f

void polymost2_calc_fog(int32_t shade, int32_t vis, int32_t pal)
{
    if (nofog) return;

    fogcol = fogtable[pal];

    if (((uint8_t)(vis + 16)) > 0 && g_visibility > 0)
    {
        constexpr GLfloat glfogconstant = 262144.f;
        GLfloat fogrange = (frealmaxshade * glfogconstant) / (((uint8_t)(vis + 16)) * globalvisibility);

        fogresult = 0.f - (((min(shade, 0) - 0.5f) / frealmaxshade) * fogrange); // min() = subtract shades from fog
        fogresult2 = fogrange - (((shade - 0.5f) / frealmaxshade) * fogrange);
    }
    else
    {
        fogresult = 0.f;
        fogresult2 = -GL_FOG_MAX; // hide fog behind the camera
    }
}

void calc_and_apply_fog(int32_t shade, int32_t vis, int32_t pal)
{
    if (nofog) return;

    if (r_usenewshading == 4)
    {
        fogresult = 0.f;
        fogcol = fogtable[pal];

        if (((uint8_t)(vis + 16)) > 0 && globalvisibility > 0)
        {
            constexpr GLfloat glfogconstant = 262144.f;
            GLfloat fogrange = (frealmaxshade * glfogconstant) / (((uint8_t)(vis + 16)) * globalvisibility);

            fogresult = 0.f - (((min(shade, 0) - 0.5f) / frealmaxshade) * fogrange); // min() = subtract shades from fog
            fogresult2 = fogrange - (((shade - 0.5f) / frealmaxshade) * fogrange);
        }
        else
        {
            fogresult = 0.f;
            fogresult2 = -GL_FOG_MAX; // hide fog behind the camera
        }

        glFogf(GL_FOG_START, fogresult);
        glFogf(GL_FOG_END, fogresult2);
        glFogfv(GL_FOG_COLOR, (GLfloat *)&fogcol);

        return;
    }

    fogcalc(shade, vis, pal);
    glFogfv(GL_FOG_COLOR, (GLfloat *)&fogcol);

    if (r_usenewshading < 2)
        glFogf(GL_FOG_DENSITY, fogresult);
    else
    {
        glFogf(GL_FOG_START, fogresult);
        glFogf(GL_FOG_END, fogresult2);
    }
}

void calc_and_apply_fog_factor(int32_t shade, int32_t vis, int32_t pal, float factor)
{
    if (nofog) return;

    if (r_usenewshading == 4)
    {
        fogcol = fogtable[pal];

        if (((uint8_t)(vis + 16)) > 0 && ((((uint8_t)(vis + 16)) / 8.f) + shade) > 0)
        {
            GLfloat normalizedshade = (shade - 0.5f) / frealmaxshade;
            GLfloat fogrange = (((uint8_t)(vis + 16)) / (8.f * frealmaxshade)) + normalizedshade;

            // subtract shades from fog
            if (normalizedshade > 0.f && normalizedshade < 1.f)
                fogrange = (fogrange - normalizedshade) / (1.f - normalizedshade);

            fogresult = -(GL_FOG_MAX * fogrange);
            fogresult2 = GL_FOG_MAX - (GL_FOG_MAX * fogrange);
        }
        else
        {
            fogresult = 0.f;
            fogresult2 = -GL_FOG_MAX; // hide fog behind the camera
        }

        glFogf(GL_FOG_START, fogresult);
        glFogf(GL_FOG_END, fogresult2);
        glFogfv(GL_FOG_COLOR, (GLfloat *)&fogcol);

        return;
    }

    // NOTE: for r_usenewshading >= 2, the fog beginning/ending distance results are
    // unused.
    fogcalc(shade, vis, pal);
    glFogfv(GL_FOG_COLOR, (GLfloat *)&fogcol);

    if (r_usenewshading < 2)
        glFogf(GL_FOG_DENSITY, fogresult*factor);
    else
    {
        glFogf(GL_FOG_START, (GLfloat) FULLVIS_BEGIN);
        glFogf(GL_FOG_END, (GLfloat) FULLVIS_END);
    }
}
////////////////////


static float get_projhack_ratio(void)
{
    if (glprojectionhacks && !r_yshearing)
    {
        float const projhack_zoom = 1.4f *
        // adjust for the FOV, increasing the FOV reduces the zenith glitch
        // don't apply if the zenith is cut from the viewing area
        (65536.f / fviewingrange) *
        (float)(windowxy2.y-windowxy1.y+1) /
        (float)(windowxy2.x-windowxy1.x+1) *
        (float)(xdim)/(float)(ydim);
        if (projhack_zoom < 1.f)
            return 1.f;
        static constexpr float const maxcoshoriz = 0.540971179375801f; // 128/sqrt(128^2+199^2) = cos of an horiz diff of 199
        float const factor = (projhack_zoom - 1.f) * (1.f / maxcoshoriz);
        return 1.f + (factor * (1.f - Bfabsf(gchang)));
    }

    // No projection hacks (legacy or new-aspect)
    return 1.f;
}

static void resizeglcheck(void)
{
#ifndef EDUKE32_GLES
    //FUK
    if (lastglpolygonmode != r_polygonmode)
    {
        lastglpolygonmode = r_polygonmode;
        switch (r_polygonmode)
        {
        default:
        case 0:
            glPolygonMode(GL_FRONT_AND_BACK,GL_FILL); break;
        case 1:
            glPolygonMode(GL_FRONT_AND_BACK,GL_LINE); break;
        case 2:
            glPolygonMode(GL_FRONT_AND_BACK,GL_POINT); break;
        }
    }
    if (r_polygonmode) //FUK
    {
        glClearColor(1.0,1.0,1.0,0.0);
        glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
    }
#else
    glPolygonMode(GL_FRONT_AND_BACK,GL_FILL);
#endif

    if ((glox1 != windowxy1.x) || (gloy1 != windowxy1.y) || (glox2 != windowxy2.x) || (gloy2 != windowxy2.y) || (gloxyaspect != gxyaspect) || (gloyxscale != gyxscale) || (glohoriz2 != ghoriz2) || (glotang != gtang))
    {
        const int32_t ourxdimen = (windowxy2.x-windowxy1.x+1);
        float ratio = get_projhack_ratio();
        const int32_t fovcorrect = (int32_t)(ourxdimen*ratio - ourxdimen);

        ratio = 1.f/ratio;

        glox1 = (float)windowxy1.x; gloy1 = (float)windowxy1.y;
        glox2 = (float)windowxy2.x; gloy2 = (float)windowxy2.y;

        glViewport(windowxy1.x-(fovcorrect/2), ydim-(windowxy2.y+1),
                    ourxdimen+fovcorrect, windowxy2.y-windowxy1.y+1);

        glMatrixMode(GL_PROJECTION);

        float m[4][4];
        Bmemset(m,0,sizeof(m));

        float const nearclip = 4.0f / (gxyaspect * gyxscale * 1024.f);
        float const farclip = 64.f;

        gloxyaspect = gxyaspect;
        gloyxscale = gyxscale;
        glohoriz2 = ghoriz2;
        glotang = gtang;

        m[0][0] = 1.f;
        m[1][1] = fxdimen / (fydimen * ratio);
        m[2][0] = 2.f * ghoriz2 * gstang / fxdimen;
        m[2][1] = 2.f * ghoriz2 * gctang / fydimen;
        m[2][2] = (farclip + nearclip) / (farclip - nearclip);
        m[2][3] = 1.f;
        m[3][2] = -(2.f * farclip * nearclip) / (farclip - nearclip);
        glLoadMatrixf(&m[0][0]);

        glMatrixMode(GL_MODELVIEW);
        glLoadIdentity();

        if (!nofog) polymost_setFogEnabled(true);
    }
}

// Gives fully transparent texels (alpha == 0) the average RGB of their opaque
// 4-neighbours so that filtering does not bleed arbitrary colors into the
// edges of masked textures. Alpha values are never modified.
//
//   dapic:  pixel buffer, modified in place; rows are dasiz2.x texels apart
//   dasiz:  size used to bound the right/bottom neighbour look-ups
//           (replaced by dasiz2 unless DAMETH_CLAMPED is set)
//   dasiz2: buffer dimensions; dasiz2.x is the row stride
//   dameth: DAMETH_* flags; nothing is done unless a DAMETH_MASKPROPS bit is set
static void fixtransparency(coltype *dapic, vec2_t dasiz, vec2_t dasiz2, int32_t dameth)
{
    if (!(dameth & DAMETH_MASKPROPS))
        return;

    // last texel (inclusive) to visit in each dimension
    vec2_t doxy = { dasiz2.x-1, dasiz2.y-1 };

    if (dameth & DAMETH_CLAMPED)
        doxy = { min(doxy.x, dasiz.x), min(doxy.y, dasiz.y) };
    else  dasiz = dasiz2; //Make repeating textures duplicate top/left parts

    dasiz.x--; dasiz.y--; //Hacks for optimization inside loop
    // offset from a texel to the one directly above it
    int32_t const naxsiz2 = -dasiz2.x;

    //Set transparent pixels to average color of neighboring opaque pixels
    //Doing this makes bilinear filtering look much better for masked textures (I.E. sprites)
    for (bssize_t y=doxy.y; y>=0; y--)
    {
        // start at the right-most texel of the row and walk left
        coltype * wpptr = &dapic[y*dasiz2.x+doxy.x];

        for (bssize_t x=doxy.x; x>=0; x--,wpptr--)
        {
            // opaque (or partially opaque) texels are left alone
            if (wpptr->a) continue;

            // running RGB sums and the number (j) of opaque neighbours found
            int r = 0, g = 0, b = 0, j = 0;

            // left, right, up, down -- each only if inside bounds and non-transparent
            if ((x>     0) && (wpptr[     -1].a)) { r += wpptr[     -1].r; g += wpptr[     -1].g; b += wpptr[     -1].b; j++; }
            if ((x<dasiz.x) && (wpptr[     +1].a)) { r += wpptr[     +1].r; g += wpptr[     +1].g; b += wpptr[     +1].b; j++; }
            if ((y>     0) && (wpptr[naxsiz2].a)) { r += wpptr[naxsiz2].r; g += wpptr[naxsiz2].g; b += wpptr[naxsiz2].b; j++; }
            if ((y<dasiz.y) && (wpptr[dasiz2.x].a)) { r += wpptr[dasiz2.x].r; g += wpptr[dasiz2.x].g; b += wpptr[dasiz2.x].b; j++; }

            // rounded average of j samples; j == 0 leaves the texel untouched.
            // The divide by 3 is approximated as (*85+128)>>8.
            switch (j)
            {
            case 1:
                wpptr->r =   r            ; wpptr->g =   g            ; wpptr->b =   b            ; break;
            case 2:
                wpptr->r = ((r   +  1)>>1); wpptr->g = ((g   +  1)>>1); wpptr->b = ((b   +  1)>>1); break;
            case 3:
                wpptr->r = ((r*85+128)>>8); wpptr->g = ((g*85+128)>>8); wpptr->b = ((b*85+128)>>8); break;
            case 4:
                wpptr->r = ((r   +  2)>>2); wpptr->g = ((g   +  2)>>2); wpptr->b = ((b   +  2)>>2); break;
            }
        }
    }
}

#if defined EDUKE32_GLES
// Candidate GL internal formats for the GLES build. Each list is
// zero-terminated and is probed front to back by
// Polymost_TryDummyTexture() / Polymost_TryCompressedDummyTexture(); the first
// format the driver accepts is stored in the matching texfmt_* / comprtexfmt_*
// variable below.
// sorted first in increasing order of size, then in decreasing order of quality
static int32_t const texfmts_rgb_mask[] = { GL_RGB5_A1, GL_RGBA, 0 };
static int32_t const texfmts_rgb[] = { GL_RGB565, GL_RGB5_A1, GL_RGB, GL_RGBA, 0 };
static int32_t const texfmts_rgba[] = { GL_RGBA4, GL_RGBA, 0 } ;

// chosen uncompressed formats (0 if none of the candidates worked)
static int32_t texfmt_rgb_mask;
static int32_t texfmt_rgb;
static int32_t texfmt_rgba;

#if defined EDUKE32_IOS
static int32_t const comprtexfmts_rgb[] = { GL_ETC1_RGB8_OES, 0 };
static int32_t const comprtexfmts_rgba[] = { 0 };
static int32_t const comprtexfmts_rgb_mask[] = { 0 };
#else
// compressed candidates are only listed when the GL headers define them
static int32_t const comprtexfmts_rgb[] =
{
#ifdef GL_COMPRESSED_RGB8_ETC2
    GL_COMPRESSED_RGB8_ETC2,
#endif
#ifdef GL_ETC1_RGB8_OES
    GL_ETC1_RGB8_OES,
#endif
    0
    };
// TODO: waiting on etcpak support for ETC2 with alpha
static int32_t const comprtexfmts_rgba[] = { /*GL_COMPRESSED_RGBA8_ETC2_EAC,*/ 0 };
static int32_t const comprtexfmts_rgb_mask[] = { /*GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2,*/ 0 };
#endif

// chosen compressed formats (0 means "no compressed format available")
static int32_t comprtexfmt_rgb_mask;
static int32_t comprtexfmt_rgb;
static int32_t comprtexfmt_rgba;

// Externally defined ETC block encoders, declared with C linkage. Each takes a
// pointer to pixel data and returns one 64-bit compressed block; callers in
// this file pass a 4x4 block of coltype pixels.
# ifdef __cplusplus
extern "C" {
# endif
extern uint64_t ProcessRGB(uint8_t const *);
extern uint64_t ProcessRGB_ETC2(uint8_t const *);
# ifdef __cplusplus
}
# endif

// signature shared by the block encoders above
typedef uint64_t (*ETCFunction_t)(uint8_t const *);

// Maps a compressed GL texture format to the encoder that produces blocks for
// it: ProcessRGB for GL_ETC1_RGB8_OES and ProcessRGB_ETC2 for
// GL_COMPRESSED_RGB8_ETC2 (each only when the enum is defined at compile time).
// Any other format takes the default case, which is marked unreachable and
// written to return NULL.
static ETCFunction_t Polymost_PickETCFunction(int32_t const comprtexfmt)
{
    switch (comprtexfmt)
    {
# ifdef GL_ETC1_RGB8_OES
        case GL_ETC1_RGB8_OES:
            return ProcessRGB;
# endif

# ifdef GL_COMPRESSED_RGB8_ETC2
        case GL_COMPRESSED_RGB8_ETC2:
            return ProcessRGB_ETC2;
# endif

// alpha-capable ETC2 formats are disabled; no encoder is wired up for them here
# if 0
        case GL_COMPRESSED_RGBA8_ETC2_EAC:
            fallthrough__;
        case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
            fallthrough__;
# endif

        default:
            EDUKE32_UNREACHABLE_SECTION(return NULL);
    }
}

static int Polymost_ConfirmNoGLError(void)
{
    GLenum checkerr, err = GL_NO_ERROR;
    while ((checkerr = glGetError()) != GL_NO_ERROR)
        err = checkerr;

    return err == GL_NO_ERROR;
}

// Probes a zero-terminated list of internal formats by uploading a 4x4 RGBA
// image with each in turn. Returns the first format that raises no GL error,
// or 0 (after logging a message) if none is accepted.
static int32_t Polymost_TryDummyTexture(coltype const * const pic, int32_t const * formats)
{
    for (int32_t const * candidate = formats; *candidate; ++candidate)
    {
        glTexImage2D(GL_TEXTURE_2D, 0, *candidate, 4,4, 0, GL_RGBA, GL_UNSIGNED_BYTE, pic);

        if (Polymost_ConfirmNoGLError())
            return *candidate;
    }

    initputs("No texture formats supported?!\n");

    return 0;
}

// Probes a zero-terminated list of compressed formats: the 4x4 image is
// encoded into one 64-bit block with the format's encoder and uploaded.
// Returns the first format that raises no GL error, or 0 if none is accepted
// (an empty list yields 0 without logging anything).
static int32_t Polymost_TryCompressedDummyTexture(coltype const * const pic, int32_t const * formats)
{
    for (int32_t const * candidate = formats; *candidate; ++candidate)
    {
        ETCFunction_t const encode = Polymost_PickETCFunction(*candidate);
        uint64_t const encodedBlock = encode((uint8_t const *)pic);

        jwzgles_glCompressedTexImage2D(GL_TEXTURE_2D, 0, *candidate, 4,4, 0, sizeof(uint64_t), &encodedBlock);

        if (Polymost_ConfirmNoGLError())
            return *candidate;
    }

    return 0;
}

static void Polymost_DetermineTextureFormatSupport(void)
{
    // init dummy texture to trigger possible failure of all compression modes
    coltype pic[4*4] = { { 0, 0, 0, 0 } };
    GLuint tex = 0;

    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);

    BuildGLErrorCheck(); // XXX: Clear errors.

    texfmt_rgb = Polymost_TryDummyTexture(pic, texfmts_rgb);
    texfmt_rgba = Polymost_TryDummyTexture(pic, texfmts_rgba);
    texfmt_rgb_mask = Polymost_TryDummyTexture(pic, texfmts_rgb_mask);

    comprtexfmt_rgb = Polymost_TryCompressedDummyTexture(pic, comprtexfmts_rgb);
    comprtexfmt_rgba = Polymost_TryCompressedDummyTexture(pic, comprtexfmts_rgba);
    comprtexfmt_rgb_mask = Polymost_TryCompressedDummyTexture(pic, comprtexfmts_rgb_mask);

    glDeleteTextures(1, &tex);
}
#endif

// Uploads one mip level of the currently bound GL_TEXTURE_2D.
//
//   doalloc:        bit 0 set -> allocate storage (glTexImage2D);
//                   clear     -> update existing storage (glTexSubImage2D)
//   siz:            dimensions of the image in pic
//   texfmt:         client pixel format passed to GL
//   pic:            source pixels (coltype, 4 bytes each)
//   intexfmt:       GL internal format used when allocating
//   comprtexfmt:    (GLES only) compressed format to use, 0 for none
//   texcompress_ok: (GLES only) whether compression is permitted for this texture
//   level:          mip level to upload to
//
// On GLES, when compression is permitted, a compressed format is available and
// both dimensions are multiples of 4, the image is ETC-encoded on the CPU and
// uploaded as compressed data instead.
static void Polymost_SendTexToDriver(int32_t const doalloc,
                                     vec2_t const siz,
                                     int32_t const texfmt,
                                     coltype const * const pic,
                                     int32_t const intexfmt,
#if defined EDUKE32_GLES
                                     int32_t const comprtexfmt,
                                     int32_t const texcompress_ok,
#endif
                                     int32_t const level)
{
#if defined EDUKE32_GLES
    if (texcompress_ok && comprtexfmt && (siz.x & 3) == 0 && (siz.y & 3) == 0)
    {
        size_t const picLength = siz.x * siz.y;
        // element distance between the start of one band of four rows and the next
        size_t const fourRows = siz.x << 2u;
        GLsizei const imageSize = picLength >> 1u; // 4x4 pixels --> 8 bytes
        uint8_t * const comprpic = (uint8_t *)Xaligned_alloc(8, imageSize);

        ETCFunction_t func = Polymost_PickETCFunction(comprtexfmt);

        coltype buf[4*4];
        uint64_t * out = (uint64_t *)comprpic;
        // walk the image in 4x4 blocks: outer loop over bands of four rows,
        // inner loop over blocks within the band
        for (coltype const * row = pic, * const pic_end = pic + picLength; row < pic_end; row += fourRows)
            for (coltype const * block = row, * const row_end = row + siz.x; block < row_end; block += 4)
            {
                // gather the block column by column (buf[0..3] is the first
                // column top to bottom, buf[4..7] the second, and so on)
                buf[0] = block[0];
                buf[1] = block[siz.x];
                buf[2] = block[siz.x*2];
                buf[3] = block[siz.x*3];
                buf[4] = block[1];
                buf[5] = block[siz.x+1];
                buf[6] = block[siz.x*2+1];
                buf[7] = block[siz.x*3+1];
                buf[8] = block[2];
                buf[9] = block[siz.x+2];
                buf[10] = block[siz.x*2+2];
                buf[11] = block[siz.x*3+2];
                buf[12] = block[3];
                buf[13] = block[siz.x+3];
                buf[14] = block[siz.x*2+3];
                buf[15] = block[siz.x*3+3];

                *out++ = func((uint8_t const *)buf);
            }

        if (doalloc & 1)
            jwzgles_glCompressedTexImage2D(GL_TEXTURE_2D, level, comprtexfmt, siz.x,siz.y, 0, imageSize, comprpic);
        else
            jwzgles_glCompressedTexSubImage2D(GL_TEXTURE_2D, level, 0,0, siz.x,siz.y, comprtexfmt, imageSize, comprpic);

        Xaligned_free(comprpic);

        return;
    }
#endif

    // uncompressed path: pixels are handed to GL as packed 32-bit values, with
    // the component order selected by host endianness
#if B_BIG_ENDIAN
    GLenum type = GL_UNSIGNED_INT_8_8_8_8;
#else
    GLenum type = GL_UNSIGNED_INT_8_8_8_8_REV;
#endif
    if (doalloc & 1)
        glTexImage2D(GL_TEXTURE_2D, level, intexfmt, siz.x,siz.y, 0, texfmt, type, pic);
    else
        glTexSubImage2D(GL_TEXTURE_2D, level, 0,0, siz.x,siz.y, texfmt, type, pic);
}

// Uploads a true-color texture and, unless mipmaps are not needed, its whole
// mip chain to the currently bound GL_TEXTURE_2D.
//
//   doalloc: bit 0 set -> allocate storage, clear -> update existing storage
//   siz:     dimensions of the image in pic
//   texfmt:  client pixel format passed to GL
//   pic:     pixel buffer; OVERWRITTEN in place while the mip levels are generated
//   tsiz:    size passed (scaled per mip level) to fixtransparency()
//   dameth:  DAMETH_* flags controlling downsizing, compression and transparency fixing
//
// The first `miplevel` levels are generated but not uploaded, which effectively
// downsizes the texture; level j of the image is uploaded as GL level j - miplevel.
void uploadtexture(int32_t doalloc, vec2_t siz, int32_t texfmt,
                   coltype *pic, vec2_t tsiz, int32_t dameth)
{
    const int artimmunity = !!(dameth & DAMETH_ARTIMMUNITY);
    const int hi = !!(dameth & DAMETH_HI);
    const int nodownsize = !!(dameth & DAMETH_NODOWNSIZE) || artimmunity;
    const int nomiptransfix  = !!(dameth & DAMETH_NOFIX);
    // glusetexcompr == 2 compresses everything; any other non-zero value spares art-immune textures
    const int texcompress_ok = !(dameth & DAMETH_NOTEXCOMPRESS) && (glusetexcompr == 2 || (glusetexcompr && !artimmunity));

#if !defined EDUKE32_GLES
    int32_t intexfmt;
    if (texcompress_ok && glinfo.texcompr)
        intexfmt = GL_COMPRESSED_RGBA;
    else
        intexfmt = GL_RGBA8;
#else
    const int hasalpha  = !!(dameth & (DAMETH_HASALPHA|DAMETH_ONEBITALPHA));
    const int onebitalpha  = !!(dameth & DAMETH_ONEBITALPHA);

    // pick from the formats probed by Polymost_DetermineTextureFormatSupport()
    int32_t const intexfmt = hasalpha ? (onebitalpha ? texfmt_rgb_mask : texfmt_rgba) : texfmt_rgb;
    int32_t const comprtexfmt = hasalpha ? (onebitalpha ? comprtexfmt_rgb_mask : comprtexfmt_rgba) : comprtexfmt_rgb;
#endif

    // strip the flags that only concern this function before passing dameth on
    dameth &= ~DAMETH_UPLOADTEXTURE_MASK;

    // lazily determine gltexmaxsize as log2 of the driver's maximum texture size
    if (gltexmaxsize <= 0)
    {
        GLint i = 0;
        glGetIntegerv(GL_MAX_TEXTURE_SIZE, &i);
        if (!i) gltexmaxsize = 6;   // 2^6 = 64 == default GL max texture size
        else
        {
            gltexmaxsize = 0;
            for (; i>1; i>>=1) gltexmaxsize++;
#ifdef EDUKE32_GLES
            // on GLES, additionally limit the maximum size relative to the screen width
            while ((1<<(gltexmaxsize-1)) > xdim)
                gltexmaxsize--;
#endif
        }
    }

    gltexmiplevel = max(0, min(gltexmaxsize-1, gltexmiplevel));

    int miplevel = gltexmiplevel;

    // skip as many levels as needed for the first uploaded level to fit the size limit
    while ((siz.x >> miplevel) > (1 << gltexmaxsize) || (siz.y >> miplevel) > (1 << gltexmaxsize))
        miplevel++;

    if (hi && !nodownsize && r_downsize > miplevel)
        miplevel = r_downsize;

    // don't use mipmaps if mipmapping is disabled
    //POGO: until the texcacheheader can be updated, generate the mipmaps texcache expects if it's enabled
    if (!glusetexcache &&
        (glfiltermodes[gltexfiltermode].min == GL_NEAREST ||
         glfiltermodes[gltexfiltermode].min == GL_LINEAR))
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
    }

    // the full-size image is only uploaded when no levels are being skipped
    if (!miplevel)
        Polymost_SendTexToDriver(doalloc, siz, texfmt, pic,
                                 intexfmt,
#if defined EDUKE32_GLES
                                 comprtexfmt,
                                 texcompress_ok,
#endif
                                 0);

    // don't generate mipmaps if we're not going to use them
    if (!glusetexcache &&
        (glfiltermodes[gltexfiltermode].min == GL_NEAREST ||
         glfiltermodes[gltexfiltermode].min == GL_LINEAR))
    {
        return;
    }

    // siz2 = size of the level currently held in pic, siz3 = size of the level being built
    vec2_t siz2 = siz;

    for (bssize_t j=1; (siz2.x > 1) || (siz2.y > 1); j++)
    {
        vec2_t const siz3 = { max(1, siz2.x >> 1), max(1, siz2.y >> 1) };  // this came from the GL_ARB_texture_non_power_of_two spec
        //x3 = ((x2+1)>>1); y3 = ((y2+1)>>1);

        // 2x2 box filter, written back into the start of the same buffer
        for (bssize_t y=0; y<siz3.y; y++)
        {
            coltype *wpptr = &pic[y*siz3.x];
            coltype const *rpptr = &pic[(y<<1)*siz2.x];

            for (bssize_t x=0; x<siz3.x; x++,wpptr++,rpptr+=2)
            {
                // component sums and the number (k) of contributing source texels
                int32_t r=0, g=0, b=0, a=0, k=0;

                // only source texels with non-zero alpha contribute, and only
                // those that actually exist when the source size is odd
                if (rpptr[0].a)                  { r += rpptr[0].r; g += rpptr[0].g; b += rpptr[0].b; a += rpptr[0].a; k++; }
                if ((x+x+1 < siz2.x) && (rpptr[1].a)) { r += rpptr[1].r; g += rpptr[1].g; b += rpptr[1].b; a += rpptr[1].a; k++; }
                if (y+y+1 < siz2.y)
                {
                    if ((rpptr[siz2.x].a)) { r += rpptr[siz2.x  ].r; g += rpptr[siz2.x  ].g; b += rpptr[siz2.x  ].b; a += rpptr[siz2.x  ].a; k++; }
                    if ((x+x+1 < siz2.x) && (rpptr[siz2.x+1].a)) { r += rpptr[siz2.x+1].r; g += rpptr[siz2.x+1].g; b += rpptr[siz2.x+1].b; a += rpptr[siz2.x+1].a; k++; }
                }
                // rounded average of k samples; k == 0 writes all zeroes.
                // The divide by 3 is approximated as (*85+128)>>8.
                switch (k)
                {
                case 0:
                case 1:
                    wpptr->r = r; wpptr->g = g; wpptr->b = b; wpptr->a = a; break;
                case 2:
                    wpptr->r = ((r+1)>>1); wpptr->g = ((g+1)>>1); wpptr->b = ((b+1)>>1); wpptr->a = ((a+1)>>1); break;
                case 3:
                    wpptr->r = ((r*85+128)>>8); wpptr->g = ((g*85+128)>>8); wpptr->b = ((b*85+128)>>8); wpptr->a = ((a*85+128)>>8); break;
                case 4:
                    wpptr->r = ((r+2)>>2); wpptr->g = ((g+2)>>2); wpptr->b = ((b+2)>>2); wpptr->a = ((a+2)>>2); break;
                default:
                    EDUKE32_UNREACHABLE_SECTION(break);
                }
                //if (wpptr->a) wpptr->a = 255;
            }
        }

        if (!nomiptransfix)
        {
            // tsiz scaled down to level j, rounding up
            vec2_t const tsizzle = { (tsiz.x + (1 << j)-1) >> j, (tsiz.y + (1 << j)-1) >> j };

            fixtransparency(pic, tsizzle, siz3, dameth);
        }

        // levels below miplevel are generated (later levels derive from them) but not uploaded
        if (j >= miplevel)
            Polymost_SendTexToDriver(doalloc, siz3, texfmt, pic,
                                     intexfmt,
#if defined EDUKE32_GLES
                                     comprtexfmt,
                                     texcompress_ok,
#endif
                                     j - miplevel);

        siz2 = siz3;
    }
}

void uploadtextureindexed(int32_t doalloc, vec2_t offset, vec2_t siz, intptr_t tile)
{
    if (doalloc & 1)
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1);

        glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, siz.y, siz.x, 0, GL_RED, GL_UNSIGNED_BYTE, (void*) tile);
    }
    else
    {
        glTexSubImage2D(GL_TEXTURE_2D, 0, offset.x, offset.y, siz.y, siz.x, GL_RED, GL_UNSIGNED_BYTE, (void*) tile);
    }
}

void uploadbasepalette(int32_t basepalnum)
{
    if (!polymost1BasicShaderProgramID)
    {
        //POGO: if we haven't initialized properly yet, we shouldn't be uploading base palettes
        return;
    }
    if (!basepaltable[basepalnum])
    {
        return;
    }

    //POGO: this is only necessary for GL fog/vertex color shade compatibility, since those features don't index into shade tables
    uint8_t basepalWFullBrightInfo[4*256];
    for (int i = 0; i < 256; ++i)
    {
        basepalWFullBrightInfo[i*4] = basepaltable[basepalnum][i*3];
        basepalWFullBrightInfo[i*4+1] = basepaltable[basepalnum][i*3+1];
        basepalWFullBrightInfo[i*4+2] = basepaltable[basepalnum][i*3+2];
        basepalWFullBrightInfo[i*4+3] = 0-(IsPaletteIndexFullbright(i) != 0);
    }

    char allocateTexture = !paletteTextureIDs[basepalnum];
    if (allocateTexture)
    {
        glGenTextures(1, &paletteTextureIDs[basepalnum]);
    }
    glBindTexture(GL_TEXTURE_2D, paletteTextureIDs[basepalnum]);
    if (allocateTexture)
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 256, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, basepalWFullBrightInfo);
    }
    else
    {
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 1, GL_RGBA, GL_UNSIGNED_BYTE, basepalWFullBrightInfo);
    }
}

void uploadpalswap(int32_t palookupnum)
{
    if (!polymost1BasicShaderProgramID)
    {
        //POGO: if we haven't initialized properly yet, we shouldn't be uploading palette swap tables
        return;
    }
    if (!palookup[palookupnum])
    {
        return;
    }

    char allocateTexture = !palswapTextureID;
    if (allocateTexture)
    {
        glGenTextures(1, &palswapTextureID);
    }
    glBindTexture(GL_TEXTURE_2D, palswapTextureID);
    if (allocateTexture)
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, 1);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, PALSWAP_TEXTURE_SIZE, PALSWAP_TEXTURE_SIZE, 0, GL_RED, GL_UNSIGNED_BYTE, NULL);
    }

    int32_t column = palookupnum%(PALSWAP_TEXTURE_SIZE/256);
    int32_t row = palookupnum/(PALSWAP_TEXTURE_SIZE/256);
    int32_t rowOffset = (numshades+1)*row;
    if (rowOffset > PALSWAP_TEXTURE_SIZE)
    {
        OSD_Printf("Polymost: palswaps are too large for palswap tilesheet!\n");
        return;
    }
    glTexSubImage2D(GL_TEXTURE_2D, 0, 256*column, rowOffset, 256, numshades+1, GL_RED, GL_UNSIGNED_BYTE, palookup[palookupnum]);
}


#if 0
// TODO: make configurable
// Disabled helper: reports whether tilenum lies in the hard-coded sky tile range.
static int32_t tile_is_sky(int32_t tilenum)
{
    return (tilenum >= 78 /*CLOUDYOCEAN*/ && tilenum <= 99 /*REDSKY2*/);
}
# define clamp_if_tile_is_sky(x, y) (tile_is_sky(x) ? (y) : GL_REPEAT)
#else
// With the sky check compiled out, both arguments are discarded and the result is always GL_REPEAT.
# define clamp_if_tile_is_sky(x, y) (GL_REPEAT)
#endif

static void polymost_setuptexture(const int32_t dameth, int filter)
{
    const GLuint clamp_mode = glinfo.clamptoedge ? GL_CLAMP_TO_EDGE : GL_CLAMP;

    gltexfiltermode = clamp(gltexfiltermode, 0, NUMGLFILTERMODES-1);

    if (filter == -1)
        filter = gltexfiltermode;

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, glfiltermodes[filter].mag);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, glfiltermodes[filter].min);

#ifdef USE_GLEXT
    if (glinfo.maxanisotropy > 1.f)
    {
        uint32_t i = (unsigned)Blrintf(glinfo.maxanisotropy);

        if ((unsigned)glanisotropy > i)
            glanisotropy = i;

        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, glanisotropy);
    }
#endif

    if (!(dameth & DAMETH_CLAMPED))
    {
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, clamp_if_tile_is_sky(dapic, clamp_mode));
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    }
    else
    {
        // For sprite textures, clamping looks better than wrapping
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, clamp_mode);
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, clamp_mode);
    }
}

static void gloadtile_art_indexed(int32_t dapic, int32_t dameth, pthtyp *pth, int32_t doalloc)
{
    vec2_16_t const & tsizart = tilesiz[dapic];
    vec2_t siz = { tsizart.x, tsizart.y };
    //POGOTODO: npoty
    char npoty = 0;

    //POGOTODO: if !glinfo.texnpot, then we could allocate a texture of the pow2 size, and then populate the subportion using buffersubdata func
    //if (!glinfo.texnpot)

    Tile tile = {};
    if (waloff[dapic])
    {
        char tileIsPacked = tilepacker_getTile(dapic+1, &tile);
        if (tileIsPacked &&
            tile.rect.width == (uint32_t) tsizart.y &&
            tile.rect.height == (uint32_t) tsizart.x)
        {
            pth->glpic = tilesheetTexIDs[tile.tilesheetID];
            doalloc = false;
        }
        else if (doalloc)
        {
            glGenTextures(1, (GLuint *)&pth->glpic);
        }
        glBindTexture(GL_TEXTURE_2D, pth->glpic);

        if (doalloc)
        {
            const GLuint clamp_mode = glinfo.clamptoedge ? GL_CLAMP_TO_EDGE : GL_CLAMP;
            if (!(dameth & DAMETH_CLAMPED))
            {
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, clamp_if_tile_is_sky(dapic, clamp_mode));
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
            }
            else
            {
                // For sprite textures, clamping looks better than wrapping
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, clamp_mode);
                glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, clamp_mode);
            }
        }
        uploadtextureindexed(doalloc, {(int32_t) tile.rect.u, (int32_t) tile.rect.v}, siz, waloff[dapic]);
    }
    else
    {
        tilepacker_getTile(0, &tile);
        pth->glpic = tilesheetTexIDs[tile.tilesheetID];
    }

    pth->picnum = dapic;
    pth->palnum = 0;
    pth->shade = 0;
    pth->effects = 0;
    pth->flags = TO_PTH_CLAMPED(dameth) | TO_PTH_NOTRANSFIX(dameth) | (PTH_HASALPHA|PTH_ONEBITALPHA) | (npoty*PTH_NPOTWALL) | PTH_INDEXED;
    pth->hicr = NULL;
}

// Loads ART tile `dapic` into a true-colour GL texture and fills in `pth` to describe it.
//   dapic:      tile number (indexes tilesiz, waloff, picanm, picsiz).
//   dapal:      index into palookup[] used to remap the tile's palette indices.
//   tintpalnum: index into hictinting[], or negative for no tint.
//   dashade:    shade row inside the lookup table (offset dashade<<8).
//   dameth:     draw-method flags; if PTH_INDEXED is set the call is forwarded to gloadtile_art_indexed().
//   pth:        texture cache entry to populate.
//   doalloc:    non-zero to generate a new GL texture name for pth->glpic.
// The texture cache is consulted first; on a miss the pixels are converted on the CPU, uploaded
// with uploadtexture() and, when the USE_GLEXT (non-GLES) cache conditions hold, written back to the cache.
// If any fullbright palette index was seen, the function calls itself once more to build a
// fullbright-only companion texture in pth->ofb.
void gloadtile_art(int32_t dapic, int32_t dapal, int32_t tintpalnum, int32_t dashade, int32_t dameth, pthtyp *pth, int32_t doalloc)
{
    if (dameth & PTH_INDEXED)
    {
        return gloadtile_art_indexed(dapic, dameth, pth, doalloc);
    }

    // non-zero only while the recursive call at the bottom of this function is running
    static int32_t fullbrightloadingpass = 0;
    vec2_16_t const & tsizart = tilesiz[dapic];
    // siz = dimensions of the GL texture, tsiz = dimensions of the tile itself
    vec2_t siz = { 0, 0 }, tsiz = { tsizart.x, tsizart.y };
    int const picdim = tsiz.x*tsiz.y;
    char hasalpha = 0, hasfullbright = 0;
    char npoty = 0;

    texcacheheader cachead;
    char texcacheid[BMAX_PATH];
    {
        // Absolutely disgusting.
        // the cache id is seeded with the first (up to) four bytes of the tile's pixel data
        uint32_t firstint = 0;
        if (waloff[dapic])
            Bmemcpy(&firstint, (void *)waloff[dapic], min(4, picdim));
        sprintf(texcacheid, "%08x", firstint);
    }
    texcache_calcid(texcacheid, texcacheid, picdim | ((unsigned)dapal<<24u), DAMETH_NARROW_MASKPROPS(dameth) | ((unsigned)dapic<<8u) | ((unsigned)dashade<<24u), tintpalnum);
    int32_t gotcache = texcache_readtexheader(texcacheid, &cachead, 0);

    if (gotcache && !texcache_loadtile(&cachead, &doalloc, pth))
    {
        // cache hit: only the flags need to be recovered from the cache header
        hasalpha = !!(cachead.flags & CACHEAD_HASALPHA);
        hasfullbright = !!(cachead.flags & CACHEAD_HASFULLBRIGHT);
        npoty = !!(cachead.flags & CACHEAD_NPOTWALL);
    }
    else
    {
        if (!glinfo.texnpot)
        {
            // round each dimension up to the next power of two
            for (siz.x = 1; siz.x < tsiz.x; siz.x += siz.x) { }
            for (siz.y = 1; siz.y < tsiz.y; siz.y += siz.y) { }
        }
        else
        {
            if ((tsiz.x|tsiz.y) == 0)
                siz.x = siz.y = 1;
            else
                siz = tsiz;
        }

        coltype *pic = (coltype *)Xmalloc(siz.x*siz.y*sizeof(coltype));

        if (!waloff[dapic])
        {
            //Force invalid textures to draw something - an almost purely transparency texture
            //This allows the Z-buffer to be updated for mirrors (which are invalidated textures)
            pic[0].r = pic[0].g = pic[0].b = 0; pic[0].a = 1;
            tsiz.x = tsiz.y = 1; hasalpha = 1;
        }
        else
        {
            const int dofullbright = !(picanm[dapic].sf & PICANM_NOFULLBRIGHT_BIT) && !(globalflags & GLOBAL_NO_GL_FULLBRIGHT);

            for (bssize_t y = 0; y < siz.y; y++)
            {
                coltype *wpptr = &pic[y * siz.x];
                // rows/columns past the tile's own size wrap back into the tile
                int32_t y2 = (y < tsiz.y) ? y : y - tsiz.y;

                for (bssize_t x = 0; x < siz.x; x++, wpptr++)
                {
                    int32_t dacol;
                    int32_t x2 = (x < tsiz.x) ? x : x-tsiz.x;

                    if ((dameth & DAMETH_CLAMPED) && (x >= tsiz.x || y >= tsiz.y)) //Clamp texture
                    {
                        wpptr->r = wpptr->g = wpptr->b = wpptr->a = 0;
                        continue;
                    }

                    // tile pixels are addressed as x*height + y
                    // NOTE(review): the == 255 test below needs plain char to be unsigned — presumably guaranteed by the build flags; confirm
                    dacol = *(char *)(waloff[dapic]+x2*tsiz.y+y2);

                    // palette index 255 is the transparent colour
                    if (dacol == 255)
                    {
                        wpptr->a = 0;
                        hasalpha = 1;
                    }
                    else
                        wpptr->a = 255;

                    // remap through the requested palette lookup table at the requested shade
                    char *p = (char *)(palookup[dapal])+(int32_t)(dashade<<8);
                    dacol = (uint8_t)p[dacol];

                    if (!fullbrightloadingpass)
                    {
                        // regular texture
                        if (IsPaletteIndexFullbright(dacol) && dofullbright)
                            hasfullbright = 1;
                    }
                    else
                    {
                        // texture with only fullbright areas
                        if (!IsPaletteIndexFullbright(dacol))    // regular colors
                        {
                            wpptr->a = 0;
                            hasalpha = 1;
                        }
                    }

                    bricolor((palette_t *)wpptr, dacol);

                    // tinting is skipped for the fullbright-only pass
                    if (!fullbrightloadingpass && tintpalnum >= 0)
                    {
                        polytint_t const & tint = hictinting[tintpalnum];
                        polytintflags_t const effect = tint.f;
                        uint8_t const r = tint.r;
                        uint8_t const g = tint.g;
                        uint8_t const b = tint.b;

                        if (effect & HICTINT_GRAYSCALE)
                        {
                            wpptr->g = wpptr->r = wpptr->b = (uint8_t) ((wpptr->r * GRAYSCALE_COEFF_RED) +
                                                                  (wpptr->g * GRAYSCALE_COEFF_GREEN) +
                                                                  (wpptr->b * GRAYSCALE_COEFF_BLUE));
                        }

                        if (effect & HICTINT_INVERT)
                        {
                            wpptr->b = 255 - wpptr->b;
                            wpptr->g = 255 - wpptr->g;
                            wpptr->r = 255 - wpptr->r;
                        }

                        if (effect & HICTINT_COLORIZE)
                        {
                            // the >>6 makes a tint component of 64 the identity
                            wpptr->b = min((int32_t)((wpptr->b) * b) >> 6, 255);
                            wpptr->g = min((int32_t)((wpptr->g) * g) >> 6, 255);
                            wpptr->r = min((int32_t)((wpptr->r) * r) >> 6, 255);
                        }

                        switch (effect & HICTINT_BLENDMASK)
                        {
                            case HICTINT_BLEND_SCREEN:
                                wpptr->b = 255 - (((255 - wpptr->b) * (255 - b)) >> 8);
                                wpptr->g = 255 - (((255 - wpptr->g) * (255 - g)) >> 8);
                                wpptr->r = 255 - (((255 - wpptr->r) * (255 - r)) >> 8);
                                break;
                            case HICTINT_BLEND_OVERLAY:
                                wpptr->b = wpptr->b < 128 ? (wpptr->b * b) >> 7 : 255 - (((255 - wpptr->b) * (255 - b)) >> 7);
                                wpptr->g = wpptr->g < 128 ? (wpptr->g * g) >> 7 : 255 - (((255 - wpptr->g) * (255 - g)) >> 7);
                                wpptr->r = wpptr->r < 128 ? (wpptr->r * r) >> 7 : 255 - (((255 - wpptr->r) * (255 - r)) >> 7);
                                break;
                            case HICTINT_BLEND_HARDLIGHT:
                                wpptr->b = b < 128 ? (wpptr->b * b) >> 7 : 255 - (((255 - wpptr->b) * (255 - b)) >> 7);
                                wpptr->g = g < 128 ? (wpptr->g * g) >> 7 : 255 - (((255 - wpptr->g) * (255 - g)) >> 7);
                                wpptr->r = r < 128 ? (wpptr->r * r) >> 7 : 255 - (((255 - wpptr->r) * (255 - r)) >> 7);
                                break;
                        }
                    }

                    //swap r & b so that we deal with the data as BGRA
                    uint8_t tmpR = wpptr->r;
                    wpptr->r = wpptr->b;
                    wpptr->b = tmpR;
                }
            }
        }

        if (doalloc) glGenTextures(1,(GLuint *)&pth->glpic); //# of textures (make OpenGL allocate structure)
        glBindTexture(GL_TEXTURE_2D, pth->glpic);

        fixtransparency(pic,tsiz,siz,dameth);

        if (polymost_want_npotytex(dameth, siz.y) && tsiz.x == siz.x && tsiz.y == siz.y)  // XXX
        {
            // grow the image to the next power-of-two height and fill the extra rows
            // with a copy of the topmost rows
            const int32_t nextpoty = 1 << ((picsiz[dapic] >> 4) + 1);
            const int32_t ydif = nextpoty - siz.y;
            coltype *paddedpic;

            Bassert(ydif < siz.y);

            paddedpic = (coltype *)Xrealloc(pic, siz.x * nextpoty * sizeof(coltype));

            pic = paddedpic;
            Bmemcpy(&pic[siz.x * siz.y], pic, siz.x * ydif * sizeof(coltype));
            siz.y = tsiz.y = nextpoty;

            npoty = 1;
        }

        uploadtexture(doalloc, siz, GL_BGRA, pic, tsiz,
                      dameth | DAMETH_ARTIMMUNITY |
                      (dapic >= MAXUSERTILES ? (DAMETH_NOTEXCOMPRESS|DAMETH_NODOWNSIZE) : 0) | /* never process these short-lived tiles */
                      (hasfullbright ? DAMETH_HASFULLBRIGHT : 0) |
                      (npoty ? DAMETH_NPOTWALL : 0) |
                      (hasalpha ? (DAMETH_HASALPHA|DAMETH_ONEBITALPHA) : 0));

        Xfree(pic);
    }

    polymost_setuptexture(dameth, -1);

    pth->picnum = dapic;
    pth->palnum = dapal;
    pth->shade = dashade;
    pth->effects = 0;
    pth->flags = TO_PTH_CLAMPED(dameth) | TO_PTH_NOTRANSFIX(dameth) | (hasalpha*(PTH_HASALPHA|PTH_ONEBITALPHA)) | (npoty*PTH_NPOTWALL);
    pth->hicr = NULL;

#if defined USE_GLEXT && !defined EDUKE32_GLES
    // write the freshly built texture back to the texture cache
    if (!gotcache && glinfo.texcompr && glusetexcache && glusetexcompr == 2 && dapic < MAXUSERTILES)
    {
        cachead.quality = 0;
        cachead.xdim = tsiz.x;
        cachead.ydim = tsiz.y;

        cachead.flags = (check_nonpow2(siz.x) || check_nonpow2(siz.y)) * CACHEAD_NONPOW2 |
                        npoty * CACHEAD_NPOTWALL |
                        hasalpha * CACHEAD_HASALPHA | hasfullbright * CACHEAD_HASFULLBRIGHT | CACHEAD_NODOWNSIZE;

        texcache_writetex_fromdriver(texcacheid, &cachead);
    }
#endif

    if (hasfullbright && !fullbrightloadingpass)
    {
        // Load the ONLY texture that'll be assembled with the regular one to
        // make the final texture with fullbright pixels.
        fullbrightloadingpass = 1;

        if (!pth->ofb)
            pth->ofb = (pthtyp *)Xcalloc(1,sizeof(pthtyp));

        pth->flags |= PTH_HASFULLBRIGHT;

        // the companion is loaded untinted, at shade 0, as a masked texture
        gloadtile_art(dapic, dapal, -1, 0, (dameth & ~DAMETH_MASKPROPS) | DAMETH_MASK, pth->ofb, 1);

        fullbrightloadingpass = 0;
    }
}

int32_t gloadtile_hi(int32_t dapic,int32_t dapalnum, int32_t facen, hicreplctyp *hicr,
                            int32_t dameth, pthtyp *pth, int32_t doalloc, polytintflags_t effect)
{
    if (!hicr) return -1;

    char *fn;

    if (facen > 0)
    {
        if (!hicr->skybox || facen > 6 || !hicr->skybox->face[facen-1])
            return -1;

        fn = hicr->skybox->face[facen-1];
    }
    else
    {
        if (!hicr->filename)
            return -1;

        fn = hicr->filename;
    }

    buildvfs_kfd filh;
    if (EDUKE32_PREDICT_FALSE((filh = kopen4load(fn, 0)) == buildvfs_kfd_invalid))
    {
        OSD_Printf("hightile: %s (pic %d) not found\n", fn, dapic);
        return -2;
    }

    int32_t picfillen = kfilelength(filh);
    kclose(filh);       // FIXME: shouldn't have to do this. bug in cache1d.c

    int32_t startticks = timerGetTicks(), willprint = 0;

    char onebitalpha = 1;
    char hasalpha;
    texcacheheader cachead;
    char texcacheid[BMAX_PATH];
    texcache_calcid(texcacheid, fn, picfillen+(dapalnum<<8), DAMETH_NARROW_MASKPROPS(dameth), effect & HICTINT_IN_MEMORY);
    int32_t gotcache = texcache_readtexheader(texcacheid, &cachead, 0);
    vec2_t siz = { 0, 0 }, tsiz = { 0, 0 };

    if (gotcache && !texcache_loadtile(&cachead, &doalloc, pth))
    {
        tsiz = { cachead.xdim, cachead.ydim };
        hasalpha = !!(cachead.flags & CACHEAD_HASALPHA);
    }
    else
    {
        // CODEDUP: mdloadskin

        int32_t isart = 0;

        gotcache = 0;   // the compressed version will be saved to disk

        int32_t const length = kpzbufload(fn);
        if (length == 0)
            return -1;

        // tsizx/y = replacement texture's natural size
        // xsiz/y = 2^x size of replacement

#ifdef WITHKPLIB
        kpgetdim(kpzbuf,picfillen,&tsiz.x,&tsiz.y);
#endif

        if (tsiz.x == 0 || tsiz.y == 0)
        {
            if (artCheckUnitFileHeader((uint8_t *)kpzbuf, picfillen))
                return -1;

            tsiz = { B_LITTLE16(B_UNBUF16(&kpzbuf[16])), B_LITTLE16(B_UNBUF16(&kpzbuf[18])) };

            if (tsiz.x == 0 || tsiz.y == 0)
                return -1;

            isart = 1;
        }

        pth->siz = tsiz;

        if (!glinfo.texnpot)
        {
            for (siz.x=1; siz.x<tsiz.x; siz.x+=siz.x) { }
            for (siz.y=1; siz.y<tsiz.y; siz.y+=siz.y) { }
        }
        else
            siz = tsiz;

        if (isart)
        {
            if (tsiz.x * tsiz.y + ARTv1_UNITOFFSET > picfillen)
                return -2;
        }

        int32_t const bytesperline = siz.x * sizeof(coltype);
        coltype *pic = (coltype *)Xcalloc(siz.y, bytesperline);

        static coltype *lastpic = NULL;
        static char *lastfn = NULL;
        static int32_t lastsize = 0;

        if (lastpic && lastfn && !Bstrcmp(lastfn,fn))
        {
            willprint=1;
            Bmemcpy(pic, lastpic, siz.x*siz.y*sizeof(coltype));
        }
        else
        {
            if (isart)
            {
                artConvertRGB((palette_t *)pic, (uint8_t *)&kpzbuf[ARTv1_UNITOFFSET], siz.x, tsiz.x, tsiz.y);
            }
#ifdef WITHKPLIB
            else
            {
                if (kprender(kpzbuf,picfillen,(intptr_t)pic,bytesperline,siz.x,siz.y))
                {
                    Xfree(pic);
                    return -2;
                }
            }
#endif

            willprint=2;

            if (hicprecaching)
            {
                lastfn = fn;  // careful...
                if (!lastpic)
                {
                    lastpic = (coltype *)Xmalloc(siz.x*siz.y*sizeof(coltype));
                    lastsize = siz.x*siz.y;
                }
                else if (lastsize < siz.x*siz.y)
                {
                    Xfree(lastpic);
                    lastpic = (coltype *)Xmalloc(siz.x*siz.y*sizeof(coltype));
                }
                if (lastpic)
                    Bmemcpy(lastpic, pic, siz.x*siz.y*sizeof(coltype));
            }
            else if (lastpic)
            {
                DO_FREE_AND_NULL(lastpic);
                lastfn = NULL;
                lastsize = 0;
            }
        }

        char *cptr = britable[gammabrightness ? 0 : curbrightness];

        polytint_t const & tint = hictinting[dapalnum];
        int32_t r = (glinfo.bgra) ? tint.r : tint.b;
        int32_t g = tint.g;
        int32_t b = (glinfo.bgra) ? tint.b : tint.r;

        char al = 255;

        for (bssize_t y = 0, j = 0; y < tsiz.y; ++y, j += siz.x)
        {
            coltype tcol, *rpptr = &pic[j];

            for (bssize_t x = 0; x < tsiz.x; ++x)
            {
                tcol.b = cptr[rpptr[x].b];
                tcol.g = cptr[rpptr[x].g];
                tcol.r = cptr[rpptr[x].r];
                al &= tcol.a = rpptr[x].a;
                onebitalpha &= tcol.a == 0 || tcol.a == 255;

                if (effect & HICTINT_GRAYSCALE)
                {
                    tcol.g = tcol.r = tcol.b = (uint8_t) ((tcol.b * GRAYSCALE_COEFF_RED) +
                                                          (tcol.g * GRAYSCALE_COEFF_GREEN) +
                                                          (tcol.r * GRAYSCALE_COEFF_BLUE));
                }

                if (effect & HICTINT_INVERT)
                {
                    tcol.b = 255 - tcol.b;
                    tcol.g = 255 - tcol.g;
                    tcol.r = 255 - tcol.r;
                }

                if (effect & HICTINT_COLORIZE)
                {
                    tcol.b = min((int32_t)((tcol.b) * r) >> 6, 255);
                    tcol.g = min((int32_t)((tcol.g) * g) >> 6, 255);
                    tcol.r = min((int32_t)((tcol.r) * b) >> 6, 255);
                }

                switch (effect & HICTINT_BLENDMASK)
                {
                    case HICTINT_BLEND_SCREEN:
                        tcol.b = 255 - (((255 - tcol.b) * (255 - r)) >> 8);
                        tcol.g = 255 - (((255 - tcol.g) * (255 - g)) >> 8);
                        tcol.r = 255 - (((255 - tcol.r) * (255 - b)) >> 8);
                        break;
                    case HICTINT_BLEND_OVERLAY:
                        tcol.b = tcol.b < 128 ? (tcol.b * r) >> 7 : 255 - (((255 - tcol.b) * (255 - r)) >> 7);
                        tcol.g = tcol.g < 128 ? (tcol.g * g) >> 7 : 255 - (((255 - tcol.g) * (255 - g)) >> 7);
                        tcol.r = tcol.r < 128 ? (tcol.r * b) >> 7 : 255 - (((255 - tcol.r) * (255 - b)) >> 7);
                        break;
                    case HICTINT_BLEND_HARDLIGHT:
                        tcol.b = r < 128 ? (tcol.b * r) >> 7 : 255 - (((255 - tcol.b) * (255 - r)) >> 7);
                        tcol.g = g < 128 ? (tcol.g * g) >> 7 : 255 - (((255 - tcol.g) * (255 - g)) >> 7);
                        tcol.r = b < 128 ? (tcol.r * b) >> 7 : 255 - (((255 - tcol.r) * (255 - b)) >> 7);
                        break;
                }

                rpptr[x] = tcol;
            }
        }

        hasalpha = (al != 255);
        onebitalpha &= hasalpha;

        if ((!(dameth & DAMETH_CLAMPED)) || facen) //Duplicate texture pixels (wrapping tricks for non power of 2 texture sizes)
        {
            if (siz.x > tsiz.x)  // Copy left to right
            {
                for (int32_t y = 0, *lptr = (int32_t *)pic; y < tsiz.y; y++, lptr += siz.x)
                    Bmemcpy(&lptr[tsiz.x], lptr, (siz.x - tsiz.x) << 2);
            }

            if (siz.y > tsiz.y)  // Copy top to bottom
                Bmemcpy(&pic[siz.x * tsiz.y], pic, (siz.y - tsiz.y) * siz.x << 2);
        }

        if (!glinfo.bgra)
        {
            for (bssize_t i=siz.x*siz.y, j=0; j<i; j++)
                swapchar(&pic[j].r, &pic[j].b);
        }

        // end CODEDUP

        if (tsiz.x>>r_downsize <= tilesiz[dapic].x || tsiz.y>>r_downsize <= tilesiz[dapic].y)
            hicr->flags |= HICR_ARTIMMUNITY;

        if ((doalloc&3)==1)
            glGenTextures(1, &pth->glpic); //# of textures (make OpenGL allocate structure)
        glBindTexture(GL_TEXTURE_2D, pth->glpic);

        fixtransparency(pic,tsiz,siz,dameth);

        int32_t const texfmt = glinfo.bgra ? GL_BGRA : GL_RGBA;

        uploadtexture(doalloc,siz,texfmt,pic,tsiz,
                      dameth | DAMETH_HI | DAMETH_NOFIX |
                      TO_DAMETH_NODOWNSIZE(hicr->flags) |
                      TO_DAMETH_NOTEXCOMPRESS(hicr->flags) |
                      TO_DAMETH_ARTIMMUNITY(hicr->flags) |
                      (onebitalpha ? DAMETH_ONEBITALPHA : 0) |
                      (hasalpha ? DAMETH_HASALPHA : 0));

        Xfree(pic);
    }

    // precalculate scaling parameters for replacement
    if (facen > 0)
        pth->scale = { (float)tsiz.x * (1.0f/64.f), (float)tsiz.y * (1.0f/64.f) };
    else
        pth->scale = { (float)tsiz.x / (float)tilesiz[dapic].x, (float)tsiz.y / (float)tilesiz[dapic].y };

    polymost_setuptexture(dameth, (hicr->flags & HICR_FORCEFILTER) ? TEXFILTER_ON : -1);

    if (tsiz.x>>r_downsize <= tilesiz[dapic].x || tsiz.y>>r_downsize <= tilesiz[dapic].y)
        hicr->flags |= HICR_ARTIMMUNITY;

    pth->picnum = dapic;
    pth->effects = effect;
    pth->flags = TO_PTH_CLAMPED(dameth) | TO_PTH_NOTRANSFIX(dameth) |
                 PTH_HIGHTILE | ((facen>0) * PTH_SKYBOX) |
                 (onebitalpha ? PTH_ONEBITALPHA : 0) |
                 (hasalpha ? PTH_HASALPHA : 0) |
                 ((hicr->flags & HICR_FORCEFILTER) ? PTH_FORCEFILTER : 0);
    pth->skyface = facen;
    pth->hicr = hicr;

#if defined USE_GLEXT && !defined EDUKE32_GLES
    if (!gotcache && glinfo.texcompr && glusetexcache && !(hicr->flags & HICR_NOTEXCOMPRESS) &&
        (glusetexcompr == 2 || (glusetexcompr && !(hicr->flags & HICR_ARTIMMUNITY))))
    {
        const int32_t nonpow2 = check_nonpow2(siz.x) || check_nonpow2(siz.y);

        // save off the compressed version
        cachead.quality = (hicr->flags & (HICR_NODOWNSIZE|HICR_ARTIMMUNITY)) ? 0 : r_downsize;
        cachead.xdim = tsiz.x >> cachead.quality;
        cachead.ydim = tsiz.y >> cachead.quality;

        // handle nodownsize:
        cachead.flags = nonpow2 * CACHEAD_NONPOW2 | (hasalpha ? CACHEAD_HASALPHA : 0) |
                        ((hicr->flags & (HICR_NODOWNSIZE|HICR_ARTIMMUNITY)) ? CACHEAD_NODOWNSIZE : 0);

        ///            OSD_Printf("Caching \"%s\"\n", fn);
        texcache_writetex_fromdriver(texcacheid, &cachead);

        if (willprint)
        {
            int32_t etime = timerGetTicks() - startticks;
            if (etime >= MIN_CACHETIME_PRINT)
                OSD_Printf("Load tile %4d: p%d-m%d-e%d %s... cached... %d ms\n", dapic, dapalnum, dameth, effect,
                           willprint == 2 ? fn : "", etime);
            willprint = 0;
        }
        else
            OSD_Printf("Cached \"%s\"\n", fn);
    }
#endif

    if (willprint)
    {
        int32_t etime = timerGetTicks()-startticks;
        if (etime>=MIN_CACHETIME_PRINT)
            OSD_Printf("Load tile %4d: p%d-m%d-e%d %s... %d ms\n", dapic, dapalnum, dameth, effect,
                       willprint==2 ? fn : "", etime);
    }

    return 0;
}

#ifdef USE_GLEXT
void polymost_setupdetailtexture(const int32_t texunits, const int32_t tex)
{
    glActiveTexture(texunits);

    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

    glClientActiveTexture(texunits);
    glEnableClientState(GL_TEXTURE_COORD_ARRAY);
}

void polymost_setupglowtexture(const int32_t texunits, const int32_t tex)
{
    glActiveTexture(texunits);

    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);

    glClientActiveTexture(texunits);
    glEnableClientState(GL_TEXTURE_COORD_ARRAY);
}
#endif


//(dpx,dpy) specifies an n-sided polygon. The polygon must be a convex clockwise loop.
//    n must be <= 8 (assume clipping can double number of vertices)
//method: 0:solid, 1:masked(255 is transparent), 2:translucent #1, 3:translucent #2
//    +4 means it's a sprite, so wraparound isn't needed

// drawpoly's hack globals
// NOTE(review): the code that reads and writes these lies outside this chunk; the notes below are
// inferred from the names only and need confirming against drawpoly and its callers.
// presumably per-call switches for x-splitting at power-of-two boundaries and for sky-specific clamping / depth handling — TODO confirm
static int32_t pow2xsplit = 0, skyclamphack = 0, skyzbufferhack = 0;
// presumably the alpha and blend-table overrides applied to the polygon being drawn — TODO confirm
static float drawpoly_alpha = 0.f;
static uint8_t drawpoly_blend = 0;

// Fetches the texture cache entry for the current global tile/palette/shade with method `dameth`.
// The first argument handed to getpalookup() depends on the shading mode:
//   r_usenewshading == 4: 1 if GL tile shades are enabled, 0 otherwise;
//   any other mode:       globvis>>3 if GL tile shades are enabled, 0 otherwise.
static inline pthtyp *our_texcache_fetch(int32_t dameth)
{
    bool const tileShades = !(globalflags & GLOBAL_NO_GL_TILESHADES);
    int32_t visArg;

    if (r_usenewshading == 4)
        visArg = tileShades;
    else if (tileShades)
        visArg = globvis>>3;
    else
        visArg = 0;

    return texcache_fetch(globalpicnum, globalpal, getpalookup(visArg, globalshade), dameth);
}

// Reports whether a masked wall needs translucent rendering.
// Returns non-zero if the wall has CSTAT_WALL_TRANSLUCENT set, or if hightiles are enabled and
// the wall's texture (fetched at shade 0 as a masked wall texture) has alpha that is not one-bit.
// A wall palette with no lookup table loaded is treated as palette 0.
int32_t polymost_maskWallHasTranslucency(uwalltype const * const wall)
{
    if (wall->cstat & CSTAT_WALL_TRANSLUCENT)
        return true;

    //POGO: only hightiles may have translucency in their texture
    if (!usehightile)
        return false;

    uint8_t const wallPal = wall->pal;
    uint8_t const lookupPal = (palookup[wallPal] == NULL) ? 0 : wallPal;

    pthtyp * const entry = texcache_fetch(wall->picnum, lookupPal, 0, DAMETH_MASK | DAMETH_WALL);

    if (!entry || !(entry->flags & PTH_HASALPHA))
        return false;

    return !(entry->flags & PTH_ONEBITALPHA);
}

// Reports whether a sprite needs translucent rendering.
// Returns non-zero if the sprite has CSTAT_SPRITE_TRANSLUCENT or CSTAT_SPRITE_RESERVED1 set, if its
// owner has a non-zero spriteext alpha, or if hightiles are enabled and the sprite's texture
// (fetched at shade 0 as a masked, clamped texture) has alpha that is not one-bit.
// A sprite palette with no lookup table loaded is treated as palette 0.
int32_t polymost_spriteHasTranslucency(uspritetype const * const tspr)
{
    if ((tspr->cstat & (CSTAT_SPRITE_TRANSLUCENT | CSTAT_SPRITE_RESERVED1)) ||
        ((unsigned)tspr->owner < MAXSPRITES && spriteext[tspr->owner].alpha))
        return true;

    //POGO: only hightiles may have translucency in their texture
    if (!usehightile)
        return false;

    // The texture must be looked up with the sprite's palette, as the wall variant does with
    // wall->pal; the shade value was being used as a palette number here.
    uint8_t pal = tspr->pal;
    if (palookup[pal] == NULL)
        pal = 0;

    pthtyp* pth = texcache_fetch(tspr->picnum, pal, 0, DAMETH_MASK | DAMETH_CLAMPED);
    return pth && (pth->flags & PTH_HASALPHA) && !(pth->flags & PTH_ONEBITALPHA);
}

static void polymost2_drawVBO(GLenum mode,
                              int32_t vertexBufferID,
                              int32_t indexBufferID,
                              const int32_t numElements,
                              float projectionMatrix[4*4],
                              float modelViewMatrix[4*4],
                              int32_t dameth,
                              float texScale[2],
                              float texOffset[2],
                              char cullFaces)
{
    if (dameth == DAMETH_BACKFACECULL ||
    #ifdef YAX_ENABLE
        g_nodraw ||
    #endif
        (uint32_t)globalpicnum >= MAXTILES)
    {
        return;
    }

    glDisableClientState(GL_VERTEX_ARRAY);
    glDisableClientState(GL_TEXTURE_COORD_ARRAY);

    if (cullFaces)
    {
        glEnable(GL_CULL_FACE);
    }
    //POGOTODO: this is temporary, the permanent fix is to not allow the transform to affect the windings in the first place in polymost2_drawSprite()
    if (cullFaces == 1)
    {
        glCullFace(GL_BACK);
    }
    else
    {
        glCullFace(GL_FRONT);
    }

    //POGOTODO: in the future, state changes like binding these buffers can be batched.  For now, just switch on every VBO rendered
    glBindBuffer(GL_ARRAY_BUFFER, vertexBufferID);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indexBufferID);

    glEnableVertexAttribArray(0);
    glEnableVertexAttribArray(1);

    if (palookup[globalpal] == NULL)
    {
        globalpal = 0;
    }

    //Load texture (globalpicnum)
    setgotpic(globalpicnum);
    if (!waloff[globalpicnum])
    {
        tileLoad(globalpicnum);
    }

    pthtyp *pth = our_texcache_fetch(dameth | (r_useindexedcolortextures ? PTH_INDEXED : 0));

    if (!pth)
    {
        if (editstatus)
        {
            Bsprintf(ptempbuf, "pth==NULL! (bad pal?) pic=%d pal=%d", globalpicnum, globalpal);
            polymost_printtext256(8,8, editorcolors[15],editorcolors[5], ptempbuf, 0);
        }
        return;
    }

    glActiveTexture(GL_TEXTURE1);
    //POGO: temporarily swapped out blankTextureID for 0 (as the blank texture has been moved into the dynamic tilesheets)
    glBindTexture(GL_TEXTURE_2D, (pth && pth->flags & PTH_HASFULLBRIGHT && r_fullbrights) ? pth->ofb->glpic : 0);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT);

    glActiveTexture(GL_TEXTURE0);
    polymost_bindPth(pth);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT);

    //POGOTODO: handle tinting & shading completely with fragment shader
    //POGOTODO: handle fullbright & glow completely with fragment shader

    //POGOTODO: glAlphaFunc is deprecated, move this into the fragment shader
    float const al = waloff[globalpicnum] ? alphahackarray[globalpicnum] != 0 ? alphahackarray[globalpicnum] * (1.f/255.f):
                             (pth && pth->hicr && pth->hicr->alphacut >= 0.f ? pth->hicr->alphacut : 0.f) : 0.f;
    glAlphaFunc(GL_GREATER, al);
    //POGOTODO: batch this, only apply it to sprites that actually need blending
    glEnable(GL_BLEND);
    glEnable(GL_ALPHA_TEST);

    handle_blend((dameth & DAMETH_MASKPROPS) > DAMETH_MASK, drawpoly_blend, (dameth & DAMETH_MASKPROPS) == DAMETH_TRANS2);

    useShaderProgram(polymost2BasicShaderProgramID);

    //POGOTODO: batch uniform binding
    float tint[4] = {1.0f, 1.0f, 1.0f, 1.0f};
    polytint_t const & polytint = hictinting[globalpal];
    //POGOTODO: full bright pass uses its own globalshade...
    tint[0] = (1.f-(polytint.sr*(1.f/255.f)))*getshadefactor(globalshade)+(polytint.sr*(1.f/255.f));
    tint[1] = (1.f-(polytint.sg*(1.f/255.f)))*getshadefactor(globalshade)+(polytint.sg*(1.f/255.f));
    tint[2] = (1.f-(polytint.sb*(1.f/255.f)))*getshadefactor(globalshade)+(polytint.sb*(1.f/255.f));

    // spriteext full alpha control
    float alpha = float_trans(dameth & DAMETH_MASKPROPS, drawpoly_blend) * (1.f - drawpoly_alpha);

    if (pth)
    {
        // tinting
        polytintflags_t const tintflags = hictinting[globalpal].f;
        if (!(tintflags & HICTINT_PRECOMPUTED))
        {
            if (pth->flags & PTH_HIGHTILE)
            {
                if (pth->palnum != globalpal || (pth->effects & HICTINT_IN_MEMORY) || (tintflags & HICTINT_APPLYOVERALTPAL))
                    hictinting_apply(tint, globalpal);
            }
            else if (tintflags & (HICTINT_USEONART|HICTINT_ALWAYSUSEART))
                hictinting_apply(tint, globalpal);
        }

        // global tinting
        if ((pth->flags & PTH_HIGHTILE) && have_basepal_tint())
            hictinting_apply(tint, MAXPALOOKUPS-1);
    }

    glUniformMatrix4fv(projMatrixLoc, 1, false, projectionMatrix);
    glUniformMatrix4fv(mvMatrixLoc, 1, false, modelViewMatrix);
    glUniform1i(texSamplerLoc, 0);
    glUniform1i(fullBrightSamplerLoc, 1);
    glUniform2fv(texOffsetLoc, 1, texOffset);
    glUniform2fv(texScaleLoc, 1, texScale);
    glUniform4fv(tintLoc, 1, tint);
    glUniform1f(alphaLoc, alpha);
    const float fogRange[2] = {fogresult, fogresult2};
    glUniform2fv(fogRangeLoc, 1, fogRange);
    glUniform4fv(fogColorLoc, 1, (GLfloat*) &fogcol);

    if (indexBufferID == 0)
    {
        glDrawArrays(mode,
                     0,
                     numElements);
    }
    else
    {
        glDrawElements(mode,
                       numElements,
                       GL_UNSIGNED_SHORT,
                       0);
    }

    glDisableVertexAttribArray(0);
    glDisableVertexAttribArray(1);

    //POGOTODO: again, these state changes should be batched in the future, rather than on each VBO rendered
    glBindBuffer(GL_ARRAY_BUFFER, 0);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);

    glDisable(GL_CULL_FACE);

    glEnableClientState(GL_VERTEX_ARRAY);
    glEnableClientState(GL_TEXTURE_COORD_ARRAY);

    //polymost_resetVertexPointers();
}

void polymost_updatePalette()
{
    if (videoGetRenderMode() != REND_POLYMOST)
        return;

    polymost_setPalswap(globalpal);
    polymost_setShade(globalshade);

    //POGO: only bind the base pal once when it's swapped
    if (curbasepal != lastbasepal)
    {
        glActiveTexture(GL_TEXTURE2);
        glBindTexture(GL_TEXTURE_2D, paletteTextureIDs[curbasepal]);
        lastbasepal = curbasepal;
        glActiveTexture(GL_TEXTURE0);
    }
}

static void polymost_lockSubBuffer(uint32_t subBufferIndex)
{
    if (drawpolyVertsSync[subBufferIndex])
    {
        glDeleteSync(drawpolyVertsSync[subBufferIndex]);
    }

    drawpolyVertsSync[subBufferIndex] = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
}

static void polymost_waitForSubBuffer(uint32_t subBufferIndex)
{
    if (drawpolyVertsSync[subBufferIndex])
    {
        while (true)
        {
            // we only need to flush if there's a possibility that drawpolyVertsBufferLength is
            // so small that we can eat through 3 times the buffer size in a single frame
            GLenum waitResult = glClientWaitSync(drawpolyVertsSync[subBufferIndex], GL_SYNC_FLUSH_COMMANDS_BIT, 500000);
            if (waitResult == GL_ALREADY_SIGNALED ||
                waitResult == GL_CONDITION_SATISFIED)
            {
                return;
            }
            if (waitResult == GL_WAIT_FAILED)
            {
                OSD_Printf("polymost_waitForSubBuffer: Wait failed! Error 0x%X. Disabling r_persistentStreamBuffer.\n", glGetError());
                r_persistentStreamBuffer = 0;
                videoResetMode();
                if (videoSetGameMode(fullscreen,xres,yres,bpp,upscalefactor))
                {
                    OSD_Printf("polymost_waitForSubBuffer: Video reset failed.  Please ensure r_persistentStreamBuffer = 0 and try restarting the game.\n");
                    Bexit(1);
                }
                return;
            }

            static char loggedLongWait = false;
            if (waitResult == GL_TIMEOUT_EXPIRED &&
                !loggedLongWait)
            {
                OSD_Printf("polymost_waitForSubBuffer(): Had to wait for the drawpoly buffer to become available.  For performance, try increasing buffer size with r_drawpolyVertsBufferLength.\n");
                loggedLongWait = true;
            }
        }
    }
}

static void polymost_updaterotmat(void)
{
    if (currentShaderProgramID == polymost1CurrentShaderProgramID)
    {
        float matrix[16] = {
            1.f, 0.f, 0.f, 0.f,
            0.f, 1.f, 0.f, 0.f,
            0.f, 0.f, 1.f, 0.f,
            0.f, 0.f, 0.f, 1.f,
        };
#if !SOFTROTMAT
        //Up/down rotation
        float udmatrix[16] = {
            1.f, 0.f, 0.f, 0.f,
            0.f, gchang, -gshang, 0.f,
            0.f, gshang, gchang, 0.f,
            0.f, 0.f, 0.f, 1.f,
        };
        // Tilt rotation
        float tiltmatrix[16] = {
            gctang, -gstang, 0.f, 0.f,
            gstang, gctang, 0.f, 0.f,
            0.f, 0.f, 1.f, 0.f,
            0.f, 0.f, 0.f, 1.f,
        };
        multiplyMatrix4f(matrix, udmatrix);
        multiplyMatrix4f(matrix, tiltmatrix);
#endif
        Bmemcpy(polymost1RotMatrix, matrix, sizeof(matrix));
        glUniformMatrix4fv(polymost1RotMatrixLoc, 1, false, polymost1RotMatrix);
    }
}

static void polymost_identityrotmat(void)
{
    if (currentShaderProgramID == polymost1CurrentShaderProgramID)
    {
        float matrix[16] = {
            1.f, 0.f, 0.f, 0.f,
            0.f, 1.f, 0.f, 0.f,
            0.f, 0.f, 1.f, 0.f,
            0.f, 0.f, 0.f, 1.f,
        };
        Bmemcpy(polymost1RotMatrix, matrix, sizeof(matrix));
        glUniformMatrix4fv(polymost1RotMatrixLoc, 1, false, polymost1RotMatrix);
    }
}

static void polymost_polyeditorfunc(vec2f_t const * const dpxy, int n)
{
    if (!doeditorcheck)
        return;

    for (int i = 0; i < n; i++)
    {
        float dx1 = dpxy[(i+1)%n].x-dpxy[i].x;
        float dy1 = dpxy[(i+1)%n].y-dpxy[i].y;
        float dx2 = fsearchx-dpxy[i].x;
        float dy2 = fsearchy-dpxy[i].y;
        float cross = dx1*dy2-dx2*dy1;
        if (cross < 0.f)
            return;
    }

    float const z = otex.d + xtex.d * fsearchx + ytex.d * fsearchy;

    if (z > fsearchz)
    {
        searchit = 1;
        searchsector = psectnum;
        searchwall = pwallnum;
        searchbottomwall = pbottomwall;
        searchisbottom = pisbottomwall;
        searchstat = psearchstat;
        fsearchz = z;
    }
}

// Draws one screen-space polygon textured with the current global tile
// (globalpicnum / globalpal / globalshade).
//   dpxy:   the n polygon vertices (screen coordinates)
//   n:      vertex count; asserted to be <= MAX_DRAWPOLY_VERTS
//   method: DAMETH_* value; a method equal to DAMETH_BACKFACECULL returns right after the
//           optional editor hit test
// Per-vertex depth and texture coordinates are derived from the xtex/ytex/otex globals.
// Vertices are streamed through drawpolyVerts and drawn as a triangle fan.
// The function calls itself for the fullbright pass and for the sky z-buffer pass; the
// static variables fullbright_pass and skyzbufferhack_pass track that re-entrancy.
static void polymost_drawpoly(vec2f_t const * const dpxy, int32_t const n, int32_t method)
{
    if (doeditorcheck && editstatus)
        polymost_polyeditorfunc(dpxy, n);

    if (method == DAMETH_BACKFACECULL ||
#ifdef YAX_ENABLE
        g_nodraw ||
#endif
        (uint32_t)globalpicnum >= MAXTILES)
        return;

    // unmodified method, reused for the recursive fullbright pass
    const int32_t method_ = method;

    if (n == 3)
    {
        if ((dpxy[0].x-dpxy[1].x) * (dpxy[2].y-dpxy[1].y) >=
            (dpxy[2].x-dpxy[1].x) * (dpxy[0].y-dpxy[1].y)) return; //for triangle
    }
    else if (n > 3)
    {
        float f = 0; //f is area of polygon / 2

        for (bssize_t i=n-2, j=n-1,k=0; k<n; i=j,j=k,k++)
            f += (dpxy[i].x-dpxy[k].x)*dpxy[j].y;

        // nothing to draw when the accumulated value is zero or negative
        if (f <= 0) return;
    }

    if (palookup[globalpal] == NULL)
        globalpal = 0;

    //Load texture (globalpicnum)
    setgotpic(globalpicnum);
    vec2_16_t const & tsizart = tilesiz[globalpicnum];
    vec2_t tsiz = { tsizart.x, tsizart.y };

    if (!waloff[globalpicnum])
    {
        tileLoad(globalpicnum);
    }

    Bassert(n <= MAX_DRAWPOLY_VERTS);

    int j = 0;
    // NOTE(review): these scratch arrays hold 8 entries each; this presumably relies on
    // MAX_DRAWPOLY_VERTS being <= 8, and on the strip clipping further down never writing
    // more than 8 entries into uu/vv -- confirm.
    float px[8], py[8], dd[8], uu[8], vv[8];
#if SOFTROTMAT
    float const ozgs = ghalfx * gshang,
                ozgc = ghalfx * gchang;
#endif

    for (bssize_t i=0; i<n; ++i)
    {
#if SOFTROTMAT
        //Up/down rotation
        vec3f_t const orot = { dpxy[i].x - ghalfx,
                              (dpxy[i].y - ghoriz) * gchang - ozgs,
                              (dpxy[i].y - ghoriz) * gshang + ozgc };

        // Tilt rotation
        float const r = ghalfx / orot.z;

        px[j] = ghalfx + (((orot.x * gctang) - (orot.y * gstang)) * r);
        py[j] = ghoriz + (((orot.x * gstang) + (orot.y * gctang)) * r);

        dd[j] = (dpxy[i].x * xtex.d + dpxy[i].y * ytex.d + otex.d) * r;
        uu[j] = (dpxy[i].x * xtex.u + dpxy[i].y * ytex.u + otex.u) * r;
        vv[j] = (dpxy[i].x * xtex.v + dpxy[i].y * ytex.v + otex.v) * r;

        // skip a vertex that coincides with the previous one
        if ((!j) || (px[j] != px[j-1]) || (py[j] != py[j-1]))
            j++;
#else
        px[j] = dpxy[i].x;
        py[j] = dpxy[i].y;

        dd[j] = (dpxy[i].x * xtex.d + dpxy[i].y * ytex.d + otex.d);
        uu[j] = (dpxy[i].x * xtex.u + dpxy[i].y * ytex.u + otex.u);
        vv[j] = (dpxy[i].x * xtex.v + dpxy[i].y * ytex.v + otex.v);
        j++;
#endif
    }

    // drop trailing vertices that coincide with the first one
    while ((j >= 3) && (px[j-1] == px[0]) && (py[j-1] == py[0])) j--;

    if (j < 3)
        return;

    int const npoints = j;

    if (skyclamphack) method |= DAMETH_CLAMPED;

    polymost_outputGLDebugMessage(3, "polymost_drawpoly(dpxy:%p, n:%d, method_:%X), method: %X", dpxy, n, method_, method);

    pthtyp *pth = our_texcache_fetch(method | (videoGetRenderMode() == REND_POLYMOST && r_useindexedcolortextures ? PTH_INDEXED : 0));

    if (!pth)
    {
        if (editstatus)
        {
            Bsprintf(ptempbuf, "pth==NULL! (bad pal?) pic=%d pal=%d", globalpicnum, globalpal);
            polymost_printtext256(8,8, editorcolors[15],editorcolors[5], ptempbuf, 0);
        }
        return;
    }

    if (!waloff[globalpicnum])
    {
        tsiz.x = tsiz.y = 1;
        glColorMask(false, false, false, false); //Hack to update Z-buffer for invalid mirror textures
    }

    // 0: idle; 1: base pass of a tile that has a fullbright layer;
    // 2: recursive pass drawing the fullbright layer (pth->ofb)
    static int32_t fullbright_pass = 0;

    if (pth->flags & PTH_HASFULLBRIGHT && r_fullbrights)
    {
        if (!fullbright_pass)
            fullbright_pass = 1;
        else if (fullbright_pass == 2)
            pth = pth->ofb;
    }

    Bassert(pth);

    // If we aren't rendmode 3, we're in Polymer, which means this code is
    // used for rotatesprite only. Polymer handles all the material stuff,
    // just submit the geometry and don't mess with textures.
    if (videoGetRenderMode() == REND_POLYMOST)
    {
        polymost_bindPth(pth);

        //POGOTODO: I could move this into bindPth
        if (!(pth->flags & PTH_INDEXED))
            polymost_usePaletteIndexing(false);

        // the wrap modes set here are switched back to clamping near the end of the function
        if (drawpoly_srepeat)
            glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_REPEAT);
        if (drawpoly_trepeat)
            glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_REPEAT);
    }

    // texture scale by parkar request
    if (pth->hicr && !drawingskybox && ((pth->hicr->scale.x != 1.0f) || (pth->hicr->scale.y != 1.0f)))
    {
        glMatrixMode(GL_TEXTURE);
        glLoadIdentity();
        glScalef(pth->hicr->scale.x, pth->hicr->scale.y, 1.0f);
        glMatrixMode(GL_MODELVIEW);
    }

#ifdef USE_GLEXT
    // highest texture unit touched below; used for the per-unit cleanup after drawing
    int32_t texunits = GL_TEXTURE0;

    if (videoGetRenderMode() == REND_POLYMOST)
    {
        polymost_updatePalette();
        texunits += 4;
    }

    // detail texture
    if (r_detailmapping)
    {
        pthtyp *detailpth = NULL;

        if (usehightile && !drawingskybox && hicfindsubst(globalpicnum, DETAILPAL, 1) &&
            (detailpth = texcache_fetch(globalpicnum, DETAILPAL, 0, method & ~DAMETH_MASKPROPS)) &&
            detailpth->hicr && detailpth->hicr->palnum == DETAILPAL)
        {
            polymost_useDetailMapping(true);
            polymost_setupdetailtexture(videoGetRenderMode() == REND_POLYMOST ? GL_TEXTURE3 : ++texunits, detailpth->glpic);

            glMatrixMode(GL_TEXTURE);
            glLoadIdentity();

            if (pth->hicr && ((pth->hicr->scale.x != 1.0f) || (pth->hicr->scale.y != 1.0f)))
                glScalef(pth->hicr->scale.x, pth->hicr->scale.y, 1.0f);

            if ((detailpth->hicr->scale.x != 1.0f) || (detailpth->hicr->scale.y != 1.0f))
                glScalef(detailpth->hicr->scale.x, detailpth->hicr->scale.y, 1.0f);

            glMatrixMode(GL_MODELVIEW);
            glActiveTexture(GL_TEXTURE0);
        }
    }

    // glow texture
    if (r_glowmapping)
    {
        pthtyp *glowpth = NULL;

        if (usehightile && !drawingskybox && hicfindsubst(globalpicnum, GLOWPAL, 1) &&
            (glowpth = texcache_fetch(globalpicnum, GLOWPAL, 0, (method & ~DAMETH_MASKPROPS) | DAMETH_MASK)) &&
            glowpth->hicr && (glowpth->hicr->palnum == GLOWPAL))
        {
            polymost_useGlowMapping(true);
            polymost_setupglowtexture(videoGetRenderMode() == REND_POLYMOST ? GL_TEXTURE4 : ++texunits, glowpth->glpic);
            glActiveTexture(GL_TEXTURE0);
        }
    }

    if (glinfo.texnpot && r_npotwallmode == 2 && (method & DAMETH_WALL) != 0)
    {
        int32_t size = tilesiz[globalpicnum].y;
        int32_t size2;
        // size2 = smallest power of two >= size
        for (size2 = 1; size2 < size; size2 += size2) {}
        if (size == size2)
            polymost_npotEmulation(false, 1.f, 0.f);
        else
        {
            float xOffset = 1.f / tilesiz[globalpicnum].x;
            polymost_npotEmulation(true, (1.f*size2) / size, xOffset);
        }
    }
    else
    {
        polymost_npotEmulation(false, 1.f, 0.f);
    }
#endif

    vec2f_t hacksc = { 1.f, 1.f };

    if (pth->flags & PTH_HIGHTILE)
    {
        hacksc = pth->scale;
        tsiz = pth->siz;
    }

    vec2_t tsiz2 = tsiz;

    // without NPOT texture support, round each dimension up to the next power of two
    if (!glinfo.texnpot)
    {
        for (tsiz2.x = 1; tsiz2.x < tsiz.x; tsiz2.x += tsiz2.x)
            ; /* do nothing */
        for (tsiz2.y = 1; tsiz2.y < tsiz.y; tsiz2.y += tsiz2.y)
            ; /* do nothing */
    }

    // nonzero while inside the recursive sky z-buffer pass issued near the end of the function
    static int32_t skyzbufferhack_pass = 0;

    if (method & DAMETH_MASKPROPS || fullbright_pass == 2)
    {
        float const al = alphahackarray[globalpicnum] != 0 ? alphahackarray[globalpicnum] * (1.f/255.f) :
                         (pth->hicr && pth->hicr->alphacut >= 0.f ? pth->hicr->alphacut : 0.f);

        glAlphaFunc(GL_GREATER, al);
        handle_blend((method & DAMETH_MASKPROPS) > DAMETH_MASK, drawpoly_blend, (method & DAMETH_MASKPROPS) == DAMETH_TRANS2);
    }

    // polygon color (r, g, b, a) passed to glColor4f() below
    float pc[4];

#ifdef POLYMER
    if (videoGetRenderMode() == REND_POLYMER && pr_artmapping && !(globalflags & GLOBAL_NO_GL_TILESHADES) && polymer_eligible_for_artmap(globalpicnum, pth))
        pc[0] = pc[1] = pc[2] = 1.0f;
    else
#endif
    {
        polytint_t const & tint = hictinting[globalpal];
        // indexed textures with GL tile shades enabled use a shade factor of 1 here
        float shadeFactor = (pth->flags & PTH_INDEXED) &&
                            !(globalflags & GLOBAL_NO_GL_TILESHADES) ? 1.f : getshadefactor(globalshade);
        pc[0] = (1.f-(tint.sr*(1.f/255.f)))*shadeFactor+(tint.sr*(1.f/255.f));
        pc[1] = (1.f-(tint.sg*(1.f/255.f)))*shadeFactor+(tint.sg*(1.f/255.f));
        pc[2] = (1.f-(tint.sb*(1.f/255.f)))*shadeFactor+(tint.sb*(1.f/255.f));
    }

    // spriteext full alpha control
    pc[3] = float_trans(method & DAMETH_MASKPROPS, drawpoly_blend) * (1.f - drawpoly_alpha);

    // tinting
    polytintflags_t const tintflags = hictinting[globalpal].f;
    if (!(tintflags & HICTINT_PRECOMPUTED))
    {
        if (pth->flags & PTH_HIGHTILE)
        {
            if (pth->palnum != globalpal || (pth->effects & HICTINT_IN_MEMORY) || (tintflags & HICTINT_APPLYOVERALTPAL))
                hictinting_apply(pc, globalpal);
        }
        else if (tintflags & (HICTINT_USEONART|HICTINT_ALWAYSUSEART))
            hictinting_apply(pc, globalpal);
    }

    // global tinting
    if ((pth->flags & PTH_HIGHTILE) && have_basepal_tint())
        hictinting_apply(pc, MAXPALOOKUPS-1);

    globaltinting_apply(pc);

    if (skyzbufferhack_pass)
        pc[3] = 0.01f;

    glColor4f(pc[0], pc[1], pc[2], pc[3]);

    //POGOTODO: remove this, replace it with a shader implementation
    //Hack for walls&masked walls which use textures that are not a power of 2
    if ((pow2xsplit) && (tsiz.x != tsiz2.x))
    {
        vec3f_t const opxy[3] = { { py[1] - py[2], py[2] - py[0], py[0] - py[1] },
                                  { px[2] - px[1], px[0] - px[2], px[1] - px[0] },
                                  { px[0] - .5f, py[0] - .5f, 0 } };

        float const r = 1.f / (opxy[0].x*px[0] + opxy[0].y*px[1] + opxy[0].z*px[2]);

        // ngx/ngy/ngo: screen-space gradients and offset of (d, u, v), rebuilt from the first three vertices
        vec3f_t ngx = { (opxy[0].x * dd[0] + opxy[0].y * dd[1] + opxy[0].z * dd[2]) * r,
                        ((opxy[0].x * uu[0] + opxy[0].y * uu[1] + opxy[0].z * uu[2]) * r) * hacksc.x,
                        ((opxy[0].x * vv[0] + opxy[0].y * vv[1] + opxy[0].z * vv[2]) * r) * hacksc.y };

        vec3f_t ngy = { (opxy[1].x * dd[0] + opxy[1].y * dd[1] + opxy[1].z * dd[2]) * r,
                        ((opxy[1].x * uu[0] + opxy[1].y * uu[1] + opxy[1].z * uu[2]) * r) * hacksc.x,
                        ((opxy[1].x * vv[0] + opxy[1].y * vv[1] + opxy[1].z * vv[2]) * r) * hacksc.y };

        vec3f_t ngo = { dd[0] - opxy[2].x * ngx.d - opxy[2].y * ngy.d,
                        (uu[0] - opxy[2].x * ngx.u - opxy[2].y * ngy.u) * hacksc.x,
                        (vv[0] - opxy[2].x * ngx.v - opxy[2].y * ngy.v) * hacksc.y };

        float const uoffs = ((float)(tsiz2.x - tsiz.x) * 0.5f);

        ngx.u -= ngx.d * uoffs;
        ngy.u -= ngy.d * uoffs;
        ngo.u -= ngo.d * uoffs;

        float du0 = 0.f, du1 = 0.f;

        //Find min&max u coordinates (du0...du1)
        for (bssize_t i=0; i<npoints; ++i)
        {
            vec2f_t const o = { px[i], py[i] };
            float const f = (o.x*ngx.u + o.y*ngy.u + ngo.u) / (o.x*ngx.d + o.y*ngy.d + ngo.d);
            if (!i) { du0 = du1 = f; continue; }
            if (f < du0) du0 = f;
            else if (f > du1) du1 = f;
        }

        float const rf = 1.0f / tsiz.x;
        int const ix1 = (int)floorf(du1 * rf);

        // one iteration per tile-width strip [du0, du1] in u that the polygon covers
        for (bssize_t ix0 = (int)floorf(du0 * rf); ix0 <= ix1; ++ix0)
        {
            du0 = (float)(ix0 * tsiz.x);        // + uoffs;
            du1 = (float)((ix0 + 1) * tsiz.x);  // + uoffs;

            float duj = (px[0]*ngx.u + py[0]*ngy.u + ngo.u) / (px[0]*ngx.d + py[0]*ngy.d + ngo.d);
            int i = 0, nn = 0;

            // clip the polygon against the strip; the clipped vertices are collected in uu[]/vv[]
            // (reused here as screen x/y), nn counts them
            do
            {
                j = i + 1;

                if (j == npoints)
                    j = 0;

                float const dui = duj;

                duj = (px[j]*ngx.u + py[j]*ngy.u + ngo.u) / (px[j]*ngx.d + py[j]*ngy.d + ngo.d);

                if ((du0 <= dui) && (dui <= du1))
                {
                    uu[nn] = px[i];
                    vv[nn] = py[i];
                    nn++;
                }

                //ox*(ngx.u-ngx.d*du1) + oy*(ngy.u-ngdy*du1) + (ngo.u-ngo.d*du1) = 0
                //(px[j]-px[i])*f + px[i] = ox
                //(py[j]-py[i])*f + py[i] = oy

                ///Solve for f
                //((px[j]-px[i])*f + px[i])*(ngx.u-ngx.d*du1) +
                //((py[j]-py[i])*f + py[i])*(ngy.u-ngdy*du1) + (ngo.u-ngo.d*du1) = 0

                //POGOTODO: this could be a static inline function -- the do/while loop should be just a pair of braces
#define DRAWPOLY_MATH_BULLSHIT(XXX)                                                                                \
do                                                                                                                 \
{                                                                                                                  \
    float const f = -(px[i] * (ngx.u - ngx.d * XXX) + py[i] * (ngy.u - ngy.d * XXX) + (ngo.u - ngo.d * XXX)) /     \
        ((px[j] - px[i]) * (ngx.u - ngx.d * XXX) + (py[j] - py[i]) * (ngy.u - ngy.d * XXX));                       \
    uu[nn] = (px[j] - px[i]) * f + px[i];                                                                          \
    vv[nn] = (py[j] - py[i]) * f + py[i];                                                                          \
    ++nn;                                                                                                          \
} while (0)


                // emit edge/strip-boundary intersections in the order they occur along the edge
                if (duj <= dui)
                {
                    if ((du1 < duj) != (du1 < dui)) DRAWPOLY_MATH_BULLSHIT(du1);
                    if ((du0 < duj) != (du0 < dui)) DRAWPOLY_MATH_BULLSHIT(du0);
                }
                else
                {
                    if ((du0 < duj) != (du0 < dui)) DRAWPOLY_MATH_BULLSHIT(du0);
                    if ((du1 < duj) != (du1 < dui)) DRAWPOLY_MATH_BULLSHIT(du1);
                }

#undef DRAWPOLY_MATH_BULLSHIT

                i = j;
            }
            while (i);

            if (nn < 3) continue;

            // not enough room left in the current sub buffer for nn more vertices
            if (nn+drawpolyVertsOffset > (drawpolyVertsSubBufferIndex+1)*drawpolyVertsBufferLength)
            {
                if (persistentStreamBuffer)
                {
                    // lock this sub buffer
                    polymost_lockSubBuffer(drawpolyVertsSubBufferIndex);
                    drawpolyVertsSubBufferIndex = (drawpolyVertsSubBufferIndex+1)%3;
                    drawpolyVertsOffset = drawpolyVertsSubBufferIndex*drawpolyVertsBufferLength;
                    // wait for the next sub buffer to become available before writing to it
                    // our buffer size should be long enough that no waiting is ever necessary
                    polymost_waitForSubBuffer(drawpolyVertsSubBufferIndex);
                }
                else
                {
                    glBufferData(GL_ARRAY_BUFFER, sizeof(float)*5*drawpolyVertsBufferLength, NULL, GL_STREAM_DRAW);
                    drawpolyVertsOffset = 0;
                }
            }

            vec2f_t const invtsiz2 = { 1.f / tsiz2.x, 1.f / tsiz2.y };
            // with a persistent buffer, write at the stream offset; otherwise stage at the start
            // of drawpolyVerts and upload with glBufferSubData() below
            uint32_t off = persistentStreamBuffer ? drawpolyVertsOffset : 0;
            for (i = 0; i<nn; ++i)
            {
                vec2f_t const o = { uu[i], vv[i] };
                vec3f_t const p = { o.x * ngx.d + o.y * ngy.d + ngo.d,
                                    o.x * ngx.u + o.y * ngy.u + ngo.u,
                                    o.x * ngx.v + o.y * ngy.v + ngo.v };
                float const r = 1.f/p.d;

                //update verts
                drawpolyVerts[(off+i)*5] = (o.x - ghalfx) * r * grhalfxdown10x;
                drawpolyVerts[(off+i)*5+1] = (ghalfy - o.y) * r * grhalfxdown10;
                drawpolyVerts[(off+i)*5+2] = r * (1.f / 1024.f);

                //update texcoords
                drawpolyVerts[(off+i)*5+3] = (p.u * r - du0 + uoffs) * invtsiz2.x;
                drawpolyVerts[(off+i)*5+4] = p.v * r * invtsiz2.y;
            }

            if (!persistentStreamBuffer)
            {
                glBufferSubData(GL_ARRAY_BUFFER, drawpolyVertsOffset*sizeof(float)*5, nn*sizeof(float)*5, drawpolyVerts);
            }
            glDrawArrays(GL_TRIANGLE_FAN, drawpolyVertsOffset, nn);
            drawpolyVertsOffset += nn;
        }
    }
    else
    {
        // not enough room left in the current sub buffer for npoints more vertices
        if (npoints+drawpolyVertsOffset > (drawpolyVertsSubBufferIndex+1)*drawpolyVertsBufferLength)
        {
            if (persistentStreamBuffer)
            {
                // lock this sub buffer
                polymost_lockSubBuffer(drawpolyVertsSubBufferIndex);
                drawpolyVertsSubBufferIndex = (drawpolyVertsSubBufferIndex+1)%3;
                drawpolyVertsOffset = drawpolyVertsSubBufferIndex*drawpolyVertsBufferLength;
                // wait for the next sub buffer to become available before writing to it
                // our buffer size should be long enough that no waiting is ever necessary
                polymost_waitForSubBuffer(drawpolyVertsSubBufferIndex);
            }
            else
            {
                glBufferData(GL_ARRAY_BUFFER, sizeof(float)*5*drawpolyVertsBufferLength, NULL, GL_STREAM_DRAW);
                drawpolyVertsOffset = 0;
            }
        }

        vec2f_t const scale = { 1.f / tsiz2.x * hacksc.x, 1.f / tsiz2.y * hacksc.y };
        // with a persistent buffer, write at the stream offset; otherwise stage at the start
        // of drawpolyVerts and upload with glBufferSubData() below
        uint32_t off = persistentStreamBuffer ? drawpolyVertsOffset : 0;
        for (bssize_t i = 0; i < npoints; ++i)
        {
            float const r = 1.f / dd[i];

            //update verts
            drawpolyVerts[(off+i)*5] = (px[i] - ghalfx) * r * grhalfxdown10x;
            drawpolyVerts[(off+i)*5+1] = (ghalfy - py[i]) * r * grhalfxdown10;
            drawpolyVerts[(off+i)*5+2] = r * (1.f / 1024.f);

            //update texcoords
            drawpolyVerts[(off+i)*5+3] = uu[i] * r * scale.x;
            drawpolyVerts[(off+i)*5+4] = vv[i] * r * scale.y;
        }

        if (!persistentStreamBuffer)
        {
            glBufferSubData(GL_ARRAY_BUFFER, drawpolyVertsOffset*sizeof(float)*5, npoints*sizeof(float)*5, drawpolyVerts);
        }
        glDrawArrays(GL_TRIANGLE_FAN, drawpolyVertsOffset, npoints);
        drawpolyVertsOffset += npoints;
    }

#ifdef USE_GLEXT
    // outside Polymost, reset every extra texture unit that was set up above
    if (videoGetRenderMode() != REND_POLYMOST)
    {
        while (texunits > GL_TEXTURE0)
        {
            glActiveTexture(texunits);
            glMatrixMode(GL_TEXTURE);
            glLoadIdentity();
            glMatrixMode(GL_MODELVIEW);

            glClientActiveTexture(texunits);
            glDisableClientState(GL_TEXTURE_COORD_ARRAY);

            glTexEnvf(GL_TEXTURE_ENV, GL_RGB_SCALE, 1.0f);

            --texunits;
        }
    }

    polymost_useDetailMapping(false);
    polymost_useGlowMapping(false);
    polymost_npotEmulation(false, 1.f, 0.f);
#endif
    // undo the hightile texture-matrix scale applied before drawing
    if (pth->hicr)
    {
        glMatrixMode(GL_TEXTURE);
        glLoadIdentity();
        glMatrixMode(GL_MODELVIEW);
    }

    if (videoGetRenderMode() != REND_POLYMOST)
    {
        if (!waloff[globalpicnum])
            glColorMask(true, true, true, true);

        // NOTE(review): fullbright_pass is not reset on this path, so it can remain 1 after
        // a tile with a fullbright layer was drawn outside Polymost -- confirm this is intended.
        return;
    }

    if (!(pth->flags & PTH_INDEXED))
    {
        // restore palette usage if we were just rendering a non-indexed color texture
        polymost_usePaletteIndexing(true);
    }

    int const clamp_mode = glinfo.clamptoedge ? GL_CLAMP_TO_EDGE : GL_CLAMP;

    if (drawpoly_srepeat)
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, clamp_mode);

    if (drawpoly_trepeat)
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, clamp_mode);

    // second pass: redraw the same polygon with the fullbright layer, at shade -128,
    // with fog disabled and the depth test set to GL_EQUAL
    if (fullbright_pass == 1)
    {
        int32_t const shade = globalshade;

        globalshade = -128;
        fullbright_pass = 2;

        polymost_setFogEnabled(false);

        glDepthFunc(GL_EQUAL);

        polymost_drawpoly(dpxy, n, method_);

        glDepthFunc(GL_LEQUAL);

        if (!nofog)
            polymost_setFogEnabled(true);

        globalshade = shade;
        fullbright_pass = 0;
    }

    // sky z-buffer hack: one extra color-masked pass using the xtex2/ytex2/otex2 mapping;
    // xtex/ytex/otex are saved and restored around it
    if (skyzbufferhack && skyzbufferhack_pass == 0)
    {
        vec3d_t const bxtex = xtex, bytex = ytex, botex = otex;
        xtex = xtex2, ytex = ytex2, otex = otex2;
        skyzbufferhack_pass++;
        glColorMask(false, false, false, false);
        polymost_drawpoly(dpxy, n, DAMETH_MASK);
        glColorMask(true, true, true, true);
        xtex = bxtex, ytex = bytex, otex = botex;
        skyzbufferhack_pass--;
    }

    // undo the color mask set for tiles without pixel data
    if (!waloff[globalpicnum])
        glColorMask(true, true, true, true);
}


// Finishes initialization of the vsp[] strip list once the first vcnt (+1) entries have
// had their x / cy[0] / fy[0] values filled in:
//   - entries 0..vcnt-1 get their right-edge heights from the following entry, their
//     tags set to their own index, and are linked into a circular list
//   - entries vcnt..VSPMAX-1 are linked into a second circular list (the free list),
//     with VSPMAX-1 acting as its dummy node
static inline void vsp_finalize_init(int32_t const vcnt)
{
    for (int32_t node=0; node<vcnt; ++node)
    {
        // right edge of this strip = left edge of the next one
        vsp[node].cy[1] = vsp[node+1].cy[0];
        vsp[node].fy[1] = vsp[node+1].fy[0];

        vsp[node].ctag = node;
        vsp[node].ftag = node;

        vsp[node].n = node+1;
        vsp[node].p = node-1;
    }

    // close the used list into a ring
    vsp[vcnt-1].n = 0;
    vsp[0].p = vcnt-1;

    //VSPMAX-1 is dummy empty node
    for (int32_t node=vcnt; node<VSPMAX; ++node)
    {
        vsp[node].n = node+1;
        vsp[node].p = node-1;
    }

    // close the free list into a ring
    vsp[VSPMAX-1].n = vcnt;
    vsp[vcnt].p = VSPMAX-1;
}

#ifdef YAX_ENABLE
// Same as vsp_finalize_init(), but for the strip list of one YAX bunch
// (yax_vsp[yaxbunch][]), which only tracks the cy heights and ctag.
static inline void yax_vsp_finalize_init(int32_t const yaxbunch, int32_t const vcnt)
{
    for (int32_t node=0; node<vcnt; ++node)
    {
        // right edge of this strip = left edge of the next one
        yax_vsp[yaxbunch][node].cy[1] = yax_vsp[yaxbunch][node+1].cy[0];
        yax_vsp[yaxbunch][node].ctag = node;

        yax_vsp[yaxbunch][node].n = node+1;
        yax_vsp[yaxbunch][node].p = node-1;
    }

    // close the used list into a ring
    yax_vsp[yaxbunch][vcnt-1].n = 0;
    yax_vsp[yaxbunch][0].p = vcnt-1;

    //VSPMAX-1 is dummy empty node
    for (int32_t node=vcnt; node<VSPMAX; ++node)
    {
        yax_vsp[yaxbunch][node].n = node+1;
        yax_vsp[yaxbunch][node].p = node-1;
    }

    // close the free list into a ring
    yax_vsp[yaxbunch][VSPMAX-1].n = vcnt;
    yax_vsp[yaxbunch][vcnt].p = VSPMAX-1;
}
#endif

#define COMBINE_STRIPS

#ifdef COMBINE_STRIPS
static inline void vsdel(int32_t const i)
{
    //Delete i
    int const pi = vsp[i].p;
    int const ni = vsp[i].n;

    vsp[ni].p = pi;
    vsp[pi].n = ni;

    //Add i to empty list
    vsp[i].n = vsp[VSPMAX-1].n;
    vsp[i].p = VSPMAX-1;
    vsp[vsp[VSPMAX-1].n].p = i;
    vsp[VSPMAX-1].n = i;
}
# ifdef YAX_ENABLE
static inline void yax_vsdel(int32_t const yaxbunch, int32_t const i)
{
    //Delete i
    int const pi = yax_vsp[yaxbunch][i].p;
    int const ni = yax_vsp[yaxbunch][i].n;

    yax_vsp[yaxbunch][ni].p = pi;
    yax_vsp[yaxbunch][pi].n = ni;

    //Add i to empty list
    yax_vsp[yaxbunch][i].n = yax_vsp[yaxbunch][VSPMAX - 1].n;
    yax_vsp[yaxbunch][i].p = VSPMAX - 1;
    yax_vsp[yaxbunch][yax_vsp[yaxbunch][VSPMAX - 1].n].p = i;
    yax_vsp[yaxbunch][VSPMAX - 1].n = i;
}
# endif
#endif

static inline int32_t vsinsaft(int32_t const i)
{
    //i = next element from empty list
    int32_t const r = vsp[VSPMAX-1].n;
    vsp[vsp[r].n].p = VSPMAX-1;
    vsp[VSPMAX-1].n = vsp[r].n;

    vsp[r] = vsp[i]; //copy i to r

    //insert r after i
    vsp[r].p = i; vsp[r].n = vsp[i].n;
    vsp[vsp[i].n].p = r; vsp[i].n = r;

    return r;
}

#ifdef YAX_ENABLE


static inline int32_t yax_vsinsaft(int32_t const yaxbunch, int32_t const i)
{
    //i = next element from empty list
    int32_t const r = yax_vsp[yaxbunch][VSPMAX - 1].n;
    yax_vsp[yaxbunch][yax_vsp[yaxbunch][r].n].p = VSPMAX - 1;
    yax_vsp[yaxbunch][VSPMAX - 1].n = yax_vsp[yaxbunch][r].n;

    yax_vsp[yaxbunch][r] = yax_vsp[yaxbunch][i]; //copy i to r

    //insert r after i
    yax_vsp[yaxbunch][r].p = i; yax_vsp[yaxbunch][r].n = yax_vsp[yaxbunch][i].n;
    yax_vsp[yaxbunch][yax_vsp[yaxbunch][i].n].p = r; yax_vsp[yaxbunch][i].n = r;

    return r;
}
#endif

// Draw method that polymost_domost() passes to polymost_drawpoly() for every polygon it emits.
static int32_t domostpolymethod = DAMETH_NOMASK;

// Small bias used by polymost_domost() and its YAX variants: it is added to / subtracted from
// the incoming line's coordinates and used as the tolerance in the strip-edge comparisons.
#define DOMOST_OFFSET .01f

static void polymost_clipmost(vec2f_t *dpxy, int &n, float x0, float x1, float y0top, float y0bot, float y1top, float y1bot)
{
    if (y0bot < y0top || y1bot < y1top)
        return;

    //Clip to (x0,y0top)-(x1,y1top)

    vec2f_t dp2[8];

    float t0, t1;
    int n2 = 0;
    t1 = -((dpxy[0].x - x0) * (y1top - y0top) - (dpxy[0].y - y0top) * (x1 - x0));

    for (bssize_t i=0; i<n; i++)
    {
        int j = i + 1;

        if (j >= n)
            j = 0;

        t0 = t1;
        t1 = -((dpxy[j].x - x0) * (y1top - y0top) - (dpxy[j].y - y0top) * (x1 - x0));

        if (t0 >= 0)
            dp2[n2++] = dpxy[i];

        if ((t0 >= 0) != (t1 >= 0) && (t0 <= 0) != (t1 <= 0))
        {
            float const r = t0 / (t0 - t1);
            dp2[n2] = { (dpxy[j].x - dpxy[i].x) * r + dpxy[i].x,
                        (dpxy[j].y - dpxy[i].y) * r + dpxy[i].y };
            n2++;
        }
    }

    if (n2 < 3)
    {
        n = 0;
        return;
    }

    //Clip to (x1,y1bot)-(x0,y0bot)
    t1 = -((dp2[0].x - x1) * (y0bot - y1bot) - (dp2[0].y - y1bot) * (x0 - x1));
    n = 0;

    for (bssize_t i = 0, j = 1; i < n2; j = ++i + 1)
    {
        if (j >= n2)
            j = 0;

        t0 = t1;
        t1 = -((dp2[j].x - x1) * (y0bot - y1bot) - (dp2[j].y - y1bot) * (x0 - x1));

        if (t0 >= 0)
            dpxy[n++] = dp2[i];

        if ((t0 >= 0) != (t1 >= 0) && (t0 <= 0) != (t1 <= 0))
        {
            float const r = t0 / (t0 - t1);
            dpxy[n] = { (dp2[j].x - dp2[i].x) * r + dp2[i].x,
                        (dp2[j].y - dp2[i].y) * r + dp2[i].y };
            n++;
        }
    }

    if (n < 3)
    {
        n = 0;
        return;
    }
}

// Clips the line (x0,y0)-(x1,y1) against every open strip in the vsp[] list. For each strip
// (or sub-strip, after splitting) that the line fully spans in x, the polygon between the
// line and the strip edge it replaces is emitted and the strip edge is moved to the line.
//
//   x0 < x1  ("dir" set):  the line replaces the strip's floor edge (fy / ftag).
//   x0 > x1:               endpoints are swapped and the line replaces the ceiling edge (cy / ctag).
//   x0 == x1:              nothing is done.
//
//   y0top,y0bot,y1top,y1bot: window handed to polymost_clipmost() for every emitted polygon;
//                            with the defaults (bot < top) polymost_clipmost() leaves the
//                            polygon unclipped.
//
// Emitted polygons go to polymost_drawpoly() with domostpolymethod. Under YAX_ENABLE with
// g_nodraw set, nothing is drawn; instead the polygon's extents are appended to
// yax_holecf[yax_drawcf] (when yax_drawcf != -1) and polymost_polyeditorfunc() is called when
// editstatus && doeditorcheck.
// NOTE(review): no bound check on yax_holencf[] is visible in this function.
//
// Side effects: resets drawpoly_alpha/drawpoly_blend, bumps domost_rejectcount when the line is
// entirely outside [xbl,xbr], increments gtag, and (with COMBINE_STRIPS) merges neighbouring
// strips whose ctag and ftag match.
static void polymost_domost(float x0, float y0, float x1, float y1, float y0top = 0.f, float y0bot = -1.f, float y1top = 0.f, float y1bot = -1.f)
{
    // Direction of the incoming line decides whether it acts on the floor (1) or ceiling (0) edge.
    int const dir = (x0 < x1);

    polymost_outputGLDebugMessage(3, "polymost_domost(x0:%f, y0:%f, x1:%f, y1:%f, y0top:%f, y0bot:%f, y1top:%f, y1bot:%f)",
                                  x0, y0, x1, y1, y0top, y0bot, y1top, y1bot);

    // Grow the clip window by the bias on both sides.
    y0top -= DOMOST_OFFSET;
    y1top -= DOMOST_OFFSET;
    y0bot += DOMOST_OFFSET;
    y1bot += DOMOST_OFFSET;

    if (dir) //clip dmost (floor)
    {
        y0 -= DOMOST_OFFSET;
        y1 -= DOMOST_OFFSET;
    }
    else //clip umost (ceiling)
    {
        if (x0 == x1) return;
        // Normalise so that x0 < x1 from here on; the clip window is swapped along with it.
        swapfloat(&x0, &x1);
        swapfloat(&y0, &y1);
        swapfloat(&y0top, &y1top);
        swapfloat(&y0bot, &y1bot);
        y0 += DOMOST_OFFSET;
        y1 += DOMOST_OFFSET; //necessary?
    }

    // Widen the line in x by the bias.
    x0 -= DOMOST_OFFSET;
    x1 += DOMOST_OFFSET;

    // Test if span is outside screen bounds
    if (x1 < xbl || x0 > xbr)
    {
        domost_rejectcount++;
        return;
    }

    // dm0/dm1: the line clamped to [xbl,xbr], moving y along the line's slope.
    vec2f_t dm0 = { x0, y0 };
    vec2f_t dm1 = { x1, y1 };

    float const slop = (dm1.y - dm0.y) / (dm1.x - dm0.x);

    if (dm0.x < xbl)
    {
        dm0.y += slop*(xbl-dm0.x);
        dm0.x = xbl;
    }

    if (dm1.x > xbr)
    {
        dm1.y += slop*(xbr-dm1.x);
        dm1.x = xbr;
    }

    drawpoly_alpha = 0.f;
    drawpoly_blend = 0;

    vec2f_t n0, n1;
    // Split candidates for the current strip: x position and the tag given to the node created
    // there (-1 = line endpoint, 0 = crossing of the ceiling edge, 1 = crossing of the floor edge).
    // At most four are produced per strip (two endpoints + two crossings).
    float spx[4];
    int32_t  spt[4];

    // Walk the active strips starting after node 0 until the list wraps back to node 0.
    // newi is captured up front because the body inserts nodes after i.
    for (bssize_t newi, i=vsp[0].n; i; i=newi)
    {
        newi = vsp[i].n; n0.x = vsp[i].x; n1.x = vsp[newi].x;

        // Skip strips with no x overlap with the line, and strips whose ctag is <= 0.
        if ((dm0.x >= n1.x) || (n0.x >= dm1.x) || (vsp[i].ctag <= 0)) continue;

        // Local shorthand: cy[0]/cv[0] = ceiling y at the left edge / its rise across the strip,
        //                  cy[1]/cv[1] = the same for the floor.
        float const dx = n1.x-n0.x;
        float const cy[2] = { vsp[i].cy[0], vsp[i].fy[0] },
                    cv[2] = { vsp[i].cy[1]-cy[0], vsp[i].fy[1]-cy[1] };

        int scnt = 0;

        //Test if left edge requires split (dm0.x,dm0.y) (nx0,cy(0)),<dx,cv(0)>
        // t is the cross product of the strip's ceiling (dir==0) or floor (dir==1) edge with the
        // offset to the line's left endpoint; its sign decides whether a split is recorded.
        if ((dm0.x > n0.x) && (dm0.x < n1.x))
        {
            float const t = (dm0.x-n0.x)*cv[dir] - (dm0.y-cy[dir])*dx;
            if (((!dir) && (t < 0.f)) || ((dir) && (t > 0.f)))
                { spx[scnt] = dm0.x; spt[scnt] = -1; scnt++; }
        }

        //Test for intersection on umost (0) and dmost (1)
        // n/d is the parameter along dm0->dm1 at which the line meets the strip edge; fnx is
        // the x of that point.

        float const d[2] = { ((dm0.y - dm1.y) * dx) - ((dm0.x - dm1.x) * cv[0]),
                             ((dm0.y - dm1.y) * dx) - ((dm0.x - dm1.x) * cv[1]) };

        float const n[2] = { ((dm0.y - cy[0]) * dx) - ((dm0.x - n0.x) * cv[0]),
                             ((dm0.y - cy[1]) * dx) - ((dm0.x - n0.x) * cv[1]) };

        float const fnx[2] = { dm0.x + ((n[0] / d[0]) * (dm1.x - dm0.x)),
                               dm0.x + ((n[1] / d[1]) * (dm1.x - dm0.x)) };

        // A crossing counts only if |n| < |d| with matching signs and it lies strictly inside the strip.
        if ((Bfabsf(d[0]) > Bfabsf(n[0])) && (d[0] * n[0] >= 0.f) && (fnx[0] > n0.x) && (fnx[0] < n1.x))
            spx[scnt] = fnx[0], spt[scnt++] = 0;

        if ((Bfabsf(d[1]) > Bfabsf(n[1])) && (d[1] * n[1] >= 0.f) && (fnx[1] > n0.x) && (fnx[1] < n1.x))
            spx[scnt] = fnx[1], spt[scnt++] = 1;

        //Nice hack to avoid full sort later :)
        // Only the two most recently added candidates are put in ascending x order.
        if ((scnt >= 2) && (spx[scnt-1] < spx[scnt-2]))
        {
            swapfloat(&spx[scnt-1], &spx[scnt-2]);
            swaplong(&spt[scnt-1], &spt[scnt-2]);
        }

        //Test if right edge requires split
        if ((dm1.x > n0.x) && (dm1.x < n1.x))
        {
            float const t = (dm1.x-n0.x)*cv[dir] - (dm1.y-cy[dir])*dx;
            if (((!dir) && (t < 0.f)) || ((dir) && (t > 0.f)))
                { spx[scnt] = dm1.x; spt[scnt] = -1; scnt++; }
        }

        // Clear the boundary tags of the strip and its right neighbour before splitting.
        vsp[i].tag = vsp[newi].tag = -1;

        float const rdx = 1.f/dx;

        // Runs scnt+1 times: iterations 0..scnt-1 split the strip at spx[z] and then process
        // the piece to the left of the split; the final iteration (z == scnt) processes the
        // remaining right-most piece without splitting. After each pass i advances to vcnt,
        // the node created by the most recent split.
        for (bssize_t z=0, vcnt=0; z<=scnt; z++,i=vcnt)
        {
            float t;

            if (z == scnt)
                goto skip;

            // Insert a new node after i at spx[z]; both halves share the interpolated edge
            // heights at the split and the new node carries the split's tag.
            t = (spx[z]-n0.x)*rdx;
            vcnt = vsinsaft(i);
            vsp[i].cy[1] = t*cv[0] + cy[0];
            vsp[i].fy[1] = t*cv[1] + cy[1];
            vsp[vcnt].x = spx[z];
            vsp[vcnt].cy[0] = vsp[i].cy[1];
            vsp[vcnt].fy[0] = vsp[i].fy[1];
            vsp[vcnt].tag = spt[z];

skip: ;
            // Only pieces that the clamped line spans completely in x are processed.
            int32_t const ni = vsp[i].n; if (!ni) continue; //this 'if' fixes many bugs!
            float const dx0 = vsp[i].x; if (dm0.x > dx0) continue;
            float const dx1 = vsp[ni].x; if (dm1.x < dx1) continue;
            // Line heights at the piece's left and right edges.
            n0.y = (dx0-dm0.x)*slop + dm0.y;
            n1.y = (dx1-dm0.x)*slop + dm0.y;

            //      dx0           dx1
            //       ~             ~
            //----------------------------
            //     t0+=0         t1+=0
            //   vsp[i].cy[0]  vsp[i].cy[1]
            //============================
            //     t0+=1         t1+=3
            //============================
            //   vsp[i].fy[0]    vsp[i].fy[1]
            //     t0+=2         t1+=6
            //
            //     ny0 ?         ny1 ?

            // k encodes where the line sits relative to the piece at each side. Starting from 4:
            //   left  side: -1 if at/before the ceiling edge, +1 if at/past the floor edge
            //   right side: -3 if at/before the ceiling edge, +3 if at/past the floor edge
            // A boundary tag of 0 or 1 (split on a ceiling/floor crossing) forces the
            // corresponding adjustment regardless of the y comparison.
            int k = 4;

            if ((vsp[i].tag == 0) || (n0.y <= vsp[i].cy[0]+DOMOST_OFFSET)) k--;
            if ((vsp[i].tag == 1) || (n0.y >= vsp[i].fy[0]-DOMOST_OFFSET)) k++;
            if ((vsp[ni].tag == 0) || (n1.y <= vsp[i].cy[1]+DOMOST_OFFSET)) k -= 3;
            if ((vsp[ni].tag == 1) || (n1.y >= vsp[i].fy[1]-DOMOST_OFFSET)) k += 3;

#if 0
            //POGO: This GL1 debug code draws a green line that represents the new line, and the current VSP floor & ceil as red and blue respectively.
            //      To enable this, ensure that in polymost_drawrooms() that you are clearing the stencil buffer and color buffer.
            //      Additionally, disable any calls to glColor4f in polymost_drawpoly and disable culling triangles with area==0/removing duplicate points
            //      If you don't want any lines showing up from mirrors/skyboxes, be sure to disable them as well.
            glEnable(GL_STENCIL_TEST);
            glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE);
            glStencilFunc(GL_ALWAYS, 1, 0xFF);
            glDisable(GL_DEPTH_TEST);
            polymost_useColorOnly(true);
            glPolygonMode(GL_FRONT_AND_BACK,GL_LINE);

            glColor4f(0.f, 1.f, 0.f, 1.f);
            vec2f_t nline[3] = {{dx0, n0.y}, {dx1, n1.y}, {dx0, n0.y}};
            polymost_drawpoly(nline, 3, domostpolymethod);

            glColor4f(1.f, 0.f, 0.f, 1.f);
            vec2f_t floor[3] = {{vsp[i].x, vsp[i].fy[0]}, {vsp[ni].x, vsp[i].fy[1]}, {vsp[i].x, vsp[i].fy[0]}};
            polymost_drawpoly(floor, 3, domostpolymethod);

            glColor4f(0.f, 0.f, 1.f, 1.f);
            vec2f_t ceil[3] = {{vsp[i].x, vsp[i].cy[0]}, {vsp[ni].x, vsp[i].cy[1]}, {vsp[i].x, vsp[i].cy[0]}};
            polymost_drawpoly(ceil, 3, domostpolymethod);

            glPolygonMode(GL_FRONT_AND_BACK,GL_FILL);
            polymost_useColorOnly(false);
            glEnable(GL_DEPTH_TEST);
            glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
            glStencilFunc(GL_EQUAL, 0, 0xFF);
            glColor4f(1.f, 1.f, 1.f, 1.f);
#endif

            if (!dir)
            {
                // Ceiling mode: emit the area between the old ceiling edge and the line, then
                // move the ceiling edge to the line and stamp it with the current gtag.
                switch (k)
                {
                    // Line is past the ceiling edge on both sides: quad, both ceiling ends move.
                    case 4:
                    case 5:
                    case 7:
                    {
                        vec2f_t dpxy[8] = {
                            { dx0, vsp[i].cy[0] }, { dx1, vsp[i].cy[1] }, { dx1, n1.y }, { dx0, n0.y }
                        };

                        int n = 4;
                        polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                        if (g_nodraw)
                        {
                            if (yax_drawcf != -1)
                                yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, vsp[i].cy[0], vsp[i].cy[1], n0.y, n1.y };

                            if (editstatus && doeditorcheck)
                                polymost_polyeditorfunc(dpxy, n);
                        }
                        else
#endif
                            polymost_drawpoly(dpxy, n, domostpolymethod);

                        vsp[i].cy[0] = n0.y;
                        vsp[i].cy[1] = n1.y;
                        vsp[i].ctag = gtag;
                    }
                    break;
                    // Line meets the ceiling edge on the right side only: triangle, left ceiling end moves.
                    case 1:
                    case 2:
                    {
                        vec2f_t dpxy[8] = { { dx0, vsp[i].cy[0] }, { dx1, vsp[i].cy[1] }, { dx0, n0.y } };

                        int n = 3;
                        polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                        if (g_nodraw)
                        {
                            if (yax_drawcf != -1)
                                yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, vsp[i].cy[0], vsp[i].cy[1], n0.y, vsp[i].cy[1] };

                            if (editstatus && doeditorcheck)
                                polymost_polyeditorfunc(dpxy, n);
                        }
                        else
#endif
                            polymost_drawpoly(dpxy, n, domostpolymethod);

                        vsp[i].cy[0] = n0.y;
                        vsp[i].ctag = gtag;
                    }
                    break;
                    // Line meets the ceiling edge on the left side only: triangle, right ceiling end moves.
                    case 3:
                    case 6:
                    {
                        vec2f_t dpxy[8] = { { dx0, vsp[i].cy[0] }, { dx1, vsp[i].cy[1] }, { dx1, n1.y } };

                        int n = 3;
                        polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                        if (g_nodraw)
                        {
                            if (yax_drawcf != -1)
                                yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, vsp[i].cy[0], vsp[i].cy[1], vsp[i].cy[0], n1.y };

                            if (editstatus && doeditorcheck)
                                polymost_polyeditorfunc(dpxy, n);
                        }
                        else
#endif
                            polymost_drawpoly(dpxy, n, domostpolymethod);

                        vsp[i].cy[1] = n1.y;
                        vsp[i].ctag = gtag;
                    }
                    break;
                    // Line is at/past the floor edge on both sides: emit the whole piece and
                    // close it (ctag/ftag = -1, so the ctag <= 0 test skips it from now on).
                    case 8:
                    {
                        vec2f_t dpxy[8] = {
                            { dx0, vsp[i].cy[0] }, { dx1, vsp[i].cy[1] }, { dx1, vsp[i].fy[1] }, { dx0, vsp[i].fy[0] }
                        };

                        int n = 4;
                        polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                        if (g_nodraw)
                        {
                            if (yax_drawcf != -1)
                                yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, vsp[i].cy[0], vsp[i].cy[1], vsp[i].fy[0], vsp[i].fy[1] };

                            if (editstatus && doeditorcheck)
                                polymost_polyeditorfunc(dpxy, n);
                        }
                        else
#endif
                            polymost_drawpoly(dpxy, n, domostpolymethod);

                        vsp[i].ctag = vsp[i].ftag = -1;
                    }
                    // no break: falls into default, which only breaks
                    default: break;
                }
            }
            else
            {
                // Floor mode: emit the area between the line and the old floor edge, then
                // move the floor edge to the line and stamp it with the current gtag.
                switch (k)
                {
                // Line is before the floor edge on both sides: quad, both floor ends move.
                case 4:
                case 3:
                case 1:
                {
                    vec2f_t dpxy[8] = {
                        { dx0, n0.y }, { dx1, n1.y }, { dx1, vsp[i].fy[1] }, { dx0, vsp[i].fy[0] }
                    };

                    int n = 4;
                    polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                    if (g_nodraw)
                    {
                        if (yax_drawcf != -1)
                            yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, n0.y, n1.y, vsp[i].fy[0], vsp[i].fy[1] };

                        if (editstatus && doeditorcheck)
                            polymost_polyeditorfunc(dpxy, n);
                    }
                    else
#endif
                        polymost_drawpoly(dpxy, n, domostpolymethod);

                    vsp[i].fy[0] = n0.y;
                    vsp[i].fy[1] = n1.y;
                    vsp[i].ftag = gtag;
                }
                    break;
                // Line meets the floor edge on the right side only: triangle, left floor end moves.
                case 7:
                case 6:
                {
                    vec2f_t dpxy[8] = { { dx0, n0.y }, { dx1, vsp[i].fy[1] }, { dx0, vsp[i].fy[0] } };

                    int n = 3;
                    polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                    if (g_nodraw)
                    {
                        if (yax_drawcf != -1)
                            yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, n0.y, vsp[i].fy[1], vsp[i].fy[0], vsp[i].fy[1] };

                        if (editstatus && doeditorcheck)
                            polymost_polyeditorfunc(dpxy, n);
                    }
                    else
#endif
                        polymost_drawpoly(dpxy, n, domostpolymethod);

                    vsp[i].fy[0] = n0.y;
                    vsp[i].ftag = gtag;
                }
                    break;
                // Line meets the floor edge on the left side only: triangle, right floor end moves.
                case 5:
                case 2:
                {
                    vec2f_t dpxy[8] = { { dx0, vsp[i].fy[0] }, { dx1, n1.y }, { dx1, vsp[i].fy[1] } };

                    int n = 3;
                    polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                    if (g_nodraw)
                    {
                        if (yax_drawcf != -1)
                            yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, vsp[i].fy[0], n1.y, vsp[i].fy[0], vsp[i].fy[1] };

                        if (editstatus && doeditorcheck)
                            polymost_polyeditorfunc(dpxy, n);
                    }
                    else
#endif
                        polymost_drawpoly(dpxy, n, domostpolymethod);

                    vsp[i].fy[1] = n1.y;
                    vsp[i].ftag = gtag;
                }
                    break;
                // Line is at/before the ceiling edge on both sides: emit the whole piece and
                // close it (ctag/ftag = -1).
                case 0:
                {
                    vec2f_t dpxy[8] = { { dx0, vsp[i].cy[0] }, { dx1, vsp[i].cy[1] }, { dx1, vsp[i].fy[1] }, { dx0, vsp[i].fy[0] } };

                    int n = 4;
                    polymost_clipmost(dpxy, n, x0, x1, y0top, y0bot, y1top, y1bot);
#ifdef YAX_ENABLE
                    if (g_nodraw)
                    {
                        if (yax_drawcf != -1)
                            yax_holecf[yax_drawcf][yax_holencf[yax_drawcf]++] = { dx0, dx1, vsp[i].cy[0], vsp[i].cy[1], vsp[i].fy[0], vsp[i].fy[1] };

                        if (editstatus && doeditorcheck)
                            polymost_polyeditorfunc(dpxy, n);
                    }
                    else
#endif
                        polymost_drawpoly(dpxy, n, domostpolymethod);

                    vsp[i].ctag = vsp[i].ftag = -1;
                }
                // no break: falls into default, which only breaks
                default:
                    break;
                }
            }
        }
    }

    // Next call stamps its edges with a fresh tag, so edges from different calls never match.
    gtag++;

    //Combine neighboring vertical strips with matching collinear top&bottom edges
    //This prevents x-splits from propagating through the entire scan
#ifdef COMBINE_STRIPS
    int i = vsp[0].n;

    do
    {
        // A strip whose ceiling is at or past its floor on both sides is marked closed.
        if ((vsp[i].cy[0] >= vsp[i].fy[0]) && (vsp[i].cy[1] >= vsp[i].fy[1]))
            vsp[i].ctag = vsp[i].ftag = -1;

        int const ni = vsp[i].n;

        //POGO: specially treat the viewport nodes so that we will never end up in a situation where we accidentally access the sentinel node
        // Merge: i takes over ni's right-hand heights and ni goes back to the unused-node
        // list. i is deliberately not advanced so it can absorb further neighbours.
        if (ni >= viewportNodeCount &&
            (vsp[i].ctag == vsp[ni].ctag) && (vsp[i].ftag == vsp[ni].ftag))
        {
            vsp[i].cy[1] = vsp[ni].cy[1];
            vsp[i].fy[1] = vsp[ni].fy[1];
            vsdel(ni);

#if 0
            //POGO: This GL1 debug code draws the resulting merged VSP segment with floor and ceiling bounds lines as yellow and cyan respectively
            //      To enable this, ensure that in polymost_drawrooms() that you are clearing the stencil buffer and color buffer.
            //      Additionally, disable any calls to glColor4f in polymost_drawpoly and disable culling triangles with area==0
            //      If you don't want any lines showing up from mirrors/skyboxes, be sure to disable them as well.
            glEnable(GL_STENCIL_TEST);
            glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE);
            glStencilFunc(GL_ALWAYS, 1, 0xFF);
            glDisable(GL_DEPTH_TEST);
            polymost_useColorOnly(true);
            glPolygonMode(GL_FRONT_AND_BACK,GL_LINE);

            glColor4f(1.f, 1.f, 0.f, 1.f);
            vec2f_t dfloor[3] = {{vsp[i].x, vsp[i].fy[0]}, {vsp[vsp[i].n].x, vsp[i].fy[1]}, {vsp[i].x, vsp[i].fy[0]}};
            polymost_drawpoly(dfloor, 3, domostpolymethod);

            glColor4f(0.f, 1.f, 1.f, 1.f);
            vec2f_t dceil[3] = {{vsp[i].x, vsp[i].cy[0]}, {vsp[vsp[i].n].x, vsp[i].cy[1]}, {vsp[i].x, vsp[i].cy[0]}};
            polymost_drawpoly(dceil, 3, domostpolymethod);

            glPolygonMode(GL_FRONT_AND_BACK,GL_FILL);
            polymost_useColorOnly(false);
            glEnable(GL_DEPTH_TEST);
            glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
            glStencilFunc(GL_EQUAL, 0, 0xFF);
            glColor4f(1.f, 1.f, 1.f, 1.f);
#endif
        }
        else i = ni;
    }
    while (i);
#endif
}

#ifdef YAX_ENABLE
// Strip-list update for one YAX bunch: clips the line (x0,y0)-(x1,y1) against the strips in
// yax_vsp[yaxbunch][] and moves each fully-spanned strip's single edge (cy / ctag) to the line.
// Nothing is drawn here.
//
//   x0 < x1  ("dir" set):  the line is biased by -DOMOST_OFFSET and replaces cy where it is
//                          before the current edge (n.y < cy - offset).
//   x0 > x1:               endpoints are swapped, the line is biased by +DOMOST_OFFSET and
//                          replaces cy where it is past the current edge (n.y > cy + offset).
//   x0 == x1:              nothing is done.
//
// Side effects: bumps domost_rejectcount when the line is entirely outside [xbl,xbr],
// increments gtag, and (with COMBINE_STRIPS) merges neighbouring strips with equal ctag.
// NOTE(review): unlike polymost_domost(), x0/x1 are not widened by DOMOST_OFFSET here -
// confirm that this is intentional.
static void yax_polymost_domost(const int yaxbunch, float x0, float y0, float x1, float y1)
{
    int const dir = (x0 < x1);

    if (dir) //clip dmost (floor)
    {
        y0 -= DOMOST_OFFSET;
        y1 -= DOMOST_OFFSET;
    }
    else //clip umost (ceiling)
    {
        if (x0 == x1) return;
        // Normalise so that x0 < x1 from here on.
        swapfloat(&x0, &x1);
        swapfloat(&y0, &y1);
        y0 += DOMOST_OFFSET;
        y1 += DOMOST_OFFSET; //necessary?
    }

    // Test if span is outside screen bounds
    if (x1 < xbl || x0 > xbr)
    {
        domost_rejectcount++;
        return;
    }

    // dm0/dm1: the line clamped to [xbl,xbr], moving y along the line's slope.
    vec2f_t dm0 = { x0, y0 };
    vec2f_t dm1 = { x1, y1 };

    float const slop = (dm1.y - dm0.y) / (dm1.x - dm0.x);

    if (dm0.x < xbl)
    {
        dm0.y += slop*(xbl-dm0.x);
        dm0.x = xbl;
    }

    if (dm1.x > xbr)
    {
        dm1.y += slop*(xbr-dm1.x);
        dm1.x = xbr;
    }

    vec2f_t n0, n1;
    // Split candidates for the current strip: x position and the tag given to the node created
    // there (-1 = line endpoint, 0 = crossing of the strip edge). At most three are produced here.
    float spx[4];
    int32_t  spt[4];

    // Walk the active strips starting after node 0 until the list wraps back to node 0.
    // newi is captured up front because the body inserts nodes after i.
    for (bssize_t newi, i=yax_vsp[yaxbunch][0].n; i; i=newi)
    {
        newi = yax_vsp[yaxbunch][i].n; n0.x = yax_vsp[yaxbunch][i].x; n1.x = yax_vsp[yaxbunch][newi].x;

        // Skip strips with no x overlap with the line, and strips whose ctag is <= 0.
        if ((dm0.x >= n1.x) || (n0.x >= dm1.x) || (yax_vsp[yaxbunch][i].ctag <= 0)) continue;

        // Strip width, edge height at the left side and edge rise across the strip
        // (computed in double, unlike polymost_domost()).
        double const dx = double(n1.x)-double(n0.x);
        double const cy = yax_vsp[yaxbunch][i].cy[0],
                     cv = yax_vsp[yaxbunch][i].cy[1]-cy;

        int scnt = 0;

        //Test if left edge requires split (dm0.x,dm0.y) (nx0,cy(0)),<dx,cv(0)>
        // t is the cross product of the strip edge with the offset to the line's left endpoint.
        if ((dm0.x > n0.x) && (dm0.x < n1.x))
        {
            double const t = (dm0.x-n0.x)*cv - (dm0.y-cy)*dx;
            if (((!dir) && (t <= 0.0)) || ((dir) && (t >= 0.0)))
                { spx[scnt] = dm0.x; spt[scnt] = -1; scnt++; }
        }

        //Test for intersection on umost (0) and dmost (1)
        // n/d is the parameter along dm0->dm1 at which the line meets the strip edge; fnx is
        // the x of that point.

        double const d = ((double(dm0.y) - double(dm1.y)) * dx) - ((double(dm0.x) - double(dm1.x)) * cv);

        double const n = ((double(dm0.y) - cy) * dx) - ((double(dm0.x) - double(n0.x)) * cv);

        double const fnx = double(dm0.x) + ((n / d) * (double(dm1.x) - double(dm0.x)));

        // A crossing counts only if |n| < |d| with matching signs and it lies strictly inside the strip.
        if ((fabs(d) > fabs(n)) && (d * n >= 0.0) && (fnx > n0.x) && (fnx < n1.x))
            spx[scnt] = fnx, spt[scnt++] = 0;

        //Nice hack to avoid full sort later :)
        // Only the two most recently added candidates are put in ascending x order.
        if ((scnt >= 2) && (spx[scnt-1] < spx[scnt-2]))
        {
            swapfloat(&spx[scnt-1], &spx[scnt-2]);
            swaplong(&spt[scnt-1], &spt[scnt-2]);
        }

        //Test if right edge requires split
        if ((dm1.x > n0.x) && (dm1.x < n1.x))
        {
            double const t = (double(dm1.x)- double(n0.x))*cv - (double(dm1.y)- double(cy))*dx;
            if (((!dir) && (t <= 0.0)) || ((dir) && (t >= 0.0)))
                { spx[scnt] = dm1.x; spt[scnt] = -1; scnt++; }
        }

        // Clear the boundary tags of the strip and its right neighbour before splitting.
        yax_vsp[yaxbunch][i].tag = yax_vsp[yaxbunch][newi].tag = -1;

        float const rdx = 1.f/dx;

        // Runs scnt+1 times: iterations 0..scnt-1 split the strip at spx[z] and then process
        // the piece to the left of the split; the final iteration (z == scnt) processes the
        // remaining right-most piece without splitting. After each pass i advances to vcnt,
        // the node created by the most recent split.
        for (bssize_t z=0, vcnt=0; z<=scnt; z++,i=vcnt)
        {
            float t;

            if (z == scnt)
                goto skip;

            // Insert a new node after i at spx[z]; both halves share the interpolated edge
            // height at the split and the new node carries the split's tag.
            t = (spx[z]-n0.x)*rdx;
            vcnt = yax_vsinsaft(yaxbunch, i);
            yax_vsp[yaxbunch][i].cy[1] = t*cv + cy;
            yax_vsp[yaxbunch][vcnt].x = spx[z];
            yax_vsp[yaxbunch][vcnt].cy[0] = yax_vsp[yaxbunch][i].cy[1];
            yax_vsp[yaxbunch][vcnt].tag = spt[z];

skip: ;
            // Only pieces that the clamped line spans completely in x are processed.
            int32_t const ni = yax_vsp[yaxbunch][i].n; if (!ni) continue; //this 'if' fixes many bugs!
            float const dx0 = yax_vsp[yaxbunch][i].x; if (dm0.x > dx0) continue;
            float const dx1 = yax_vsp[yaxbunch][ni].x; if (dm1.x < dx1) continue;
            // Line heights at the piece's left and right edges.
            n0.y = (dx0-dm0.x)*slop + dm0.y;
            n1.y = (dx1-dm0.x)*slop + dm0.y;

            // The diagram below matches the one in polymost_domost(); the strips handled here
            // have no fy member, so only the cy comparison applies.
            //      dx0           dx1
            //       ~             ~
            //----------------------------
            //     t0+=0         t1+=0
            //   vsp[i].cy[0]  vsp[i].cy[1]
            //============================
            //     t0+=1         t1+=3
            //============================
            //   vsp[i].fy[0]    vsp[i].fy[1]
            //     t0+=2         t1+=6
            //
            //     ny0 ?         ny1 ?

            // k starts at 4 and is adjusted by 1 for the left side and by 3 for the right side
            // whenever the line does not pass the current edge there (or the boundary was
            // created by an edge crossing, tag == 0).
            int k = 4;

            if (!dir)
            {
                if ((yax_vsp[yaxbunch][i].tag == 0) || (n0.y <= yax_vsp[yaxbunch][i].cy[0]+DOMOST_OFFSET)) k--;
                if ((yax_vsp[yaxbunch][ni].tag == 0) || (n1.y <= yax_vsp[yaxbunch][i].cy[1]+DOMOST_OFFSET)) k -= 3;
                // k is one of 4, 3, 1, 0 here, so the "case 2" label below is never selected.
                switch (k)
                {
                    // Line passes the edge on both sides: both ends move.
                    case 4:
                    {
                        yax_vsp[yaxbunch][i].cy[0] = n0.y;
                        yax_vsp[yaxbunch][i].cy[1] = n1.y;
                        yax_vsp[yaxbunch][i].ctag = gtag;
                    }
                    break;
                    // Line passes the edge on the left side only: left end moves.
                    case 1:
                    case 2:
                    {
                        yax_vsp[yaxbunch][i].cy[0] = n0.y;
                        yax_vsp[yaxbunch][i].ctag = gtag;
                    }
                    break;
                    // Line passes the edge on the right side only: right end moves.
                    case 3:
                    {
                        yax_vsp[yaxbunch][i].cy[1] = n1.y;
                        yax_vsp[yaxbunch][i].ctag = gtag;
                    }
                    break;
                    // k == 0: the line passes the edge on neither side; strip is left alone.
                    default: break;
                }
            }
            else
            {
                if ((yax_vsp[yaxbunch][i].tag == 0) || (n0.y >= yax_vsp[yaxbunch][i].cy[0]-DOMOST_OFFSET)) k++;
                if ((yax_vsp[yaxbunch][ni].tag == 0) || (n1.y >= yax_vsp[yaxbunch][i].cy[1]-DOMOST_OFFSET)) k += 3;
                // k is one of 4, 5, 7, 8 here, so the "case 6" label below is never selected.
                switch (k)
                {
                // Line passes the edge on both sides: both ends move.
                case 4:
                {
                    yax_vsp[yaxbunch][i].cy[0] = n0.y;
                    yax_vsp[yaxbunch][i].cy[1] = n1.y;
                    yax_vsp[yaxbunch][i].ctag = gtag;
                }
                    break;
                // Line passes the edge on the left side only: left end moves.
                case 7:
                case 6:
                {
                    yax_vsp[yaxbunch][i].cy[0] = n0.y;
                    yax_vsp[yaxbunch][i].ctag = gtag;
                }
                    break;
                // Line passes the edge on the right side only: right end moves.
                case 5:
                {
                    yax_vsp[yaxbunch][i].cy[1] = n1.y;
                    yax_vsp[yaxbunch][i].ctag = gtag;
                }
                    break;
                // k == 8: the line passes the edge on neither side; strip is left alone.
                default:
                    break;
                }
            }
        }
    }

    // Next call stamps its edges with a fresh tag, so edges from different calls never match.
    gtag++;

    //Combine neighboring vertical strips with matching collinear top&bottom edges
    //This prevents x-splits from propagating through the entire scan
#ifdef COMBINE_STRIPS
    int i = yax_vsp[yaxbunch][0].n;

    // NOTE(review): polymost_domost() additionally requires ni >= viewportNodeCount before
    // merging; there is no such guard here - confirm node 0 can never be deleted by this loop.
    do
    {
        int const ni = yax_vsp[yaxbunch][i].n;

        // Merge: i takes over ni's right-hand height and ni goes back to the unused-node
        // list. i is deliberately not advanced so it can absorb further neighbours.
        if ((yax_vsp[yaxbunch][i].ctag == yax_vsp[yaxbunch][ni].ctag))
        {
            yax_vsp[yaxbunch][i].cy[1] = yax_vsp[yaxbunch][ni].cy[1];
            yax_vsdel(yaxbunch, ni);
        }
        else i = ni;
    }
    while (i);
#endif
}

static int32_t should_clip_cfwall(float x0, float y0, float x1, float y1)
{
    int const dir = (x0 < x1);

    if (dir && yax_globallev >= YAX_MAXDRAWS)
        return 1;

    if (!dir && yax_globallev <= YAX_MAXDRAWS)
        return 1;

    if (dir) //clip dmost (floor)
    {
        y0 -= DOMOST_OFFSET;
        y1 -= DOMOST_OFFSET;
    }
    else //clip umost (ceiling)
    {
        if (x0 == x1) return 1;
        swapfloat(&x0, &x1);
        swapfloat(&y0, &y1);
        y0 += DOMOST_OFFSET;
        y1 += DOMOST_OFFSET; //necessary?
    }

    x0 -= DOMOST_OFFSET;
    x1 += DOMOST_OFFSET;

    // Test if span is outside screen bounds
    if (x1 < xbl || x0 > xbr)
        return 1;

    vec2f_t dm0 = { x0, y0 };
    vec2f_t dm1 = { x1, y1 };

    float const slop = (dm1.y - dm0.y) / (dm1.x - dm0.x);

    if (dm0.x < xbl)
    {
        dm0.y += slop*(xbl-dm0.x);
        dm0.x = xbl;
    }

    if (dm1.x > xbr