Subversion Repositories eduke32

Rev

Rev 2470 | Blame | Compare with Previous | Last modification | View Log | RSS feed

// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
//
// This file has been modified from Ken Silverman's original release
// by Jonathon Fowler (jf@jonof.id.au)


#define ENGINE

#include "compat.h"
#include "build.h"
#include "pragmas.h"
#include "cache1d.h"
#include "a.h"
#include "osd.h"
#include "crc32.h"
#include "quicklz.h"

#include "baselayer.h"
#include "scriptfile.h"

#ifdef USE_OPENGL
# include "glbuild.h"
# include "mdsprite.h"
# ifdef POLYMER
#  include "polymer.h"
# endif
# include "hightile.h"
# include "polymost.h"
# ifdef _WIN32
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
# endif
#endif

#ifdef USE_LIBPNG
//# include <setjmp.h>
# ifdef NEDMALLOC
#  define PNG_USER_MEM_SUPPORTED
# endif
# include <png.h>
#endif

#include <math.h>
#include <assert.h>

#include "engine_priv.h"

#define CACHEAGETIME 16

#if !defined DEBUG_MAIN_ARRAYS
# define HAVE_CLIPSHAPE_FEATURE
const int32_t engine_main_arrays_are_static = 0;  // for Lunatic
#else
const int32_t engine_main_arrays_are_static = 1;
#endif

float debug1, debug2;

static void drawpixel_safe(void *s, char a)
{
#if defined __GNUC__
    if (__builtin_expect((intptr_t)s >= frameplace && (intptr_t)s < frameplace+bytesperline*ydim, 1))
#else
    if ((intptr_t)s >= frameplace && (intptr_t)s < frameplace+bytesperline*ydim)
#endif
        drawpixel(s, a);
#ifdef DEBUGGINGAIDS
    else
    {
        static const char *const c = &editorcolors[15];
        drawpixel((intptr_t *)frameplace, *c);
        drawpixel((intptr_t *)frameplace+1, *c);
        drawpixel((intptr_t *)frameplace+2, *c);
        drawpixel((intptr_t *)frameplace+bytesperline, *c);
        drawpixel((intptr_t *)frameplace+bytesperline+1, *c);
        drawpixel((intptr_t *)frameplace+bytesperline+2, *c);
        drawpixel((intptr_t *)frameplace+2*bytesperline, *c);
        drawpixel((intptr_t *)frameplace+2*bytesperline+1, *c);
        drawpixel((intptr_t *)frameplace+2*bytesperline+2, *c);
    }
#endif
}

//void loadvoxel(int32_t voxindex) { UNREFERENCED_PARAMATER(voxindex); }
int16_t tiletovox[MAXTILES];
int32_t usevoxels = 1;
//#define kloadvoxel loadvoxel

int32_t novoxmips = 1;
int32_t editorgridextent = 131072;

//These variables need to be copied into BUILD
#define MAXXSIZ 256
#define MAXYSIZ 256
#define MAXZSIZ 255
#define MAXVOXMIPS 5
static intptr_t voxoff[MAXVOXELS][MAXVOXMIPS];
static char voxlock[MAXVOXELS][MAXVOXMIPS];
int32_t voxscale[MAXVOXELS];

static int32_t ggxinc[MAXXSIZ+1], ggyinc[MAXXSIZ+1];
static int32_t lowrecip[1024], nytooclose, nytoofar;
static uint32_t distrecip[65536+256];

static intptr_t *lookups = NULL;
static int32_t dommxoverlay = 1, beforedrawrooms = 1;
int32_t indrawroomsandmasks = 0;

static int32_t oxdimen = -1, oviewingrange = -1, oxyaspect = -1;

// r_usenewaspect is the cvar, newaspect_enable to trigger the new behaviour in the code
int32_t r_usenewaspect = 1, newaspect_enable=0;
uint32_t r_screenxy = 403;  // 4:3 aspect ratio

int32_t curbrightness = 0, gammabrightness = 0;

double vid_gamma = DEFAULT_GAMMA;
double vid_contrast = DEFAULT_CONTRAST;
double vid_brightness = DEFAULT_BRIGHTNESS;

//Textured Map variables
static char globalpolytype;
static int16_t *dotp1[MAXYDIM], *dotp2[MAXYDIM];

static int8_t tempbuf[MAXWALLS];

int32_t ebpbak, espbak;
static intptr_t slopalookup[16384];    // was 2048
#if defined(USE_OPENGL)
palette_t palookupfog[MAXPALOOKUPS];
#endif

static int32_t artversion;
int32_t mapversion=7; // JBF 20040211: default mapversion to 7
char picsiz[MAXTILES];
static void *pic = NULL;
static char permanentlock = 255;
static char tilefilenum[MAXTILES];
static int32_t tilefileoffs[MAXTILES];
static int32_t lastageclock;

static int32_t artsize = 0, cachesize = 0;

// Whole ART file contents loaded from ZIPs in memory.
static char *artptrs[MAXTILEFILES];

// GCC 4.6 LTO build fix
#ifdef USING_LTO
# define B_ENGINE_STATIC
#else
# define B_ENGINE_STATIC static
#endif

static int16_t radarang2[MAXXDIM];
B_ENGINE_STATIC uint16_t ATTRIBUTE((used)) sqrtable[4096], ATTRIBUTE((used)) shlookup[4096+256];
const char pow2char[8] = {1,2,4,8,16,32,64,128};
const int32_t pow2long[32] =
{
    1L,2L,4L,8L,
    16L,32L,64L,128L,
    256L,512L,1024L,2048L,
    4096L,8192L,16384L,32768L,
    65536L,131072L,262144L,524288L,
    1048576L,2097152L,4194304L,8388608L,
    16777216L,33554432L,67108864L,134217728L,
    268435456L,536870912L,1073741824L,2147483647L
};
int32_t reciptable[2048], fpuasm;

char britable[16][256]; // JBF 20040207: full 8bit precision

extern char textfont[2048], smalltextfont[2048];

static char kensmessage[128];
const char *engineerrstr = "No error";

int32_t showfirstwall=0;
int32_t showheightindicators=2;
int32_t circlewall=-1;

//char cachedebug = 0;

qlz_state_compress *state_compress = NULL;
qlz_state_decompress *state_decompress = NULL;

int32_t whitecol;

#ifdef POLYMER
static int16_t maphacklightcnt=0;
static int16_t maphacklight[PR_MAXLIGHTS];
#endif

// forward refs
inline int32_t getscreenvdisp(int32_t bz, int32_t zoome);
void screencoords(int32_t *xres, int32_t *yres, int32_t x, int32_t y, int32_t zoome);

static void scansector(int16_t sectnum);

int16_t editstatus = 0;


////////// YAX //////////

int32_t numgraysects = 0;
uint8_t graysectbitmap[MAXSECTORS>>3];
uint8_t graywallbitmap[MAXWALLS>>3];
int32_t autogray = 0, showinnergray = 1;

#ifdef YAX_DEBUG
double hitickspersec;
#endif
#ifdef ENGINE_SCREENSHOT_DEBUG
int32_t engine_screenshot = 0;
#endif

void yax_updategrays(int32_t posze)
{
    int32_t i, j, k=1;
#ifdef YAX_ENABLE
    int32_t mingoodz=INT32_MAX, maxgoodz=INT32_MIN;
#else
    UNREFERENCED_PARAMETER(posze);
#endif

    Bmemset(graysectbitmap, 0, sizeof(graysectbitmap));
    Bmemset(graywallbitmap, 0, sizeof(graywallbitmap));

    for (i=0; i<numsectors; i++)
    {
#ifdef YAX_ENABLE
        int16_t cb, fb;

        yax_getbunches(i, &cb, &fb);
        // update grayouts due to yax  --v-- has to be half-open  --v--
        // because only one level should v  be ever active          v
        k = ((cb<0 || sector[i].ceilingz < posze) && (fb<0 || posze <= sector[i].floorz));
        if (autogray && (cb>=0 || fb>=0) && (sector[i].ceilingz <= posze && posze <= sector[i].floorz))
        {
            mingoodz = min(mingoodz, sector[i].ceilingz);
            maxgoodz = max(maxgoodz, sector[i].floorz);
        }
#endif
        // update grayouts due to editorzrange
        k &= (sector[i].ceilingz >= editorzrange[0] && sector[i].floorz <= editorzrange[1]);

        if (!k)  // outside bounds, gray out!
            graysectbitmap[i>>3] |= (1<<(i&7));
    }

#ifdef YAX_ENABLE
    if (autogray && mingoodz<=maxgoodz)
    {
        for (i=0; i<numsectors; i++)
            if (!(mingoodz <= sector[i].ceilingz && sector[i].floorz <= maxgoodz))
                graysectbitmap[i>>3] |= (1<<(i&7));
    }
#endif

    numgraysects = 0;
    for (i=0; i<numsectors; i++)
    {
        if (graysectbitmap[i>>3]&(1<<(i&7)))
        {
            numgraysects++;
            for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                graywallbitmap[j>>3] |= (1<<(j&7));
        }
    }
}


#if !defined YAX_ENABLE
# warning Non-TROR builds are supported only for debugging. Expect savegame breakage etc...
#endif

#ifdef YAX_ENABLE
// all references to floor/ceiling bunchnums should be through the
// get/set functions!

int32_t g_nodraw = 0;
int32_t scansector_retfast = 0;
static int32_t scansector_collectsprites = 1;
int32_t yax_globalcf = -1, yax_nomaskpass=0, yax_nomaskdidit;  // engine internal
int32_t r_tror_nomaskpass = 1;  // cvar
int32_t yax_globallev = YAX_MAXDRAWS;
int32_t yax_globalbunch = -1;

// duplicated tsprites
//  [i]:
//   i==MAXDRAWS: base level
//   i<MAXDRAWS: MAXDRAWS-i-1 is level towards ceiling
//   i>MAXDRAWS: i-MAXDRAWS-1 is level towards floor
static int16_t yax_spritesortcnt[1 + 2*YAX_MAXDRAWS];
static uint16_t yax_tsprite[1 + 2*YAX_MAXDRAWS][MAXSPRITESONSCREEN];
static uint8_t yax_tsprfrombunch[1 + 2*YAX_MAXDRAWS][MAXSPRITESONSCREEN];

// drawn sectors
uint8_t yax_gotsector[MAXSECTORS>>3];  // engine internal

// game-time YAX data structures
int16_t yax_bunchnum[MAXSECTORS][2];
int16_t yax_nextwall[MAXWALLS][2];

static int32_t yax_islockededge(int32_t line, int32_t cf)
{
    return !!(wall[line].cstat&(YAX_NEXTWALLBIT(cf)));
}

#define YAX_PTRBUNCHNUM(Ptr, Sect, Cf) (*(&Ptr[Sect].ceilingxpanning + 8*Cf))
#define YAX_BUNCHNUM(Sect, Cf) YAX_PTRBUNCHNUM(sector, Sect, Cf)

//// bunch getters/setters
int16_t yax_getbunch(int16_t i, int16_t cf)
{
    if (editstatus==0)
        return yax_bunchnum[i][cf];

    if (((*(&sector[i].ceilingstat + cf))&YAX_BIT)==0)
        return -1;

    return YAX_BUNCHNUM(i, cf);
}

void yax_getbunches(int16_t i, int16_t *cb, int16_t *fb)
{
    *cb = yax_getbunch(i, YAX_CEILING);
    *fb = yax_getbunch(i, YAX_FLOOR);
}

// bunchnum: -1: also clear yax-nextwalls (forward and reverse)
//           -2: don't clear reverse yax-nextwalls
//           -3: don't clear either forward or reverse yax-nextwalls
void yax_setbunch(int16_t i, int16_t cf, int16_t bunchnum)
{
    if (editstatus==0)
    {
        yax_bunchnum[i][cf] = bunchnum;
        return;
    }

    if (bunchnum < 0)
    {
        int32_t j;
        int16_t ynw;

        if (bunchnum > -3)
        {
            // TODO: for in-game too?
            for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
            {
                ynw = yax_getnextwall(j, cf);
                if (ynw >= 0)
                {
                    if (bunchnum > -2)
                        yax_setnextwall(ynw, !cf, -1);
                    yax_setnextwall(j, cf, -1);
                }
            }
        }

        *(&sector[i].ceilingstat + cf) &= ~YAX_BIT;
        YAX_BUNCHNUM(i, cf) = 0;
        return;
    }

    *(&sector[i].ceilingstat + cf) |= YAX_BIT;
    YAX_BUNCHNUM(i, cf) = bunchnum;
}

void yax_setbunches(int16_t i, int16_t cb, int16_t fb)
{
    yax_setbunch(i, YAX_CEILING, cb);
    yax_setbunch(i, YAX_FLOOR, fb);
}

//// nextwall getters/setters
int16_t yax_getnextwall(int16_t wal, int16_t cf)
{
    if (editstatus==0)
        return yax_nextwall[wal][cf];

    if (!yax_islockededge(wal, cf))
        return -1;

    return YAX_NEXTWALL(wal, cf);
}

// unchecked!
void yax_setnextwall(int16_t wal, int16_t cf, int16_t thenextwall)
{
    if (editstatus==0)
    {
        yax_nextwall[wal][cf] = thenextwall;
        return;
    }

    if (thenextwall >= 0)
    {
        wall[wal].cstat |= YAX_NEXTWALLBIT(cf);
        YAX_NEXTWALL(wal, cf) = thenextwall;
    }
    else
    {
        wall[wal].cstat &= ~YAX_NEXTWALLBIT(cf);
        YAX_NEXTWALL(wal, cf) = cf?-1:0;
    }
}

// make one step in the vertical direction, and if the wall we arrive at
// is red, return its nextsector.
int16_t yax_vnextsec(int16_t line, int16_t cf)
{
    int16_t ynw = yax_getnextwall(line, cf);

    if (ynw < 0)
        return -1;

    return wall[ynw].nextsector;
}


//// in-struct --> array transfer (only resetstat==0); list construction
// resetstat:  0: reset and read data from structs and construct linked lists etc.
//             1: only reset
//             2: read data from game-time arrays and construct linked lists etc.
void yax_update(int32_t resetstat)
{
    int32_t i, j, oeditstatus=editstatus;
    int16_t cb, fb, tmpsect;

    if (resetstat != 2)
        numyaxbunches = 0;

    for (i=0; i<MAXSECTORS; i++)
    {
        if (resetstat != 2 || i>=numsectors)
            yax_bunchnum[i][0] = yax_bunchnum[i][1] = -1;
        nextsectbunch[0][i] = nextsectbunch[1][i] = -1;
    }
    for (i=0; i<YAX_MAXBUNCHES; i++)
        headsectbunch[0][i] = headsectbunch[1][i] = -1;
    for (i=0; i<MAXWALLS; i++)
        if (resetstat != 2 || i>=numwalls)
            yax_nextwall[i][0] = yax_nextwall[i][1] = -1;

    if (resetstat==1)
        return;

    // constuct singly linked list of sectors-of-bunch

    // read bunchnums directly from the sector struct in yax_[gs]etbunch{es}!
    editstatus = (resetstat==0);
    // use oeditstatus to check for in-gamedness from here on!

    if (resetstat==0)
    {
        // make bunchnums consecutive
        uint8_t *const havebunch = (uint8_t *)tempbuf;
        uint8_t *const bunchmap = havebunch + (YAX_MAXBUNCHES>>3);
        int32_t dasub = 0;

        Bmemset(havebunch, 0, YAX_MAXBUNCHES>>3);
        for (i=0; i<numsectors; i++)
        {
            yax_getbunches(i, &cb, &fb);
            if (cb>=0)
                havebunch[cb>>3] |= (1<<(cb&7));
            if (fb>=0)
                havebunch[fb>>3] |= (1<<(fb&7));
        }

        for (i=0; i<YAX_MAXBUNCHES; i++)
        {
            if ((havebunch[i>>3]&(1<<(i&7)))==0)
            {
                bunchmap[i] = 255;
                dasub++;
                continue;
            }

            bunchmap[i] = i-dasub;
        }

        for (i=0; i<numsectors; i++)
        {
            yax_getbunches(i, &cb, &fb);
            if (cb>=0)
                yax_setbunch(i, YAX_CEILING, bunchmap[cb]);
            if (fb>=0)
                yax_setbunch(i, YAX_FLOOR, bunchmap[fb]);
        }
    }

    // in-struct --> array transfer (resetstat==0) and list construction
    for (i=numsectors-1; i>=0; i--)
    {
        yax_getbunches(i, &cb, &fb);
        if (resetstat==0)
        {
            yax_bunchnum[i][0] = cb;
            yax_bunchnum[i][1] = fb;
        }

        if (cb >= 0)
        {
            if (resetstat==0)
                for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                {
                    if (yax_islockededge(j,YAX_CEILING))
                    {
                        yax_nextwall[j][0] = YAX_NEXTWALL(j,0);
                        if (oeditstatus==0)
                            YAX_NEXTWALL(j,0) = 0;  // reset lotag!
                    }
                }

            if (headsectbunch[0][cb] == -1)
            {
                headsectbunch[0][cb] = i;
                // not duplicated in floors, since every extended ceiling
                // must have a corresponding floor:
                if (resetstat==0)
                    numyaxbunches++;
            }
            else
            {
                tmpsect = headsectbunch[0][cb];
                headsectbunch[0][cb] = i;
                nextsectbunch[0][i] = tmpsect;
            }
        }

        if (fb >= 0)
        {
            if (resetstat==0)
                for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                {
                    if (yax_islockededge(j,YAX_FLOOR))
                    {
                        yax_nextwall[j][1] = YAX_NEXTWALL(j,1);
                        if (oeditstatus==0)
                            YAX_NEXTWALL(j,1) = -1;  // reset extra!
                    }
                }

            if (headsectbunch[1][fb] == -1)
                headsectbunch[1][fb] = i;
            else
            {
                tmpsect = headsectbunch[1][fb];
                headsectbunch[1][fb] = i;
                nextsectbunch[1][i] = tmpsect;
            }
        }
    }

    editstatus = oeditstatus;
}

int32_t yax_getneighborsect(int32_t x, int32_t y, int32_t sectnum, int32_t cf, int16_t *ret_bunchnum)
{
    int16_t bunchnum = yax_getbunch(sectnum, cf);
    int32_t i;

    if (bunchnum < 0)
        return -1;

    for (SECTORS_OF_BUNCH(bunchnum, !cf, i))
        if (inside(x, y, i)==1)
        {
            if (ret_bunchnum)
                *ret_bunchnum = bunchnum;
            return i;
        }

    return -1;
}

// indexed as a list:
static int16_t bunches[2][YAX_MAXBUNCHES];
// indexed with bunchnums directly:
static int16_t bunchsec[YAX_MAXBUNCHES], bunchdist[YAX_MAXBUNCHES];

static int32_t ymostallocsize = 0;  // numyaxbunches*xdimen (no sizeof(int16_t) here!)
static int16_t *yumost=NULL, *ydmost=NULL;  // used as if [numyaxbunches][xdimen]
uint8_t haveymost[YAX_MAXBUNCHES>>3];

// adapted from build.c
static void yax_getclosestpointonwall(int32_t dawall, int32_t *closestx, int32_t *closesty)
{
    int64_t i, j, wx,wy, wx2,wy2, dx, dy;

    wx = wall[dawall].x;
    wy = wall[dawall].y;
    wx2 = wall[wall[dawall].point2].x;
    wy2 = wall[wall[dawall].point2].y;

    dx = wx2 - wx;
    dy = wy2 - wy;
    i = dx*(globalposx-wx) + dy*(globalposy-wy);
    if (i <= 0) { *closestx = wx; *closesty = wy; return; }
    j = dx*dx + dy*dy;
    if (i >= j) { *closestx = wx2; *closesty = wy2; return; }
    i=((i<<15)/j)<<15;
    *closestx = wx + ((dx*i)>>30);
    *closesty = wy + ((dy*i)>>30);
}

static inline int32_t yax_walldist(int32_t w)
{
    int32_t closestx, closesty;

    yax_getclosestpointonwall(w, &closestx, &closesty);
    return klabs(closestx-globalposx) + klabs(closesty-globalposy);

//    return klabs(wall[w].x-globalposx) + klabs(wall[w].y-globalposy);
}

// calculate distances to bunches and best start-drawing sectors
static void yax_scanbunches(int32_t bbeg, int32_t numhere, const uint8_t *lastgotsector)
{
    int32_t bnchcnt, bunchnum, j, k;
    int32_t startwall, endwall;

    UNREFERENCED_PARAMETER(lastgotsector);

    scansector_retfast = 1;
    scansector_collectsprites = 0;

    for (bnchcnt=bbeg; bnchcnt<bbeg+numhere; bnchcnt++)
    {
        int32_t walldist, bestsec=-1;
        int32_t bestwalldist=INT32_MAX, bestbestdist=INT32_MAX;

        bunchnum = bunches[yax_globalcf][bnchcnt];

        for (SECTORS_OF_BUNCH(bunchnum,!yax_globalcf, k))
        {
            int32_t checkthisec = 0;

            if (inside(globalposx, globalposy, k)==1)
            {
                bestsec = k;
                bestbestdist = 0;
                break;
            }

            startwall = sector[k].wallptr;
            endwall = startwall+sector[k].wallnum;

            for (j=startwall; j<endwall; j++)
            {
/*
                if ((w=yax_getnextwall(j,!yax_globalcf))>=0)
                    if ((ns=wall[w].nextsector)>=0)
                        if ((lastgotsector[ns>>3]&(1<<(ns&7)))==0)
                            continue;
*/

                walldist = yax_walldist(j);
                if (walldist < bestwalldist)
                {
                    checkthisec = 1;
                    bestwalldist = walldist;
                }
            }

            if (checkthisec)
            {
                numscans = numbunches = 0;
                if (getrendermode()==0)
                    scansector(k);
#ifdef USE_OPENGL
                else
                    polymost_scansector(k);
#endif
                if (numbunches > 0)
                {
                    bestsec = k;
                    bestbestdist = bestwalldist;
                }
            }
        }

        bunchsec[bunchnum] = bestsec;
        bunchdist[bunchnum] = bestbestdist;
    }

    scansector_collectsprites = 1;
    scansector_retfast = 0;
}

static int yax_cmpbunches(const int16_t *b1, const int16_t *b2)
{
    return (bunchdist[*b2] - bunchdist[*b1]);
}


void yax_tweakpicnums(int32_t bunchnum, int32_t cf, int32_t restore)
{
    // for polymer, this is called before polymer_drawrooms() with restore==0
    // and after polymer_drawmasks() with restore==1

    int32_t i, dastat;
    static int16_t opicnum[2][MAXSECTORS];
#ifdef DEBUGGINGAIDS
    static uint8_t expect_restore[2][YAX_MAXBUNCHES];

    // must call this with restore == 0, 1,  0, 1,  0, 1,  ...
    assert(expect_restore[cf][bunchnum] == restore);
    expect_restore[cf][bunchnum] = !expect_restore[cf][bunchnum];
#endif

    for (SECTORS_OF_BUNCH(bunchnum, cf, i))
    {
        dastat = (SECTORFLD(i,stat, cf)&(128+256));

        // only consider non-masked ceilings/floors
        if (dastat==0 || (restore==1 && opicnum[cf][i]&0x8000))
        {
            if (!restore)
            {
                opicnum[cf][i] = SECTORFLD(i,picnum, cf);
                if (editstatus && showinvisibility)
                    SECTORFLD(i,picnum, cf) = MAXTILES-1;
                else //if ((dastat&(128+256))==0)
                    SECTORFLD(i,picnum, cf) = 13; //FOF;
            }
            else
            {
                SECTORFLD(i,picnum, cf) = opicnum[cf][i];
            }
#ifdef POLYMER
            // will be called only in editor
            if (rendmode==4)
            {
                if (!restore)
                {
                    SECTORFLD(i,stat, cf) |= 128;
                    opicnum[cf][i] |= 0x8000;
                }
                else
                {
                    SECTORFLD(i,stat, cf) &= ~128;
                    SECTORFLD(i,picnum, cf) &= 0x7fff;
                    opicnum[cf][i] = 0;
                }
            }
#endif
        }
    }
}

static void yax_copytsprites()
{
    int32_t i, spritenum, gotthrough, sectnum;
    int32_t sortcnt = yax_spritesortcnt[yax_globallev];
    const spritetype *spr;

    for (i=0; i<sortcnt; i++)
    {
        spritenum = yax_tsprite[yax_globallev][i];

        gotthrough = spritenum&(MAXSPRITES|(MAXSPRITES<<1));

        spritenum &= MAXSPRITES-1;
        spr = &sprite[spritenum];
        sectnum = spr->sectnum;

        if (gotthrough == (MAXSPRITES|(MAXSPRITES<<1)))
        {
            if (yax_globalbunch != yax_tsprfrombunch[yax_globallev][i])
                continue;
        }
        else
        {
            int32_t cf = -1;

            if (gotthrough == MAXSPRITES)
                cf = YAX_CEILING;  // sprite got here through the ceiling of lower sector
            else if (gotthrough == (MAXSPRITES<<1))
                cf = YAX_FLOOR;  // sprite got here through the floor of upper sector

            if (cf != -1)
            {
                if ((yax_globallev-YAX_MAXDRAWS)*(-1 + 2*cf) > 0)
                    if (yax_getbunch(sectnum, cf) != yax_globalbunch)
                        continue;

                sectnum = yax_getneighborsect(spr->x, spr->y, sectnum, cf, NULL);
                if (sectnum < 0)
                    continue;
            }
        }

        if (spritesortcnt >= MAXSPRITESONSCREEN)
            break;

        Bmemcpy(&tsprite[spritesortcnt], spr, sizeof(spritetype));
        spriteext[spritenum].tspr = &tsprite[spritesortcnt];
        tsprite[spritesortcnt].owner = spritenum;

        tsprite[spritesortcnt].sectnum = sectnum;  // potentially tweak sectnum!
        spritesortcnt++;
    }
}


void yax_preparedrawrooms(void)
{
    if (getrendermode()==4 || numyaxbunches==0)
        return;

    g_nodraw = 1;
    Bmemset(yax_spritesortcnt, 0, sizeof(yax_spritesortcnt));
    Bmemset(haveymost, 0, (numyaxbunches+7)>>3);

    if (getrendermode()==0 && ymostallocsize < xdimen*numyaxbunches)
    {
        ymostallocsize = xdimen*numyaxbunches;
        yumost = Brealloc(yumost, ymostallocsize*sizeof(int16_t));
        ydmost = Brealloc(ydmost, ymostallocsize*sizeof(int16_t));

        if (!yumost || !ydmost)
        {
            initprintf("OUT OF MEMORY in yax_preparedrawrooms!\n");
            uninitengine();
            exit(10);
        }
    }
}

void yax_drawrooms(void (*ExtAnalyzeSprites)(void), int32_t horiz, int16_t sectnum)
{
    static uint8_t havebunch[YAX_MAXBUNCHES>>3];

    int32_t i, j, k, lev, cf, nmp;
    int32_t bnchcnt, bnchnum[2] = {0,0}, maxlev[2];
    int16_t ourbunch[2] = {-1,-1}, osectnum=sectnum;
    int32_t bnchbeg[YAX_MAXDRAWS][2], bnchend[YAX_MAXDRAWS][2];
    int32_t bbeg, numhere;

    // original (1st-draw) and accumulated ('per-level') gotsector bitmaps
    static uint8_t ogotsector[MAXSECTORS>>3], lgotsector[MAXSECTORS>>3];
#ifdef YAX_DEBUG
    uint64_t t;
#endif

    if (getrendermode()==4 || numyaxbunches==0)
    {
#ifdef ENGINE_SCREENSHOT_DEBUG
        engine_screenshot = 0;
#endif
        return;
    }

    // if we're here, there was just a drawrooms() call with g_nodraw=1

    Bmemcpy(ogotsector, gotsector, (numsectors+7)>>3);

    if (sectnum >= 0)
        yax_getbunches(sectnum, &ourbunch[0], &ourbunch[1]);
    Bmemset(&havebunch, 0, (numyaxbunches+7)>>3);

    // first scan all bunches above, then all below...
    for (cf=0; cf<2; cf++)
    {
        yax_globalcf = cf;

        if (cf==1)
        {
            sectnum = osectnum;
            Bmemcpy(gotsector, ogotsector, (numsectors+7)>>3);
        }

        for (lev=0; /*lev<YAX_MAXDRAWS*/; lev++)
        {
            yax_globallev = YAX_MAXDRAWS + (-1 + 2*cf)*(lev+1);

            bbeg = bnchbeg[lev][cf] = bnchend[lev][cf] = bnchnum[cf];
            numhere = 0;

            for (i=0; i<numsectors; i++)
            {
                if (!(gotsector[i>>3]&(1<<(i&7))))
                    continue;

                j = yax_getbunch(i, cf);
                if (j >= 0 && !(havebunch[j>>3]&(1<<(j&7))))
                {
                    if (getrendermode()==0 && (haveymost[j>>3]&(1<<(j&7)))==0)
                    {
                        yaxdebug("%s, l %d: skipped bunch %d (no *most)", cf?"v":"^", lev, j);
                        continue;
                    }

                    if ((SECTORFLD(i,stat, cf)&2) ||
                            (cf==0 && globalposz > sector[i].ceilingz) ||
                            (cf==1 && globalposz < sector[i].floorz))
                    {
                        havebunch[j>>3] |= (1<<(j&7));
                        bunches[cf][bnchnum[cf]++] = j;
                        bnchend[lev][cf]++;
                        numhere++;
                    }
                }
            }

            if (numhere > 0)
            {
                // found bunches -- need to fake-draw

                yax_scanbunches(bbeg, numhere, (uint8_t *)gotsector);

                qsort(&bunches[cf][bbeg], numhere, sizeof(int16_t),
                      (int(*)(const void *, const void *))&yax_cmpbunches);

                if (numhere > 1 && lev != YAX_MAXDRAWS-1)
                    Bmemset(lgotsector, 0, (numsectors+7)>>3);

                for (bnchcnt=bbeg; bnchcnt < bbeg+numhere; bnchcnt++)
                {
                    j = bunches[cf][bnchcnt];  // the actual bunchnum...
                    yax_globalbunch = j;
#ifdef YAX_DEBUG
                    t=gethiticks();
#endif
                    k = bunchsec[j];

                    if (k < 0)
                    {
                        yaxprintf("%s, l %d: skipped bunch %d\n", cf?"v":"^", lev, j);
                        continue;
                    }

                    if (lev != YAX_MAXDRAWS-1)
                    {
#ifdef YAX_DEBUG
                        int32_t odsprcnt = yax_spritesortcnt[yax_globallev];
#endif
                        // +MAXSECTORS: force
                        drawrooms(globalposx,globalposy,globalposz,globalang,horiz,k+MAXSECTORS);
                        if (numhere > 1)
                            for (i=0; i<(numsectors+7)>>3; i++)
                                lgotsector[i] |= gotsector[i];

                        yaxdebug("l%d: faked (bn %2d) sec %4d,%3d dspr, ob=[%2d,%2d], sn=%4d, %.3f ms",
                                 yax_globallev-YAX_MAXDRAWS, j, k, yax_spritesortcnt[yax_globallev]-odsprcnt,
                                 ourbunch[0],ourbunch[1],sectnum,
                                 (double)(1000*(gethiticks()-t))/hitickspersec);
                    }

                    if (ourbunch[cf]==j)
                    {
                        ourbunch[cf] = yax_getbunch(k, cf);
                        sectnum = k;
                    }
                }

                if (numhere > 1 && lev != YAX_MAXDRAWS-1)
                    Bmemcpy(gotsector, lgotsector, (numsectors+7)>>3);
            }

            if (numhere==0 || lev==YAX_MAXDRAWS-1)
            {
                // no new bunches or max level reached
                maxlev[cf] = lev - (numhere==0);
                break;
            }
        }
    }

//    yax_globalcf = -1;

    // now comes the real drawing!
    g_nodraw = 0;
    scansector_collectsprites = 0;

#if 1
//def ENGINE_CLEAR_SCREEN
    if (editstatus==1)
    {
        if (getrendermode()==0)
        {
            begindrawing();
            {
                const int32_t dimenprod = xdimen*ydimen;
                char *const p = (char *)frameplace;

                j = 0;
                for (i=0; i<dimenprod; i++)
                {
                    p[i] = (char)j;
                    j++;
                    if (j >= xdimen)
                        j = 0;
                }
            }
            enddrawing();
        }
#ifdef USE_OPENGL
        else
        {
            bglClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
        }
#endif
    }
#endif

    for (cf=0; cf<2; cf++)
    {
        yax_globalcf = cf;

        for (lev=maxlev[cf]; lev>=0; lev--)
        {
            yax_globallev = YAX_MAXDRAWS + (-1 + 2*cf)*(lev+1);
            scansector_collectsprites = (lev == YAX_MAXDRAWS-1);

            for (bnchcnt=bnchbeg[lev][cf]; bnchcnt<bnchend[lev][cf]; bnchcnt++)
            {
                j = bunches[cf][bnchcnt];  // the actual bunchnum...
                k = bunchsec[j];  // best start-drawing sector
                yax_globalbunch = j;
#ifdef YAX_DEBUG
                t=gethiticks();
#endif
                yax_tweakpicnums(j, cf, 0);
                if (k < 0)
                    continue;

                yax_nomaskdidit = 0;
                for (nmp=r_tror_nomaskpass; nmp>=0; nmp--)
                {
                    yax_nomaskpass = nmp;
                    drawrooms(globalposx,globalposy,globalposz,globalang,horiz,k+MAXSECTORS);  // +MAXSECTORS: force

                    if (nmp==1)
                    {
                        yaxdebug("nm1 l%d: DRAWN (bn %2d) sec %4d,          %.3f ms",
                                 yax_globallev-YAX_MAXDRAWS, j, k,
                                 (double)(1000*(gethiticks()-t))/hitickspersec);

                        if (!yax_nomaskdidit)
                        {
                            yax_nomaskpass = 0;
                            break;  // no need to draw the same stuff twice
                        }
                        Bmemcpy(yax_gotsector, gotsector, (numsectors+7)>>3);
                    }
                }

                if (!scansector_collectsprites)
                    spritesortcnt = 0;
                yax_copytsprites();
                yaxdebug("nm0 l%d: DRAWN (bn %2d) sec %4d,%3d tspr, %.3f ms",
                         yax_globallev-YAX_MAXDRAWS, j, k, spritesortcnt,
                         (double)(1000*(gethiticks()-t))/hitickspersec);

                ExtAnalyzeSprites();
                drawmasks();
            }

            if (lev < maxlev[cf])
                for (bnchcnt=bnchbeg[lev+1][cf]; bnchcnt<bnchend[lev+1][cf]; bnchcnt++)
                    yax_tweakpicnums(bunches[cf][bnchcnt], cf, 1);  // restore picnums
        }
    }

#ifdef YAX_DEBUG
    t=gethiticks();
#endif
    yax_globalcf = -1;
    yax_globalbunch = -1;
    yax_globallev = YAX_MAXDRAWS;
    scansector_collectsprites = 0;

    // draw base level
    drawrooms(globalposx,globalposy,globalposz,globalang,horiz,osectnum);
//    if (scansector_collectsprites)
//        spritesortcnt = 0;
    yax_copytsprites();
    yaxdebug("DRAWN base level sec %d,%3d tspr, %.3f ms", osectnum,
             spritesortcnt, (double)(1000*(gethiticks()-t))/hitickspersec);
    scansector_collectsprites = 1;

    for (cf=0; cf<2; cf++)
        if (maxlev[cf] >= 0)
            for (bnchcnt=bnchbeg[0][cf]; bnchcnt<bnchend[0][cf]; bnchcnt++)
                yax_tweakpicnums(bunches[cf][bnchcnt], cf, 1);  // restore picnums

#ifdef ENGINE_SCREENSHOT_DEBUG
    engine_screenshot = 0;
#endif

#ifdef YAX_DEBUG_YMOSTS
    if (getrendermode()==0 && numyaxbunches>0)
    {
        char purple = getclosestcol(63, 0, 63);
        char yellow = getclosestcol(63, 63, 0);

        begindrawing();
        for (i=0; i<numyaxbunches; i++)
        {
            int32_t x, x1;

            if ((haveymost[i>>3]&(1<<i&7))==0)
                continue;

            x1 = i*xdimen;

            for (x=x1; x<x1+xdimen; x++)
            {
                if (yumost[x] >= 0 && yumost[x] < ydim && (x&1))
                    *((char *)frameplace + yumost[x]*bytesperline + x-x1) = purple;

                if (ydmost[x]-1 >= 0 && ydmost[x]-1 < ydim && !(x&1))
                    *((char *)frameplace + (ydmost[x]-1)*bytesperline + x-x1) = yellow;
            }
        }
        enddrawing();
    }
#endif
}

#endif

//
// setslope
//
void setslope(int32_t sectnum, int32_t cf, int16_t slope)
{
    if (slope==0)
    {
        SECTORFLD(sectnum,stat, cf) &= ~2;
        SECTORFLD(sectnum,heinum, cf) = 0;
    }
    else
    {
        SECTORFLD(sectnum,stat, cf) |= 2;
        SECTORFLD(sectnum,heinum, cf) = slope;
    }
}

////////// editor side view //////////
int32_t m32_sideview = 0;
int32_t m32_sideelev = 256;  // elevation in BUILD degrees, 0..512
int16_t m32_sideang = 200;  // azimuth, 0..2047

int32_t m32_sidecos, m32_sidesin;
int32_t m32_swcnt;
int32_t m32_wallscreenxy[MAXWALLS][2];
int16_t m32_wallsprite[MAXWALLS+MAXSPRITES];
static int32_t m32_sidedist[MAXWALLS+MAXSPRITES];
static vec3_t m32_viewplane;


////// sector-like clipping for sprites //////
#ifdef HAVE_CLIPSHAPE_FEATURE
typedef struct
{
    int16_t numsectors, numwalls;
    sectortype *sector;
    walltype *wall;
} mapinfo_t;

static void mapinfo_set(mapinfo_t *bak, mapinfo_t *new)
{
    if (bak)
    {
        bak->numsectors = numsectors;
        bak->numwalls = numwalls;
        bak->sector = sector;
        bak->wall = wall;
    }

    if (new)
    {
        numsectors = new->numsectors;
        numwalls = new->numwalls;
        sector = new->sector;
        wall = new->wall;
    }
}

static mapinfo_t origmapinfo, clipmapinfo;
static int32_t quickloadboard=0;


#define CM_MAX 256  // must be a power of 2

typedef struct
{
    int16_t qbeg, qend;  // indices into sectq
    int16_t picnum, next;
    int32_t maxdist;
} clipinfo_t;

static int32_t numclipmaps;
static clipinfo_t clipinfo[CM_MAX];

static int32_t numclipsects;  // number in sectq[]
static int16_t *sectoidx, *sectq;  // [numsectors]
static int16_t pictoidx[MAXTILES];  // maps tile num to clipinfo[] index
static int16_t *tempictoidx;

static sectortype *loadsector;
static walltype *loadwall, *loadwallinv;
static spritetype *loadsprite;

// sectoidx bits
#define CM_NONE (CM_MAX<<1)
#define CM_SOME (CM_NONE-1)
#define CM_OUTER (CM_MAX)   // sector surrounds clipping sector

// sprite -> sector tag mappings
#define CM_XREPEAT floorpal
#define CM_YREPEAT floorxpanning
#define CM_XOFFSET ceilingshade
#define CM_YOFFSET floorshade
#define CM_CSTAT hitag
#define CM_ANG extra
#define CM_FLOORZ(Sec) (*(int32_t *)&sector[Sec].ceilingxpanning)  // ceilingxpanning,ceilingypanning,floorpicnum
#define CM_CEILINGZ(Sec) (*(int32_t *)&sector[Sec].visibility)  // visibility,filler,lotag

// backup of original normalized coordinates
#define CM_WALL_X(Wal) (*(int32_t *)&wall[Wal].picnum)
#define CM_WALL_Y(Wal) (*(int32_t *)&wall[Wal].lotag)

// don't rotate when applying clipping, for models with rotational symmetry
#define CM_NOROT(Spri) (sprite[Spri].cstat&2)
#define CM_NOROTS(Sect) (sector[Sect].CM_CSTAT&2)


static void clipmapinfo_init()
{
    int32_t i;

    numclipmaps = 0;
    numclipsects = 0;

    if (sectq) { Bfree(sectq); sectq=NULL; }
    if (sectoidx) { Bfree(sectoidx); sectoidx=NULL; }
    if (tempictoidx) { Bfree(tempictoidx); tempictoidx=NULL; }

    for (i=0; i<MAXTILES; i++)
        pictoidx[i]=-1;

    if (loadsector) { Bfree(loadsector); loadsector=NULL; }
    if (loadwall) { Bfree(loadwall); loadwall=NULL; }
    if (loadwallinv) { Bfree(loadwallinv); loadwallinv=NULL; }
    if (loadsprite) { Bfree(loadsprite); loadsprite=NULL; }

    clipmapinfo.numsectors = clipmapinfo.numwalls = 0;
    clipmapinfo.sector=NULL;
    clipmapinfo.wall=NULL;

    numsectors = 0;
    numwalls = 0;
}

// loads the clip maps 0 through 9.
// this should be called before any real map is loaded.
int32_t clipmapinfo_load(const char *filename)
{
    int32_t i,k,w, px,py,pz;
    int16_t ang,cs;

    char fn[BMAX_PATH], loadedwhich[32]={0}, *lwcp=loadedwhich;
    int32_t slen, fi, fisec[10], fispr[10];
    int32_t ournumsectors=0, ournumwalls=0, ournumsprites=0, numsprites;

    clipmapinfo_init();

    loadsector = Bmalloc(MAXSECTORS * sizeof(sectortype));
    loadwall = Bmalloc(MAXWALLS * sizeof(walltype));
    loadsprite = Bmalloc(MAXSPRITES * sizeof(spritetype));

    if (!loadsector || !loadwall || !loadsprite)
    {
        clipmapinfo_init();
        return 1;
    }

    Bmemset(fisec, 0, sizeof(fisec));
    Bmemset(fispr, 0, sizeof(fispr));

    Bstrcpy(fn, filename);
    slen = Bstrlen(fn);

    quickloadboard = 1;
    for (fi='0'; fi<='9'; fi++)
    {
        fisec[fi-'0'] = ournumsectors;
        fispr[fi-'0'] = ournumsprites;

        fn[slen-5] = fi;
        i = loadboard(fn, 0, &px,&py,&pz, &ang,&cs);
        if (i<0)
            continue;

        for (numsprites=0; numsprites<MAXSPRITES; numsprites++)
            if (sprite[numsprites].statnum == MAXSTATUS)
                break;

        if (ournumsectors+numsectors>MAXSECTORS ||
                ournumwalls+numwalls>MAXWALLS ||
                ournumsprites+numsprites>MAXSPRITES)
        {
            initprintf("clip map: warning: exceeded limits when loading %s, aborting.\n", fn);
            break;
        }

        Bmemcpy(loadsector+ournumsectors, sector, numsectors*sizeof(sectortype));
        Bmemcpy(loadwall+ournumwalls, wall, numwalls*sizeof(walltype));
        Bmemcpy(loadsprite+ournumsprites, sprite, numsprites*sizeof(spritetype));
        for (i=ournumsectors; i<ournumsectors+numsectors; i++)
            loadsector[i].wallptr += ournumwalls;
        for (i=ournumwalls; i<ournumwalls+numwalls; i++)
        {
            if (loadwall[i].point2>=0)
                loadwall[i].point2 += ournumwalls;
            if (loadwall[i].nextwall>=0)
            {
                loadwall[i].nextwall += ournumwalls;
                loadwall[i].nextsector += ournumsectors;
            }
        }
        for (i=ournumsprites; i<ournumsprites+numsprites; i++)
            if (loadsprite[i].sectnum>=0)
                loadsprite[i].sectnum += ournumsectors;
        ournumsectors += numsectors;
        ournumwalls += numwalls;
        ournumsprites += numsprites;

        if (lwcp != loadedwhich)
        {
            *lwcp++ = ',';
            *lwcp++ = ' ';
        }
        *lwcp++ = fi;
        *lwcp = 0;
    }
    quickloadboard = 0;

    if (ournumsectors==0 || ournumwalls==0 || ournumsprites==0)  // nothing loaded
    {
        clipmapinfo_init();
        return -1;
    }

    // shrink
    loadsector = Brealloc(loadsector, ournumsectors*sizeof(sectortype));
    loadwall = Brealloc(loadwall, ournumwalls*sizeof(walltype));

    Bmemcpy(sector, loadsector, ournumsectors*sizeof(sectortype));
    Bmemcpy(wall, loadwall, ournumwalls*sizeof(walltype));
    Bmemcpy(sprite, loadsprite, ournumsprites*sizeof(spritetype));
    numsectors = ournumsectors;
    numwalls = ournumwalls;

    //  vvvv    don't use headsprite[sect,stat]!   vvvv

    sectoidx = Bmalloc(numsectors*sizeof(sectoidx[0]));
    if (!sectoidx || !sector || !wall)
    {
        clipmapinfo_init();
        return 1;
    }
    for (i=0; i<numsectors; i++)
        sectoidx[i] = CM_NONE;

    // determine outer sectors
    for (i=0; i<numsectors; i++)
    {
        for (w=sector[i].wallptr; w<sector[i].wallptr+sector[i].wallnum; w++)
            if (wall[w].nextsector<0)
            {
                sectoidx[i] = CM_OUTER;
                break;
            }
    }
    // break connections between outer sectors
    for (i=0; i<numsectors; i++)
    {
        if (sectoidx[i] == CM_OUTER)
            for (w=sector[i].wallptr; w<sector[i].wallptr+sector[i].wallnum; w++)
            {
                k = wall[w].nextwall;
                if (k>=0 && sectoidx[wall[w].nextsector]==CM_OUTER)
                {
                    wall[k].nextwall = wall[k].nextsector = -1;
                    wall[w].nextwall = wall[w].nextsector = -1;
                }
            }
    }

    {
        int16_t ns, outersect;
        int32_t pn,scnt, x,y,z, maxdist;

        sectq = Bmalloc(numsectors*sizeof(sectq[0]));
        tempictoidx = Bmalloc(MAXTILES*sizeof(tempictoidx[0]));
        if (!sectq || !tempictoidx)
        {
            clipmapinfo_init();
            return 1;
        }
        for (i=0; i<MAXTILES; i++)
            tempictoidx[i]=-1;

        // collect sprite picnums
        for (i=0; i<MAXSPRITES && sprite[i].statnum<MAXSTATUS; i++)
        {
            pn = sprite[i].picnum;
            k = sprite[i].sectnum;
            //    -v-  note the <=                         ignore sprites in outer sectors
            if (pn<=0 || pn>=MAXTILES || k<0 || k>=numsectors || (sectoidx[k]&CM_OUTER))
                continue;

            if (numclipmaps >= CM_MAX)
            {
                initprintf("warning: reached max clip map number %d, not processing any more\n", CM_MAX);
                break;
            }

            // chain
            if (pictoidx[pn]>=0)
            {
                if (sectoidx[k]&CM_SOME)
                {
                    for (fi=0; fi<9; fi++)
                        if (k>=fisec[fi])
                            break;
                    initprintf("clip map %d: error: tried to chain picnum %d (sprite %d) in sector %d which"
                               " already belongs to picnum %d.\n", fi, pn, i-fispr[fi], k-fisec[fi],
                               clipinfo[sectoidx[k]].picnum);
                    clipmapinfo_init();
                    return 2;
                }

                // new one is front
                clipinfo[numclipmaps].next = pictoidx[pn];
                pictoidx[pn] = numclipmaps;
            }
            else
            {
                clipinfo[numclipmaps].next = -1;
                pictoidx[pn] = numclipmaps;
            }

            if (!CM_NOROT(i))
            {
                if (sprite[i].ang!=1536 && sprite[i].ang!=512)
                {
                    for (fi=0; fi<9; fi++)
                        if (i>=fispr[fi])
                            break;
                    initprintf("clip map %d: warning: sprite %d pointing neither northward nor southward. %s will be wrong.\n",
                               fi, i-fispr[fi], (sprite[i].cstat&48)==32 ? "Scaling and flipping" : "X-flipping");
                }
            }

            clipinfo[numclipmaps].picnum = pn;

            // collect sectors
            scnt = numclipsects;
            sectq[numclipsects++] = k;
            sectoidx[k] = numclipmaps;

            clipinfo[numclipmaps].qbeg = scnt;

            outersect = -1;

            do
            {
                k = sectq[scnt];

                for (w=sector[k].wallptr; w<sector[k].wallptr+sector[k].wallnum; w++)
                {
                    ns = wall[w].nextsector;
                    if (ns>=0)
                    {
                        if (sectoidx[ns]==CM_NONE)
                        {
                            sectoidx[ns] = numclipmaps;
                            sectq[numclipsects++] = ns;
                        }
                        else if (sectoidx[ns]&CM_OUTER)
                        {
                            if (outersect>=0 && ns!=outersect)
                            {
                                for (fi=0; fi<9; fi++)
                                    if (ns>=fisec[fi])
                                        break;
                                initprintf("clip map %d: error: encountered more than one outer sector (%d and %d)"
                                           " for sprite %d.\n", fi, outersect-fisec[fi], ns-fisec[fi], i-fispr[fi]);
                                clipmapinfo_init();
                                return 3;
                            }

                            outersect = ns;
                            sectoidx[outersect] |= numclipmaps;
                        }
                        else if (sectoidx[ns]!=numclipmaps)
                        {
                            for (fi=0; fi<9; fi++)
                                if (ns>=fisec[fi])
                                    break;
                            initprintf("clip map %d: error: encountered sector %d belonging to index %d"
                                       " while collecting sectors for sprite %d (index %d).\n",
                                       fi, ns-fisec[fi], sectoidx[ns], i-fispr[fi], numclipmaps);
                            clipmapinfo_init();
                            return 4;
                        }
                    }
                }
            }
            while (++scnt < numclipsects);

            if (outersect==-1)
            {
                initprintf("clip map: INTERNAL ERROR: outersect==-1!\n");
                clipmapinfo_init();
                return 5;
            }

            sectq[numclipsects++] = outersect;  // last is outer
            clipinfo[numclipmaps].qend = numclipsects-1;

            // normalize
            maxdist = 0;

            for (scnt=clipinfo[numclipmaps].qbeg; scnt<=clipinfo[numclipmaps].qend; scnt++)
            {
                k = sectq[scnt];

                x = sprite[i].x;
                y = sprite[i].y;
                z = sprite[i].z;

                sector[k].floorz -= z;
                sector[k].ceilingz -= z;

                if (scnt==clipinfo[numclipmaps].qbeg)
                {
                    // backup sprite tags since we'll discard sprites later
                    sector[k].CM_XREPEAT = sprite[i].xrepeat;
                    sector[k].CM_YREPEAT = sprite[i].yrepeat;
                    sector[k].CM_XOFFSET = sprite[i].xoffset;
                    sector[k].CM_YOFFSET = sprite[i].yoffset;
                    sector[k].CM_CSTAT = sprite[i].cstat;
                    sector[k].CM_ANG = sprite[i].ang;
                }

                // backup floor and ceiling z
                CM_FLOORZ(k) = sector[k].floorz;
                CM_CEILINGZ(k) = sector[k].ceilingz;

                for (w=sector[k].wallptr; w<sector[k].wallptr+sector[k].wallnum; w++)
                {
                    wall[w].x -= x;
                    wall[w].y -= y;

                    if (scnt!=clipinfo[numclipmaps].qend)
                    {
                        if (CM_NOROT(i))
                        {
                            if (klabs(wall[w].x) > maxdist)
                                maxdist = klabs(wall[w].x);
                            if (klabs(wall[w].y) > maxdist)
                                maxdist = klabs(wall[w].y);
                        }
                        else
                        {
                            int32_t tmp = ksqrt(wall[w].x*wall[w].x + wall[w].y*wall[w].y);
                            if (tmp > maxdist)
                                maxdist = tmp;
                        }
                    }

                    // aliasing
                    if (wall[w].lotag>0 || wall[w].hitag>0)
                    {
                        int32_t ii;

                        if (wall[w].lotag>0 && wall[w].hitag>0)
                        {
                            if (wall[w].lotag > wall[w].hitag)
                                swapshort(&wall[w].lotag, &wall[w].hitag);

                            for (ii=wall[w].lotag; ii<wall[w].hitag; ii++)
                                tempictoidx[ii] = numclipmaps;
                        }
                        else if (wall[w].lotag>0)
                        {
                            if (wall[w].lotag<MAXTILES)
                                tempictoidx[wall[w].lotag] = numclipmaps;
                        }
                        else
                        {
                            if (wall[w].hitag<MAXTILES)
                                tempictoidx[wall[w].hitag] = numclipmaps;
                        }
                    }

                    CM_WALL_X(w) = wall[w].x;
                    CM_WALL_Y(w) = wall[w].y;
                }
            }

            clipinfo[numclipmaps].maxdist = maxdist;
            numclipmaps++;
        }
    }

    // yes, too much copying, but better than ugly code
    Bmemcpy(loadsector, sector, ournumsectors*sizeof(sectortype));
    Bmemcpy(loadwall, wall, ournumwalls*sizeof(walltype));

    // loadwallinv will contain all walls with inverted orientation for x/y-flip handling
    loadwallinv = Bmalloc(ournumwalls*sizeof(walltype));
    if (!loadwallinv)
    {
        clipmapinfo_init();
        return 1;
    }

    {
        int32_t j, loopstart, loopend, numloopwalls;

        // invert walls!
        loopstart = 0;
        for (j=0; j<ournumwalls; j++)
        {
            wall[j].nextsector = wall[j].nextwall = -1;

            if (wall[j].point2 < j)
            {
                loopend = j+1;
                numloopwalls = loopend-loopstart;

                if (numloopwalls<3)
                {
                    loopstart = loopend;
                    continue;
                }

                for (k=0; k<numloopwalls; k++)
                {
                    wall[loopstart+k].x = loadwall[loopstart + (numloopwalls+1-k)%numloopwalls].x;
                    wall[loopstart+k].y = loadwall[loopstart + (numloopwalls+1-k)%numloopwalls].y;

                    CM_WALL_X(loopstart+k) = wall[loopstart+k].x;
                    CM_WALL_Y(loopstart+k) = wall[loopstart+k].y;
                }

                loopstart = loopend;
            }
        }

        // reconstruct wall connections
        for (i=0; i<ournumsectors; i++)
        {
            for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                checksectorpointer(j, i);
        }
    }
    Bmemcpy(loadwallinv, wall, ournumwalls*sizeof(walltype));

    clipmapinfo.numsectors = numsectors;
    clipmapinfo.sector = loadsector;
    clipmapinfo.numwalls = numwalls;
    clipmapinfo.wall = loadwall;

    for (i=0; i<MAXTILES; i++)
    {
        if (pictoidx[i]==-1 && tempictoidx[i]>=0)
            pictoidx[i]=tempictoidx[i];
    }

    Bfree(loadsprite); loadsprite=NULL;
    Bfree(tempictoidx); tempictoidx=NULL;

    // don't let other code be distracted by the temporary map we constructed
    numsectors = 0;
    numwalls = 0;
    initspritelists();

    initprintf("Loaded clip map%s %s.\n", lwcp==loadedwhich+1?"":"s", loadedwhich);

    return 0;
}
#endif
////// //////

#define WALLS_ARE_CONSISTENT(k) ((wall[k].x == x2 && wall[k].y == y2)   \
                                 && ((wall[wall[k].point2]).x == x1 && (wall[wall[k].point2]).y == y1))


static int32_t getscore(int32_t w1c, int32_t w1f, int32_t w2c, int32_t w2f)
{
    int32_t minflor, maxceil;

    if (w1c > w1f)
        swaplong(&w1c, &w1f);
    if (w2c > w2f)
        swaplong(&w2c, &w2f);

    // now: c <= f for each "wall-vline"

    maxceil = max(w1c, w2c);
    minflor = min(w1f, w2f);

    return minflor-maxceil;
}

const int16_t *chsecptr_onextwall = NULL;

int32_t checksectorpointer(int16_t i, int16_t sectnum)
{
    int32_t startsec, endsec;
    int32_t j, k, startwall, endwall, x1, y1, x2, y2, numnewwalls=0;
    int32_t bestnextwall=-1, bestnextsec=-1, bestwallscore=INT32_MIN;
    int32_t cz[4], fz[4], tmp[2], tmpscore=0;
#ifdef YAX_ENABLE
    int16_t cb[2], fb[2];
#endif

#if 0
    if (checksectorpointer_warn && (i<0 || i>=max(numwalls,newnumwalls)))
    {
        char buf[128];
        Bsprintf(buf, "WARN: checksectorpointer called with i=%d but (new)numwalls=%d", i, max(numwalls,newnumwalls));
        OSD_Printf("%s\n", buf);
        printmessage16("%s", buf);
        return 0;
    }
#endif

    x1 = wall[i].x;
    y1 = wall[i].y;
    x2 = (wall[wall[i].point2]).x;
    y2 = (wall[wall[i].point2]).y;

    k = wall[i].nextwall;
    if (k >= 0)          //Check for early exit
    {
        if (WALLS_ARE_CONSISTENT(k))
            return 0;

        wall[k].nextwall = wall[k].nextsector = -1;
    }

    if ((unsigned)wall[i].nextsector < (unsigned)numsectors && wall[i].nextwall < 0)
    {
        // if we have a nextsector but no nextwall, take this as a hint
        // to search only the walls of that sector
        startsec = wall[i].nextsector;
        endsec = startsec+1;
    }
    else
    {
        startsec = 0;
        endsec = numsectors;
    }

    wall[i].nextsector = wall[i].nextwall = -1;

    if (chsecptr_onextwall && (k=chsecptr_onextwall[i])>=0 && wall[k].nextwall<0)
    {
        // old next wall found
        if (WALLS_ARE_CONSISTENT(k))
        {
            j = sectorofwall(k);

            wall[i].nextsector = j;
            wall[i].nextwall = k;
            wall[k].nextsector = sectnum;
            wall[k].nextwall = i;

            return 1;
        }
    }

    for (j=startsec; j<endsec; j++)
    {
        if (j == sectnum)
            continue;

        YAX_SKIPSECTOR(j);

        startwall = sector[j].wallptr;
        endwall = startwall + sector[j].wallnum;
        for (k=startwall; k<endwall; k++)
        {
            if (!WALLS_ARE_CONSISTENT(k))
                continue;

            // Don't create link if the other side is connected to another wall.
            // The nextwall relation should be definitely one-to-one at all times!
            if (wall[k].nextwall>=0 && wall[k].nextwall != i)
                continue;
#ifdef YAX_ENABLE
            yax_getbunches(sectnum, &cb[0], &fb[0]);
            yax_getbunches(j, &cb[1], &fb[1]);

            if ((cb[0]>=0 && cb[0]==cb[1]) || (fb[0]>=0 && fb[0]==fb[1]))
            {
                tmpscore = INT32_MAX;
            }
            else
#endif
            {
                getzsofslope(sectnum, x1,y1, &cz[0],&fz[0]);
                getzsofslope(sectnum, x2,y2, &cz[1],&fz[1]);
                getzsofslope(j, x1,y1, &cz[2],&fz[2]);
                getzsofslope(j, x2,y2, &cz[3],&fz[3]);

                tmp[0] = getscore(cz[0],fz[0], cz[2],fz[2]);
                tmp[1] = getscore(cz[1],fz[1], cz[3],fz[3]);

                if ((tmp[0]^tmp[1]) >= 0)
                    tmpscore = tmp[0]+tmp[1];
                else
                    tmpscore = max(tmp[0], tmp[1]);
            }

            if (bestnextwall == -1 || tmpscore > bestwallscore)
            {
                bestwallscore = tmpscore;
                bestnextwall = k;
                bestnextsec = j;
            }

            numnewwalls++;
        }
    }

    // sectnum -2 means dry run
    if (bestnextwall >= 0 && sectnum!=-2)
#ifdef YAX_ENABLE
        // for walls with TROR neighbors, be conservative in case if score <=0
        // (meaning that no wall area is mutually visible) -- it could be that
        // another sector is a better candidate later on
        if ((yax_getnextwall(i, 0)<0 && yax_getnextwall(i, 1)<0) || bestwallscore>0)
#endif
        {
//    initprintf("w%d new nw=%d (score %d)\n", i, bestnextwall, bestwallscore)
            wall[i].nextsector = bestnextsec;
            wall[i].nextwall = bestnextwall;
            wall[bestnextwall].nextsector = sectnum;
            wall[bestnextwall].nextwall = i;
        }

    return numnewwalls;
}

#undef WALLS_ARE_CONSISTENT


#if defined(_MSC_VER) && !defined(NOASM)

//
// Microsoft C Inline Assembly Routines
//

static inline int32_t nsqrtasm(int32_t a)
{
    _asm
    {
        push ebx
        mov eax, a
        test eax, 0xff000000
        mov ebx, eax
        jnz short over24
        shr ebx, 12
        mov cx, word ptr shlookup[ebx*2]
        jmp short under24
        over24:
        shr ebx, 24
        mov cx, word ptr shlookup[ebx*2+8192]
        under24:
        shr eax, cl
        mov cl, ch
        mov ax, word ptr sqrtable[eax*2]
        shr eax, cl
        pop ebx
    }
}

static inline int32_t msqrtasm(int32_t c)
{
    _asm
    {
        push ebx
        mov ecx, c
        mov eax, 0x40000000
        mov ebx, 0x20000000
        begit:
        cmp ecx, eax
        jl skip
        sub ecx, eax
        lea eax, [eax+ebx*4]
        skip:
        sub eax, ebx
        shr eax, 1
        shr ebx, 2
        jnz begit
        cmp ecx, eax
        sbb eax, -1
        shr eax, 1
        pop ebx
    }
}

//0x007ff000 is (11<<13), 0x3f800000 is (127<<23)
static inline int32_t krecipasm(int32_t a)
{
    _asm
    {
        push ebx
        mov eax, a
        mov fpuasm, eax
        fild dword ptr fpuasm
        add eax, eax
        fstp dword ptr fpuasm
        sbb ebx, ebx
        mov eax, fpuasm
        mov ecx, eax
        and eax, 0x007ff000
        shr eax, 10
        sub ecx, 0x3f800000
        shr ecx, 23
        mov eax, dword ptr reciptable[eax]
        sar eax, cl
        xor eax, ebx
        pop ebx
    }
}

static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
    _asm
    {
        push ebx
        mov eax, a
        mov ebx, b
        mov ecx, c
        mov edx, d
        sar eax, 31
        add ebx, ebx
        adc eax, eax
        add ecx, ecx
        adc eax, eax
        add edx, edx
        adc eax, eax
        mov ebx, eax
        shl ebx, 4
        or al, 0xf0
        xor eax, ebx
        pop ebx
    }
}

static inline int32_t getkensmessagecrc(void *b)
{
    _asm
    {
        push ebx
        mov ebx, b
        xor eax, eax
        mov ecx, 32
        beg:
        mov edx, dword ptr [ebx+ecx*4-4]
        ror edx, cl
        adc eax, edx
        bswap eax
        loop short beg
        pop ebx
    }
}

#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM) // _MSC_VER

//
// GCC "Inline" Assembly Routines
//

#define nsqrtasm(a) \
    ({ int32_t __r, __a=(a); \
       __asm__ __volatile__ ( \
        "testl $0xff000000, %%eax\n\t" \
        "movl %%eax, %%ebx\n\t" \
        "jnz 0f\n\t" \
        "shrl $12, %%ebx\n\t" \
        "movw "ASMSYM("shlookup")"(,%%ebx,2), %%cx\n\t" \
        "jmp 1f\n\t" \
        "0:\n\t" \
        "shrl $24, %%ebx\n\t" \
        "movw ("ASMSYM("shlookup")"+8192)(,%%ebx,2), %%cx\n\t" \
        "1:\n\t" \
        "shrl %%cl, %%eax\n\t" \
        "movb %%ch, %%cl\n\t" \
        "movw "ASMSYM("sqrtable")"(,%%eax,2), %%ax\n\t" \
        "shrl %%cl, %%eax" \
        : "=a" (__r) : "a" (__a) : "ebx", "ecx", "cc"); \
     __r; })


// edx is blown by this code somehow?!
#define msqrtasm(c) \
    ({ int32_t __r, __c=(c); \
       __asm__ __volatile__ ( \
        "movl $0x40000000, %%eax\n\t" \
        "movl $0x20000000, %%ebx\n\t" \
        "0:\n\t" \
        "cmpl %%eax, %%ecx\n\t" \
        "jl 1f\n\t" \
        "subl %%eax, %%ecx\n\t" \
        "leal (%%eax,%%ebx,4), %%eax\n\t" \
        "1:\n\t" \
        "subl %%ebx, %%eax\n\t" \
        "shrl $1, %%eax\n\t" \
        "shrl $2, %%ebx\n\t" \
        "jnz 0b\n\t" \
        "cmpl %%eax, %%ecx\n\t" \
        "sbbl $-1, %%eax\n\t" \
        "shrl $1, %%eax" \
        : "=a" (__r) : "c" (__c) : "edx","ebx", "cc"); \
     __r; })


#define krecipasm(a) \
    ({ int32_t __a=(a); \
       __asm__ __volatile__ ( \
            "movl %%eax, ("ASMSYM("fpuasm")"); fildl ("ASMSYM("fpuasm")"); " \
            "addl %%eax, %%eax; fstps ("ASMSYM("fpuasm")"); sbbl %%ebx, %%ebx; " \
            "movl ("ASMSYM("fpuasm")"), %%eax; movl %%eax, %%ecx; " \
            "andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \
            "shrl $23, %%ecx; movl "ASMSYM("reciptable")"(%%eax), %%eax; " \
            "sarl %%cl, %%eax; xorl %%ebx, %%eax" \
        : "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \
     __a; })


#define getclipmask(a,b,c,d) \
    ({ int32_t __a=(a), __b=(b), __c=(c), __d=(d); \
       __asm__ __volatile__ ("sarl $31, %%eax; addl %%ebx, %%ebx; adcl %%eax, %%eax; " \
                "addl %%ecx, %%ecx; adcl %%eax, %%eax; addl %%edx, %%edx; " \
                "adcl %%eax, %%eax; movl %%eax, %%ebx; shl $4, %%ebx; " \
                "orb $0xf0, %%al; xorl %%ebx, %%eax" \
        : "=a" (__a), "=b" (__b), "=c" (__c), "=d" (__d) \
        : "a" (__a), "b" (__b), "c" (__c), "d" (__d) : "cc"); \
     __a; })



#define getkensmessagecrc(b) \
    ({ int32_t __a, __b=(b); \
       __asm__ __volatile__ ( \
        "xorl %%eax, %%eax\n\t" \
        "movl $32, %%ecx\n\t" \
        "0:\n\t" \
        "movl -4(%%ebx,%%ecx,4), %%edx\n\t" \
        "rorl %%cl, %%edx\n\t" \
        "adcl %%edx, %%eax\n\t" \
        "bswapl %%eax\n\t" \
        "loop 0b" \
        : "=a" (__a) : "b" (__b) : "ecx", "edx" \
     __a; })


#else   // __GNUC__ && __i386__

static inline uint32_t nsqrtasm(uint32_t a)
{
    // JBF 20030901: This was a damn lot simpler to reverse engineer than
    // msqrtasm was. Really, it was just like simplifying an algebra equation.
    uint16_t c;

    if (a & 0xff000000)                         // test eax, 0xff000000  /  jnz short over24
    {
        c = shlookup[(a >> 24) + 4096]; // mov ebx, eax
        // over24: shr ebx, 24
        // mov cx, word ptr shlookup[ebx*2+8192]
    }
    else
    {
        c = shlookup[a >> 12];          // mov ebx, eax
        // shr ebx, 12
        // mov cx, word ptr shlookup[ebx*2]
        // jmp short under24
    }
    a >>= c&0xff;                               // under24: shr eax, cl
    a = (a&0xffff0000)|(sqrtable[a]);   // mov ax, word ptr sqrtable[eax*2]
    a >>= ((c&0xff00) >> 8);            // mov cl, ch
    // shr eax, cl
    return a;
}

static inline int32_t msqrtasm(uint32_t c)
{
    uint32_t a,b;

    a = 0x40000000l;            // mov eax, 0x40000000
    b = 0x20000000l;            // mov ebx, 0x20000000
    do                                  // begit:
    {
        if (c >= a)             // cmp ecx, eax  /  jl skip
        {
            c -= a;             // sub ecx, eax
            a += b*4;   // lea eax, [eax+ebx*4]
        }                       // skip:
        a -= b;                 // sub eax, ebx
        a >>= 1;                // shr eax, 1
        b >>= 2;                // shr ebx, 2
    }
    while (b);                  // jnz begit
    if (c >= a)                 // cmp ecx, eax
        a++;                    // sbb eax, -1
    a >>= 1;                    // shr eax, 1
    return a;
}

static inline int32_t krecipasm(int32_t i)
{
    // Ken did this
    float f = (float)i; i = *(int32_t *)&f;
    return((reciptable[(i>>12)&2047]>>(((i-0x3f800000)>>23)&31))^(i>>31));
}


static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
    // Ken did this
    d = ((a<0)*8) + ((b<0)*4) + ((c<0)*2) + (d<0);
    return(((d<<4)^0xf0)|d);
}

inline int32_t getkensmessagecrc(int32_t b)
{
    UNREFERENCED_PARAMETER(b);
    return 0x56c764d4l;
}

#endif


int32_t xb1[MAXWALLSB];  // Polymost uses this as a temp array
static int32_t yb1[MAXWALLSB], xb2[MAXWALLSB], yb2[MAXWALLSB];
int32_t rx1[MAXWALLSB], ry1[MAXWALLSB];
static int32_t rx2[MAXWALLSB], ry2[MAXWALLSB];
int16_t p2[MAXWALLSB], thesector[MAXWALLSB];
int16_t thewall[MAXWALLSB];

int16_t bunchfirst[MAXWALLSB], bunchlast[MAXWALLSB];

static int32_t smostcnt;
static int16_t smost[MAXYSAVES];
static int32_t smoststart[MAXWALLSB];
static char smostwalltype[MAXWALLSB];
static int32_t smostwall[MAXWALLSB], smostwallcnt = -1;

int16_t maskwall[MAXWALLSB], maskwallcnt;
static int32_t spritesx[MAXSPRITESONSCREEN];
static int32_t spritesy[MAXSPRITESONSCREEN+1];
static int32_t spritesz[MAXSPRITESONSCREEN];
spritetype *tspriteptr[MAXSPRITESONSCREEN + 1];

static int16_t umost[MAXXDIM], dmost[MAXXDIM];
static int16_t bakumost[MAXXDIM], bakdmost[MAXXDIM];
static int16_t uplc[MAXXDIM], dplc[MAXXDIM];
static int16_t uwall[MAXXDIM], dwall[MAXXDIM];
static int32_t swplc[MAXXDIM], lplc[MAXXDIM];
static int32_t swall[MAXXDIM], lwall[MAXXDIM+4];
int32_t xdimen = -1, xdimenrecip, halfxdimen, xdimenscale, xdimscale;
int32_t ydimen;
static int32_t wx1, wy1, wx2, wy2;
intptr_t /*viewoffset,*/ frameoffset;

static int32_t nrx1[8], nry1[8], nrx2[8], nry2[8]; // JBF 20031206: Thanks Ken

static int32_t rxi[8], ryi[8], rzi[8], rxi2[8], ryi2[8], rzi2[8];
static int32_t xsi[8], ysi[8], horizycent;
static intptr_t *horizlookup=0, *horizlookup2=0;

int32_t globalposx, globalposy, globalposz, globalhoriz;
int16_t globalang, globalcursectnum;
int32_t globalpal, cosglobalang, singlobalang;
int32_t cosviewingrangeglobalang, sinviewingrangeglobalang;
static char *globalpalwritten;
static int32_t globaluclip, globaldclip, globvis;
int32_t globalvisibility;
static int32_t globalhisibility, globalpisibility, globalcisibility;
//char globparaceilclip, globparaflorclip;

int32_t xyaspect;
static int32_t viewingrangerecip;

intptr_t asm1, asm2, asm3, asm4, palookupoffse[4];
int32_t vplce[4], vince[4], bufplce[4];
static char globalxshift, globalyshift;
static int32_t globalxpanning, globalypanning;
int32_t globalshade, globalorientation;
int16_t globalpicnum;
static int16_t globalshiftval;
static int32_t globalzd, globalyscale;
static int32_t globalxspan, globalyspan, globalispow2=1;  // true if texture has power-of-two x and y size
static intptr_t globalbufplc;

int32_t globalx1, globaly2, globalx3, globaly3;
static int32_t globaly1, globalx2, globalzx;
static int32_t globalx, globaly, globalz;

int16_t sectorborder[256], sectorbordercnt;
static char tablesloaded = 0;
int32_t ydim16, qsetmode = 0;
int32_t startposx, startposy, startposz;
int16_t startang, startsectnum;
int16_t pointhighlight=-1, linehighlight=-1, highlightcnt=0;
#ifndef OBSOLETE_RENDMODES
static int32_t lastx[MAXYDIM];
static char *transluc;
#else
int32_t lastx[MAXYDIM];
char *transluc;
#endif

static char paletteloaded = 0;

int32_t halfxdim16, midydim16;

#define FASTPALGRIDSIZ 8
static int32_t rdist[129], gdist[129], bdist[129];
static char colhere[((FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2))>>3];
static char colhead[(FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2)];
static int32_t colnext[256];
static char coldist[8] = {0,1,2,3,4,3,2,1};
static int32_t colscan[27];

static int16_t clipnum, hitwalls[4];
const int32_t hitscangoalx = (1<<29)-1, hitscangoaly = (1<<29)-1;
#ifdef USE_OPENGL
int32_t hitallsprites = 0;
#endif

typedef struct { int32_t x1, y1, x2, y2; } linetype;
static linetype clipit[MAXCLIPNUM];
static int16_t clipsectorlist[MAXCLIPNUM], clipsectnum, origclipsectorlist[MAXCLIPNUM], origclipsectnum;
static int16_t clipspritelist[MAXCLIPNUM], clipspritenum;  // sector-like sprite clipping
static int16_t clipobjectval[MAXCLIPNUM];

typedef struct
{
    int32_t sx, sy, z;
    int16_t a, picnum;
    int8_t dashade;
    char dapalnum, dastat, pagesleft;
    int32_t cx1, cy1, cx2, cy2;
    int32_t uniqid;    //JF extension
} permfifotype;
static permfifotype permfifo[MAXPERMS];
static int32_t permhead = 0, permtail = 0;

int16_t numscans, numbunches;
static int16_t numhits;
int16_t capturecount = 0;

char vgapal16[4*256] =
{
    00,00,00,00, 42,00,00,00, 00,42,00,00, 42,42,00,00, 00,00,42,00,
    42,00,42,00, 00,21,42,00, 42,42,42,00, 21,21,21,00, 63,21,21,00,
    21,63,21,00, 63,63,21,00, 21,21,63,00, 63,21,63,00, 21,63,63,00,
    63,63,63,00
};

int16_t searchit;
int32_t searchx = -1, searchy;                          //search input
int16_t searchsector, searchwall, searchstat;     //search output

// SEARCHBOTTOMWALL:
//   When aiming at a the bottom part of a 2-sided wall whose bottom part
//   is swapped (.cstat&2), searchbottomwall equals that wall's .nextwall. In all
//   other cases (when aiming at a wall), searchbottomwall equals searchwall.
//
// SEARCHISBOTTOM:
//  When aiming at a 2-sided wall, 1 if aiming at the bottom part, 0 else
int16_t searchbottomwall, searchisbottom;

double msens = 1.0;

static char artfilename[20];
static int32_t artfil = -1, artfilnum, artfilplc;

char inpreparemirror = 0;
static int32_t mirrorsx1, mirrorsy1, mirrorsx2, mirrorsy2;

static int32_t setviewcnt = 0; // interface layers use this now
static int32_t bakframeplace[4], bakxsiz[4], bakysiz[4];
static int32_t bakwindowx1[4], bakwindowy1[4];
static int32_t bakwindowx2[4], bakwindowy2[4];
#ifdef USE_OPENGL
static int32_t bakrendmode,baktile;
#endif

int32_t totalclocklock;

char apptitle[256] = "Build Engine";

static uint8_t **basepaltableptr;
uint8_t basepalcount;
uint8_t curbasepal;

static uint32_t g_lastpalettesum = 0;
palette_t curpalette[256];                      // the current palette, unadjusted for brightness or tint
palette_t curpalettefaded[256];         // the current palette, adjusted for brightness and tint (ie. what gets sent to the card)
palette_t palfadergb = { 0,0,0,0 };
char palfadedelta = 0;



//
// Internal Engine Functions
//
//int32_t cacheresets = 0,cacheinvalidates = 0;


//
// getpalookup (internal)
//
static inline int32_t getpalookup(int32_t davis, int32_t dashade)
{
    return(min(max(dashade+(davis>>8),0),numpalookups-1));
}

static void setpalettefade_calc(uint8_t offset);

void fade_screen_black(int32_t moreopaquep)
{
    if (getrendermode() >= 3)
    {
        // we have a ~1 px horizontal line if we don't shift y by -1,
        // might need a fix in rotatesprite!
        rotatesprite_fs(0, -(1<<16), 65536<<3, 0, 0,0,4,10+16+1+((!moreopaquep)*32)+1024);  // tile 0: 64x32
    }
    else
    {
        assert(!offscreenrendering);

        begindrawing();
        {
            char *const p = (char *)frameplace;
            const char *const trans = transluc;
            const int32_t shiftamnt = ((!!moreopaquep)*8);
            const int32_t dimprod = xdim*ydim;

            int32_t i;

            for (i=0; i<dimprod; i++)
                p[i] = trans[p[i]<<shiftamnt];
        }
        enddrawing();
    }
}


// returns: 0=continue sprite collecting;
//          1=break out of sprite collecting;
int32_t engine_addtsprite(int16_t z, int16_t sectnum)
{
    spritetype *spr = &sprite[z];
#ifdef YAX_ENABLE
    int16_t cb, fb, *sortcnt;
    int32_t spheight, spzofs;

    if (g_nodraw==0)
    {
        if (numyaxbunches==0)
        {
#endif
            if (spritesortcnt >= MAXSPRITESONSCREEN)
                return 1;

            copybufbyte(spr,&tsprite[spritesortcnt],sizeof(spritetype));
            spriteext[z].tspr = (spritetype *)&tsprite[spritesortcnt];
            tsprite[spritesortcnt++].owner = z;

#ifdef YAX_ENABLE
        }
    }
    else
#ifdef YAX_ENABLE
        if (yax_nomaskpass==0)
#endif
    {
        sortcnt = &yax_spritesortcnt[yax_globallev];
        if (*sortcnt >= MAXSPRITESONSCREEN)
            return 1;

        yax_tsprite[yax_globallev][*sortcnt] = z;
        if (yax_globalbunch >= 0)
        {
            yax_tsprite[yax_globallev][*sortcnt] |= (MAXSPRITES|(MAXSPRITES<<1));
            yax_tsprfrombunch[yax_globallev][*sortcnt] = yax_globalbunch;
        }
        (*sortcnt)++;

        // now check whether the tsprite needs duplication into another level
        if ((spr->cstat&48)==32)
            return 0;

        yax_getbunches(sectnum, &cb, &fb);
        if (cb < 0 && fb < 0)
            return 0;

        spriteheightofs(z, &spheight, &spzofs, 1);

        // TODO: get*zofslope?
        if (cb>=0 && spr->z+spzofs-spheight < sector[sectnum].ceilingz)
        {
            sortcnt = &yax_spritesortcnt[yax_globallev-1];
            if (*sortcnt < MAXSPRITESONSCREEN)
            {
                yax_tsprite[yax_globallev-1][*sortcnt] = z|MAXSPRITES;
                (*sortcnt)++;
            }
        }
        if (fb>=0 && spr->z+spzofs > sector[sectnum].floorz)
        {
            sortcnt = &yax_spritesortcnt[yax_globallev+1];
            if (*sortcnt < MAXSPRITESONSCREEN)
            {
                yax_tsprite[yax_globallev+1][*sortcnt] = z|(MAXSPRITES<<1);
                (*sortcnt)++;
            }
        }
    }
#endif

    return 0;
}

//
// scansector (internal)
//
static void scansector(int16_t sectnum)
{
    walltype *wal, *wal2;
    spritetype *spr;
    int32_t xs, ys, x1, y1, x2, y2, xp1, yp1, xp2=0, yp2=0, tempint;
    int16_t z, zz, startwall, endwall, numscansbefore, scanfirst, bunchfrst;
    int16_t nextsectnum;

    if (sectnum < 0) return;

//    if (automapping) show2dsector[sectnum>>3] |= pow2char[sectnum&7];

    sectorborder[0] = sectnum, sectorbordercnt = 1;
    do
    {
        sectnum = sectorborder[--sectorbordercnt];
#ifdef YAX_ENABLE
        if (scansector_collectsprites)
#endif
        for (z=headspritesect[sectnum]; z>=0; z=nextspritesect[z])
        {
            spr = &sprite[z];
            if ((((spr->cstat&0x8000) == 0) || (showinvisibility)) &&
                    (spr->xrepeat > 0) && (spr->yrepeat > 0))
            {
                xs = spr->x-globalposx; ys = spr->y-globalposy;
                if ((spr->cstat&48) || (xs*cosglobalang+ys*singlobalang > 0))
                    if ((spr->cstat&(64+48))!=(64+16) || dmulscale6(sintable[(spr->ang+512)&2047],-xs, sintable[spr->ang&2047],-ys) > 0)
                        if (engine_addtsprite(z, sectnum))
                            break;
            }
        }

        gotsector[sectnum>>3] |= pow2char[sectnum&7];

        bunchfrst = numbunches;
        numscansbefore = numscans;

        startwall = sector[sectnum].wallptr;
        endwall = startwall + sector[sectnum].wallnum;
        scanfirst = numscans;
        for (z=startwall,wal=&wall[z]; z<endwall; z++,wal++)
        {
            nextsectnum = wal->nextsector;

            wal2 = &wall[wal->point2];
            x1 = wal->x-globalposx; y1 = wal->y-globalposy;
            x2 = wal2->x-globalposx; y2 = wal2->y-globalposy;

            if ((nextsectnum >= 0) && ((wal->cstat&32) == 0))
#ifdef YAX_ENABLE
                if (yax_nomaskpass==0 || !yax_isislandwall(z, !yax_globalcf) || (yax_nomaskdidit=1, 0))
#endif
                if ((gotsector[nextsectnum>>3]&pow2char[nextsectnum&7]) == 0)
                {
                    tempint = x1*y2-x2*y1;
                    if (((unsigned)tempint+262144) < 524288)  // BXY_MAX?
                        if (mulscale5(tempint,tempint) <= (x2-x1)*(x2-x1)+(y2-y1)*(y2-y1))
                            sectorborder[sectorbordercnt++] = nextsectnum;
                }

            if ((z == startwall) || (wall[z-1].point2 != z))
            {
                xp1 = dmulscale6(y1,cosglobalang,-x1,singlobalang);
                yp1 = dmulscale6(x1,cosviewingrangeglobalang,y1,sinviewingrangeglobalang);
            }
            else
            {
                xp1 = xp2;
                yp1 = yp2;
            }
            xp2 = dmulscale6(y2,cosglobalang,-x2,singlobalang);
            yp2 = dmulscale6(x2,cosviewingrangeglobalang,y2,sinviewingrangeglobalang);
            if ((yp1 < 256) && (yp2 < 256)) goto skipitaddwall;

            //If wall's NOT facing you
            if (dmulscale32(xp1,yp2,-xp2,yp1) >= 0) goto skipitaddwall;

            if (xp1 >= -yp1)
            {
                if ((xp1 > yp1) || (yp1 == 0)) goto skipitaddwall;
                xb1[numscans] = halfxdimen + scale(xp1,halfxdimen,yp1);
                if (xp1 >= 0) xb1[numscans]++;   //Fix for SIGNED divide
                if (xb1[numscans] >= xdimen) xb1[numscans] = xdimen-1;
                yb1[numscans] = yp1;
            }
            else
            {
                if (xp2 < -yp2) goto skipitaddwall;
                xb1[numscans] = 0;
                tempint = yp1-yp2+xp1-xp2;
                if (tempint == 0) goto skipitaddwall;
                yb1[numscans] = yp1 + scale(yp2-yp1,xp1+yp1,tempint);
            }
            if (yb1[numscans] < 256) goto skipitaddwall;

            if (xp2 <= yp2)
            {
                if ((xp2 < -yp2) || (yp2 == 0)) goto skipitaddwall;
                xb2[numscans] = halfxdimen + scale(xp2,halfxdimen,yp2) - 1;
                if (xp2 >= 0) xb2[numscans]++;   //Fix for SIGNED divide
                if (xb2[numscans] >= xdimen) xb2[numscans] = xdimen-1;
                yb2[numscans] = yp2;
            }
            else
            {
                if (xp1 > yp1) goto skipitaddwall;
                xb2[numscans] = xdimen-1;
                tempint = xp2-xp1+yp1-yp2;
                if (tempint == 0) goto skipitaddwall;
                yb2[numscans] = yp1 + scale(yp2-yp1,yp1-xp1,tempint);
            }
            if ((yb2[numscans] < 256) || (xb1[numscans] > xb2[numscans])) goto skipitaddwall;

            //Made it all the way!
            thesector[numscans] = sectnum; thewall[numscans] = z;
            rx1[numscans] = xp1; ry1[numscans] = yp1;
            rx2[numscans] = xp2; ry2[numscans] = yp2;
            p2[numscans] = numscans+1;
            numscans++;
skipitaddwall:

            if ((wall[z].point2 < z) && (scanfirst < numscans))
                p2[numscans-1] = scanfirst, scanfirst = numscans;
        }

        for (z=numscansbefore; z<numscans; z++)
            if ((wall[thewall[z]].point2 != thewall[p2[z]]) || (xb2[z] >= xb1[p2[z]]))
            {
                bunchfirst[numbunches++] = p2[z], p2[z] = -1;
#ifdef YAX_ENABLE
                if (scansector_retfast)
                    return;
#endif
            }

        for (z=bunchfrst; z<numbunches; z++)
        {
            for (zz=bunchfirst[z]; p2[zz]>=0; zz=p2[zz]);
            bunchlast[z] = zz;
        }
    }
    while (sectorbordercnt > 0);
}


//
// maskwallscan (internal)
//
static void maskwallscan(int32_t x1, int32_t x2, int16_t *uwal, int16_t *dwal, int32_t *swal, int32_t *lwal)
{
    int32_t x,/* startx,*/ xnice, ynice;
    intptr_t startx, p, fpalookup;
    int32_t y1ve[4], y2ve[4], /* p,*/ tsizx, tsizy;
#ifndef ENGINE_USING_A_C
    char bad;
    int32_t i, u4, d4, dax, z;
#endif

    tsizx = tilesizx[globalpicnum];
    tsizy = tilesizy[globalpicnum];
    setgotpic(globalpicnum);
    if ((tsizx <= 0) || (tsizy <= 0)) return;
    if ((uwal[x1] > ydimen) && (uwal[x2] > ydimen)) return;
    if ((dwal[x1] < 0) && (dwal[x2] < 0)) return;

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    startx = x1;

    xnice = (pow2long[picsiz[globalpicnum]&15] == tsizx);
    if (xnice) tsizx = (tsizx-1);
    ynice = (pow2long[picsiz[globalpicnum]>>4] == tsizy);
    if (ynice) tsizy = (picsiz[globalpicnum]>>4);

    if (palookup[globalpal] == NULL)
        globalpal = 0;

    fpalookup = FP_OFF(palookup[globalpal]);

    setupmvlineasm(globalshiftval);

#ifndef ENGINE_USING_A_C

    x = startx;
    while ((startumost[x+windowx1] > startdmost[x+windowx1]) && (x <= x2)) x++;

    p = x+frameoffset;

    for (; (x<=x2)&&(p&3); x++,p++)
    {
        y1ve[0] = max(uwal[x],startumost[x+windowx1]-windowy1);
        y2ve[0] = min(dwal[x],startdmost[x+windowx1]-windowy1);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);

        bufplce[0] = lwal[x] + globalxpanning;
        if (bufplce[0] >= tsizx) { if (xnice == 0) bufplce[0] %= tsizx; else bufplce[0] &= tsizx; }
        if (ynice == 0) bufplce[0] *= tsizy; else bufplce[0] <<= tsizy;

        vince[0] = swal[x]*globalyscale;
        vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);

        mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0]+waloff[globalpicnum],p+ylookup[y1ve[0]]);
    }
    for (; x<=x2-3; x+=4,p+=4)
    {
        bad = 0;
        for (z=3,dax=x+3; z>=0; z--,dax--)
        {
            y1ve[z] = max(uwal[dax],startumost[dax+windowx1]-windowy1);
            y2ve[z] = min(dwal[dax],startdmost[dax+windowx1]-windowy1)-1;
            if (y2ve[z] < y1ve[z]) { bad += pow2char[z]; continue; }

            i = lwal[dax] + globalxpanning;
            if (i >= tsizx) { if (xnice == 0) i %= tsizx; else i &= tsizx; }
            if (ynice == 0) i *= tsizy; else i <<= tsizy;
            bufplce[z] = waloff[globalpicnum]+i;

            vince[z] = swal[dax]*globalyscale;
            vplce[z] = globalzd + vince[z]*(y1ve[z]-globalhoriz+1);
        }
        if (bad == 15) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);
        palookupoffse[3] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x+3],globvis),globalshade)<<8);

        if ((palookupoffse[0] == palookupoffse[3]) && ((bad&0x9) == 0))
        {
            palookupoffse[1] = palookupoffse[0];
            palookupoffse[2] = palookupoffse[0];
        }
        else
        {
            palookupoffse[1] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x+1],globvis),globalshade)<<8);
            palookupoffse[2] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x+2],globvis),globalshade)<<8);
        }

        u4 = max(max(y1ve[0],y1ve[1]),max(y1ve[2],y1ve[3]));
        d4 = min(min(y2ve[0],y2ve[1]),min(y2ve[2],y2ve[3]));

        if ((bad > 0) || (u4 >= d4))
        {
            if (!(bad&1)) mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0],vplce[0],bufplce[0],ylookup[y1ve[0]]+p+0);
            if (!(bad&2)) mvlineasm1(vince[1],palookupoffse[1],y2ve[1]-y1ve[1],vplce[1],bufplce[1],ylookup[y1ve[1]]+p+1);
            if (!(bad&4)) mvlineasm1(vince[2],palookupoffse[2],y2ve[2]-y1ve[2],vplce[2],bufplce[2],ylookup[y1ve[2]]+p+2);
            if (!(bad&8)) mvlineasm1(vince[3],palookupoffse[3],y2ve[3]-y1ve[3],vplce[3],bufplce[3],ylookup[y1ve[3]]+p+3);
            continue;
        }

        if (u4 > y1ve[0]) vplce[0] = mvlineasm1(vince[0],palookupoffse[0],u4-y1ve[0]-1,vplce[0],bufplce[0],ylookup[y1ve[0]]+p+0);
        if (u4 > y1ve[1]) vplce[1] = mvlineasm1(vince[1],palookupoffse[1],u4-y1ve[1]-1,vplce[1],bufplce[1],ylookup[y1ve[1]]+p+1);
        if (u4 > y1ve[2]) vplce[2] = mvlineasm1(vince[2],palookupoffse[2],u4-y1ve[2]-1,vplce[2],bufplce[2],ylookup[y1ve[2]]+p+2);
        if (u4 > y1ve[3]) vplce[3] = mvlineasm1(vince[3],palookupoffse[3],u4-y1ve[3]-1,vplce[3],bufplce[3],ylookup[y1ve[3]]+p+3);

        if (d4 >= u4) mvlineasm4(d4-u4+1,ylookup[u4]+p);

        i = p+ylookup[d4+1];
        if (y2ve[0] > d4) mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-d4-1,vplce[0],bufplce[0],i+0);
        if (y2ve[1] > d4) mvlineasm1(vince[1],palookupoffse[1],y2ve[1]-d4-1,vplce[1],bufplce[1],i+1);
        if (y2ve[2] > d4) mvlineasm1(vince[2],palookupoffse[2],y2ve[2]-d4-1,vplce[2],bufplce[2],i+2);
        if (y2ve[3] > d4) mvlineasm1(vince[3],palookupoffse[3],y2ve[3]-d4-1,vplce[3],bufplce[3],i+3);
    }
    for (; x<=x2; x++,p++)
    {
        y1ve[0] = max(uwal[x],startumost[x+windowx1]-windowy1);
        y2ve[0] = min(dwal[x],startdmost[x+windowx1]-windowy1);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);

        bufplce[0] = lwal[x] + globalxpanning;
        if (bufplce[0] >= tsizx) { if (xnice == 0) bufplce[0] %= tsizx; else bufplce[0] &= tsizx; }
        if (ynice == 0) bufplce[0] *= tsizy; else bufplce[0] <<= tsizy;

        vince[0] = swal[x]*globalyscale;
        vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);

        mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0]+waloff[globalpicnum],p+ylookup[y1ve[0]]);
    }

#else   // ENGINE_USING_A_C

    p = startx+frameoffset;
    for (x=startx; x<=x2; x++,p++)
    {
        y1ve[0] = max(uwal[x],startumost[x+windowx1]-windowy1);
        y2ve[0] = min(dwal[x],startdmost[x+windowx1]-windowy1);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);

        bufplce[0] = lwal[x] + globalxpanning;
        if (bufplce[0] >= tsizx) { if (xnice == 0) bufplce[0] %= tsizx; else bufplce[0] &= tsizx; }
        if (ynice == 0) bufplce[0] *= tsizy; else bufplce[0] <<= tsizy;

        vince[0] = swal[x]*globalyscale;
        vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);

        mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0]+waloff[globalpicnum],p+ylookup[y1ve[0]]);
    }

#endif

    faketimerhandler();
}


//
// wallfront (internal)
//
int32_t wallfront(int32_t l1, int32_t l2)
{
    walltype *wal;
    int32_t x11, y11, x21, y21, x12, y12, x22, y22, dx, dy, t1, t2;

    wal = &wall[thewall[l1]]; x11 = wal->x; y11 = wal->y;
    wal = &wall[wal->point2]; x21 = wal->x; y21 = wal->y;
    wal = &wall[thewall[l2]]; x12 = wal->x; y12 = wal->y;
    wal = &wall[wal->point2]; x22 = wal->x; y22 = wal->y;

    dx = x21-x11; dy = y21-y11;
    t1 = dmulscale2(x12-x11,dy,-dx,y12-y11); //p1(l2) vs. l1
    t2 = dmulscale2(x22-x11,dy,-dx,y22-y11); //p2(l2) vs. l1
    if (t1 == 0) { t1 = t2; if (t1 == 0) return(-1); }
    if (t2 == 0) t2 = t1;
    if ((t1^t2) >= 0)
    {
        t2 = dmulscale2(globalposx-x11,dy,-dx,globalposy-y11); //pos vs. l1
        return((t2^t1) >= 0);
    }

    dx = x22-x12; dy = y22-y12;
    t1 = dmulscale2(x11-x12,dy,-dx,y11-y12); //p1(l1) vs. l2
    t2 = dmulscale2(x21-x12,dy,-dx,y21-y12); //p2(l1) vs. l2
    if (t1 == 0) { t1 = t2; if (t1 == 0) return(-1); }
    if (t2 == 0) t2 = t1;
    if ((t1^t2) >= 0)
    {
        t2 = dmulscale2(globalposx-x12,dy,-dx,globalposy-y12); //pos vs. l2
        return((t2^t1) < 0);
    }
    return(-2);
}


//
// spritewallfront (internal)
//
static inline int32_t spritewallfront(spritetype *s, int32_t w)
{
    walltype *wal;
    int32_t x1, y1;

    wal = &wall[w]; x1 = wal->x; y1 = wal->y;
    wal = &wall[wal->point2];
    return (dmulscale32(wal->x-x1,s->y-y1,-(s->x-x1),wal->y-y1) >= 0);
}

//
//  spritebehindwall(internal)
//
#if 0
static int32_t spriteobstructswall(spritetype *s, int32_t w)
{
    walltype *wal;
    int32_t x, y;
    int32_t x1, y1;
    int32_t x2, y2;
    double a1, b1, c1;
    double a2, b2, c2;
    double d1, d2;

    // wall line equation
    wal = &wall[w]; x1 = wal->x - globalposx; y1 = wal->y - globalposy;
    wal = &wall[wal->point2]; x2 = wal->x - globalposx; y2 = wal->y - globalposy;
    if ((x2 - x1) != 0)
        a1 = (float)(y2 - y1)/(x2 - x1);
    else
        a1 = 1e+37; // not infinite, but almost ;)
    b1 = -1;
    c1 = (y1 - (a1 * x1));

    // player to sprite line equation
    if ((s->x - globalposx) != 0)
        a2 = (float)(s->y - globalposy)/(s->x - globalposx);
    else
        a2 = 1e+37;
    b2 = -1;
    c2 = 0;

    // intersection point
    d1 = (float)(1) / (a1*b2 - a2*b1);
    x = ((b1*c2 - b2*c1) * d1);
    y = ((a2*c1 - a1*c2) * d1);

    // distance between the sprite and the player
    a1 = s->x - globalposx;
    b1 = s->y - globalposy;
    d1 = (a1 * a1 + b1 * b1);

    // distance between the intersection point and the player
    d2 = (x * x + y * y);

    // check if the sprite obstructs the wall
    if ((d1 < d2) && (min(x1, x2) <= x) && (x <= max(x1, x2)) && (min(y1, y2) <= y) && (y <= max(y1, y2)))
        return (1);
    else
        return (0);
}
#endif
//
// bunchfront (internal)
//
static inline int32_t bunchfront(int32_t b1, int32_t b2)
{
    int32_t x1b1, x2b1, x1b2, x2b2, b1f, b2f, i;

    b1f = bunchfirst[b1]; x1b1 = xb1[b1f]; x2b2 = xb2[bunchlast[b2]]+1;
    if (x1b1 >= x2b2) return(-1);
    b2f = bunchfirst[b2]; x1b2 = xb1[b2f]; x2b1 = xb2[bunchlast[b1]]+1;
    if (x1b2 >= x2b1) return(-1);

    if (x1b1 >= x1b2)
    {
        for (i=b2f; xb2[i]<x1b1; i=p2[i]);
        return(wallfront(b1f,i));
    }
    for (i=b1f; xb2[i]<x1b2; i=p2[i]);
    return(wallfront(i,b2f));
}


//
// hline (internal)
//
static inline void hline(int32_t xr, int32_t yp)
{
    int32_t xl, r, s;

    xl = lastx[yp]; if (xl > xr) return;
    r = horizlookup2[yp-globalhoriz+horizycent];
    asm1 = globalx1*r;
    asm2 = globaly2*r;
    s = ((int32_t)getpalookup((int32_t)mulscale16(r,globvis),globalshade)<<8);

    hlineasm4(xr-xl,0,s,globalx2*r+globalypanning,globaly1*r+globalxpanning,
              ylookup[yp]+xr+frameoffset);
}


//
// slowhline (internal)
//
static inline void slowhline(int32_t xr, int32_t yp)
{
    int32_t xl, r;

    xl = lastx[yp]; if (xl > xr) return;
    r = horizlookup2[yp-globalhoriz+horizycent];
    asm1 = globalx1*r;
    asm2 = globaly2*r;

    asm3 = (intptr_t)globalpalwritten + ((intptr_t)getpalookup((int32_t)mulscale16(r,globvis),globalshade)<<8);
    if (!(globalorientation&256))
    {
        mhline(globalbufplc,globaly1*r+globalxpanning-asm1*(xr-xl),(xr-xl)<<16,0L,
               globalx2*r+globalypanning-asm2*(xr-xl),ylookup[yp]+xl+frameoffset);
        return;
    }
    thline(globalbufplc,globaly1*r+globalxpanning-asm1*(xr-xl),(xr-xl)<<16,0L,
           globalx2*r+globalypanning-asm2*(xr-xl),ylookup[yp]+xl+frameoffset);
}


//
// prepwall (internal)
//
static void prepwall(int32_t z, walltype *wal)
{
    int32_t i, l=0, ol=0, splc, sinc, x, topinc, top, botinc, bot, walxrepeat;

    walxrepeat = (wal->xrepeat<<3);

    //lwall calculation
    i = xb1[z]-halfxdimen;
    topinc = -(ry1[z]>>2);
    botinc = ((ry2[z]-ry1[z])>>8);
    top = mulscale5(rx1[z],xdimen)+mulscale2(topinc,i);
    bot = mulscale11(rx1[z]-rx2[z],xdimen)+mulscale2(botinc,i);

    splc = mulscale19(ry1[z],xdimscale);
    sinc = mulscale16(ry2[z]-ry1[z],xdimscale);

    x = xb1[z];
    if (bot != 0)
    {
        l = divscale12(top,bot);
        swall[x] = mulscale21(l,sinc)+splc;
        l *= walxrepeat;
        lwall[x] = (l>>18);
    }
    while (x+4 <= xb2[z])
    {
        top += topinc; bot += botinc;
        if (bot != 0)
        {
            ol = l; l = divscale12(top,bot);
            swall[x+4] = mulscale21(l,sinc)+splc;
            l *= walxrepeat;
            lwall[x+4] = (l>>18);
        }
        i = ((ol+l)>>1);
        lwall[x+2] = (i>>18);
        lwall[x+1] = ((ol+i)>>19);
        lwall[x+3] = ((l+i)>>19);
        swall[x+2] = ((swall[x]+swall[x+4])>>1);
        swall[x+1] = ((swall[x]+swall[x+2])>>1);
        swall[x+3] = ((swall[x+4]+swall[x+2])>>1);
        x += 4;
    }
    if (x+2 <= xb2[z])
    {
        top += (topinc>>1); bot += (botinc>>1);
        if (bot != 0)
        {
            ol = l; l = divscale12(top,bot);
            swall[x+2] = mulscale21(l,sinc)+splc;
            l *= walxrepeat;
            lwall[x+2] = (l>>18);
        }
        lwall[x+1] = ((l+ol)>>19);
        swall[x+1] = ((swall[x]+swall[x+2])>>1);
        x += 2;
    }
    if (x+1 <= xb2[z])
    {
        bot += (botinc>>2);
        if (bot != 0)
        {
            l = divscale12(top+(topinc>>2),bot);
            swall[x+1] = mulscale21(l,sinc)+splc;
            lwall[x+1] = mulscale18(l,walxrepeat);
        }
    }

    if (lwall[xb1[z]] < 0) lwall[xb1[z]] = 0;
    if ((lwall[xb2[z]] >= walxrepeat) && (walxrepeat)) lwall[xb2[z]] = walxrepeat-1;
    if (wal->cstat&8)
    {
        walxrepeat--;
        for (x=xb1[z]; x<=xb2[z]; x++) lwall[x] = walxrepeat-lwall[x];
    }
}


//
// animateoffs (internal)
//
inline int32_t animateoffs(int16_t tilenum, int16_t fakevar)
{
    int32_t i, k, offs;

    UNREFERENCED_PARAMETER(fakevar);

    offs = 0;
    i = (totalclocklock>>((picanm[tilenum]>>24)&15));
    if ((picanm[tilenum]&63) > 0)
    {
        switch (picanm[tilenum]&192)
        {
        case 64:
            k = (i%((picanm[tilenum]&63)<<1));
            if (k < (picanm[tilenum]&63))
                offs = k;
            else
                offs = (((picanm[tilenum]&63)<<1)-k);
            break;
        case 128:
            offs = (i%((picanm[tilenum]&63)+1));
            break;
        case 192:
            offs = -(i%((picanm[tilenum]&63)+1));
        }
    }
    return(offs);
}


//
// owallmost (internal)
//
static int32_t owallmost(int16_t *mostbuf, int32_t w, int32_t z)
{
    int32_t bad, inty, xcross, y, yinc;
    int32_t s1, s2, s3, s4, ix1, ix2, iy1, iy2, t;
    int32_t i, tmp;

    z <<= 7;
    s1 = mulscale20(globaluclip,yb1[w]); s2 = mulscale20(globaluclip,yb2[w]);
    s3 = mulscale20(globaldclip,yb1[w]); s4 = mulscale20(globaldclip,yb2[w]);
    bad = (z<s1)+((z<s2)<<1)+((z>s3)<<2)+((z>s4)<<3);

    ix1 = xb1[w]; iy1 = yb1[w];
    ix2 = xb2[w]; iy2 = yb2[w];

    if ((bad&3) == 3)
    {
        //clearbufbyte(&mostbuf[ix1],(ix2-ix1+1)*sizeof(mostbuf[0]),0L);
        for (i=ix1; i<=ix2; i++) mostbuf[i] = 0;
        return(bad);
    }

    if ((bad&12) == 12)
    {
        //clearbufbyte(&mostbuf[ix1],(ix2-ix1+1)*sizeof(mostbuf[0]),ydimen+(ydimen<<16));
        for (i=ix1; i<=ix2; i++) mostbuf[i] = ydimen;
        return(bad);
    }

    if (bad&3)
    {
        t = divscale30(z-s1,s2-s1);
        inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        if ((bad&3) == 2)
        {
            if (xb1[w] <= xcross) { iy2 = inty; ix2 = xcross; }
            //clearbufbyte(&mostbuf[xcross+1],(xb2[w]-xcross)*sizeof(mostbuf[0]),0L);
            for (i=xcross+1; i<=xb2[w]; i++) mostbuf[i] = 0;
        }
        else
        {
            if (xcross <= xb2[w]) { iy1 = inty; ix1 = xcross; }
            //clearbufbyte(&mostbuf[xb1[w]],(xcross-xb1[w]+1)*sizeof(mostbuf[0]),0L);
            for (i=xb1[w]; i<=xcross; i++) mostbuf[i] = 0;
        }
    }

    if (bad&12)
    {
        t = divscale30(z-s3,s4-s3);
        inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        if ((bad&12) == 8)
        {
            if (xb1[w] <= xcross) { iy2 = inty; ix2 = xcross; }
            //clearbufbyte(&mostbuf[xcross+1],(xb2[w]-xcross)*sizeof(mostbuf[0]),ydimen+(ydimen<<16));
            for (i=xcross+1; i<=xb2[w]; i++) mostbuf[i] = ydimen;
        }
        else
        {
            if (xcross <= xb2[w]) { iy1 = inty; ix1 = xcross; }
            //clearbufbyte(&mostbuf[xb1[w]],(xcross-xb1[w]+1)*sizeof(mostbuf[0]),ydimen+(ydimen<<16));
            for (i=xb1[w]; i<=xcross; i++) mostbuf[i] = ydimen;
        }
    }

    y = (scale(z,xdimenscale,iy1)<<4);

//// enable for paranoia:
//    ix1 = clamp(ix1, 0, xres-1);
//    ix2 = clamp(ix2, 0, xres-1);
//    if (ix2-ix1 < 0)
//        swaplong(&ix1, &ix2);

    // PK 20110423: a bit consistency checking is a good thing:
    tmp = (ix2-ix1 >= 0) ? (ix2-ix1+1) : 1;
    yinc = ((scale(z,xdimenscale,iy2)<<4)-y) / tmp;
    qinterpolatedown16short((intptr_t)&mostbuf[ix1],tmp,y+(globalhoriz<<16),yinc);

    if (mostbuf[ix1] < 0) mostbuf[ix1] = 0;
    if (mostbuf[ix1] > ydimen) mostbuf[ix1] = ydimen;
    if (mostbuf[ix2] < 0) mostbuf[ix2] = 0;
    if (mostbuf[ix2] > ydimen) mostbuf[ix2] = ydimen;

    return(bad);
}


//
// wallmost (internal)
//
static int32_t wallmost(int16_t *mostbuf, int32_t w, int32_t sectnum, char dastat)
{
    int32_t bad, i, j, t, y, z, inty, intz, xcross, yinc, fw;
    int32_t x1, y1, z1, x2, y2, z2, xv, yv, dx, dy, dasqr, oz1, oz2;
    int32_t s1, s2, s3, s4, ix1, ix2, iy1, iy2;
    int32_t tmp;
    //char datempbuf[256];

    if (dastat == 0)
    {
        z = sector[sectnum].ceilingz-globalposz;
        if ((sector[sectnum].ceilingstat&2) == 0) return(owallmost(mostbuf,w,z));
    }
    else
    {
        z = sector[sectnum].floorz-globalposz;
        if ((sector[sectnum].floorstat&2) == 0) return(owallmost(mostbuf,w,z));
    }

    i = thewall[w];
    if (i == sector[sectnum].wallptr) return(owallmost(mostbuf,w,z));

    x1 = wall[i].x; x2 = wall[wall[i].point2].x-x1;
    y1 = wall[i].y; y2 = wall[wall[i].point2].y-y1;

    fw = sector[sectnum].wallptr; i = wall[fw].point2;
    dx = wall[i].x-wall[fw].x; dy = wall[i].y-wall[fw].y;
    dasqr = krecipasm(nsqrtasm(dx*dx+dy*dy));

    if (xb1[w] == 0)
        { xv = cosglobalang+sinviewingrangeglobalang; yv = singlobalang-cosviewingrangeglobalang; }
    else
        { xv = x1-globalposx; yv = y1-globalposy; }
    i = xv*(y1-globalposy)-yv*(x1-globalposx); j = yv*x2-xv*y2;
    if (klabs(j) > klabs(i>>3)) i = divscale28(i,j);
    if (dastat == 0)
    {
        t = mulscale15(sector[sectnum].ceilingheinum,dasqr);
        z1 = sector[sectnum].ceilingz;
    }
    else
    {
        t = mulscale15(sector[sectnum].floorheinum,dasqr);
        z1 = sector[sectnum].floorz;
    }
    z1 = dmulscale24(dx*t,mulscale20(y2,i)+((y1-wall[fw].y)<<8),
                     -dy*t,mulscale20(x2,i)+((x1-wall[fw].x)<<8))+((z1-globalposz)<<7);


    if (xb2[w] == xdimen-1)
        { xv = cosglobalang-sinviewingrangeglobalang; yv = singlobalang+cosviewingrangeglobalang; }
    else
        { xv = (x2+x1)-globalposx; yv = (y2+y1)-globalposy; }
    i = xv*(y1-globalposy)-yv*(x1-globalposx); j = yv*x2-xv*y2;
    if (klabs(j) > klabs(i>>3)) i = divscale28(i,j);
    if (dastat == 0)
    {
        t = mulscale15(sector[sectnum].ceilingheinum,dasqr);
        z2 = sector[sectnum].ceilingz;
    }
    else
    {
        t = mulscale15(sector[sectnum].floorheinum,dasqr);
        z2 = sector[sectnum].floorz;
    }
    z2 = dmulscale24(dx*t,mulscale20(y2,i)+((y1-wall[fw].y)<<8),
                     -dy*t,mulscale20(x2,i)+((x1-wall[fw].x)<<8))+((z2-globalposz)<<7);


    s1 = mulscale20(globaluclip,yb1[w]); s2 = mulscale20(globaluclip,yb2[w]);
    s3 = mulscale20(globaldclip,yb1[w]); s4 = mulscale20(globaldclip,yb2[w]);
    bad = (z1<s1)+((z2<s2)<<1)+((z1>s3)<<2)+((z2>s4)<<3);

    ix1 = xb1[w]; ix2 = xb2[w];
    iy1 = yb1[w]; iy2 = yb2[w];
    oz1 = z1; oz2 = z2;

    if ((bad&3) == 3)
    {
        //clearbufbyte(&mostbuf[ix1],(ix2-ix1+1)*sizeof(mostbuf[0]),0L);
        for (i=ix1; i<=ix2; i++) mostbuf[i] = 0;
        return(bad);
    }

    if ((bad&12) == 12)
    {
        //clearbufbyte(&mostbuf[ix1],(ix2-ix1+1)*sizeof(mostbuf[0]),ydimen+(ydimen<<16));
        for (i=ix1; i<=ix2; i++) mostbuf[i] = ydimen;
        return(bad);
    }

    if (bad&3)
    {
        //inty = intz / (globaluclip>>16)
        t = divscale30(oz1-s1,s2-s1+oz1-oz2);
        inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        intz = oz1 + mulscale30(oz2-oz1,t);
        xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        //t = divscale30((x1<<4)-xcross*yb1[w],xcross*(yb2[w]-yb1[w])-((x2-x1)<<4));
        //inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        //intz = z1 + mulscale30(z2-z1,t);

        if ((bad&3) == 2)
        {
            if (xb1[w] <= xcross) { z2 = intz; iy2 = inty; ix2 = xcross; }
            //clearbufbyte(&mostbuf[xcross+1],(xb2[w]-xcross)*sizeof(mostbuf[0]),0L);
            for (i=xcross+1; i<=xb2[w]; i++) mostbuf[i] = 0;
        }
        else
        {
            if (xcross <= xb2[w]) { z1 = intz; iy1 = inty; ix1 = xcross; }
            //clearbufbyte(&mostbuf[xb1[w]],(xcross-xb1[w]+1)*sizeof(mostbuf[0]),0L);
            for (i=xb1[w]; i<=xcross; i++) mostbuf[i] = 0;
        }
    }

    if (bad&12)
    {
        //inty = intz / (globaldclip>>16)
        t = divscale30(oz1-s3,s4-s3+oz1-oz2);
        inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        intz = oz1 + mulscale30(oz2-oz1,t);
        xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        //t = divscale30((x1<<4)-xcross*yb1[w],xcross*(yb2[w]-yb1[w])-((x2-x1)<<4));
        //inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        //intz = z1 + mulscale30(z2-z1,t);

        if ((bad&12) == 8)
        {
            if (xb1[w] <= xcross) { z2 = intz; iy2 = inty; ix2 = xcross; }
            //clearbufbyte(&mostbuf[xcross+1],(xb2[w]-xcross)*sizeof(mostbuf[0]),ydimen+(ydimen<<16));
            for (i=xcross+1; i<=xb2[w]; i++) mostbuf[i] = ydimen;
        }
        else
        {
            if (xcross <= xb2[w]) { z1 = intz; iy1 = inty; ix1 = xcross; }
            //clearbufbyte(&mostbuf[xb1[w]],(xcross-xb1[w]+1)*sizeof(mostbuf[0]),ydimen+(ydimen<<16));
            for (i=xb1[w]; i<=xcross; i++) mostbuf[i] = ydimen;
        }
    }

    y = (scale(z1,xdimenscale,iy1)<<4);

//// enable for paranoia:
//    ix1 = clamp(ix1, 0, xres-1);
//    ix2 = clamp(ix2, 0, xres-1);
//    if (ix2-ix1 < 0)
//        swaplong(&ix1, &ix2);

    // PK 20110423: a bit consistency checking is a good thing:
    tmp = (ix2-ix1 >= 0) ? (ix2-ix1+1) : 1;
    yinc = ((scale(z2,xdimenscale,iy2)<<4)-y) / tmp;
    qinterpolatedown16short((intptr_t)&mostbuf[ix1],tmp,y+(globalhoriz<<16),yinc);

    if (mostbuf[ix1] < 0) mostbuf[ix1] = 0;
    if (mostbuf[ix1] > ydimen) mostbuf[ix1] = ydimen;
    if (mostbuf[ix2] < 0) mostbuf[ix2] = 0;
    if (mostbuf[ix2] > ydimen) mostbuf[ix2] = ydimen;

    return(bad);
}


//
// ceilscan (internal)
//
static void ceilscan(int32_t x1, int32_t x2, int32_t sectnum)
{
    int32_t i, j, ox, oy, x, y1, y2, twall, bwall;
    sectortype *sec;

    sec = &sector[sectnum];
    if (palookup[sec->ceilingpal] != globalpalwritten)
    {
        globalpalwritten = palookup[sec->ceilingpal];
        if (!globalpalwritten) globalpalwritten = palookup[globalpal];  // JBF: fixes null-pointer crash
        setpalookupaddress(globalpalwritten);
    }

    globalzd = sec->ceilingz-globalposz;
    if (globalzd > 0) return;
    globalpicnum = sec->ceilingpicnum;
    if ((unsigned)globalpicnum >= (unsigned)MAXTILES) globalpicnum = 0;
    setgotpic(globalpicnum);
    if ((tilesizx[globalpicnum] <= 0) || (tilesizy[globalpicnum] <= 0)) return;
    if (picanm[globalpicnum]&192) globalpicnum += animateoffs((int16_t)globalpicnum,(int16_t)sectnum);

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);
    globalbufplc = waloff[globalpicnum];

    globalshade = (int32_t)sec->ceilingshade;
    globvis = globalcisibility;
    if (sec->visibility != 0) globvis = mulscale4(globvis,(int32_t)((uint8_t)(sec->visibility+16)));
    globalorientation = (int32_t)sec->ceilingstat;


    if ((globalorientation&64) == 0)
    {
        globalx1 = singlobalang; globalx2 = singlobalang;
        globaly1 = cosglobalang; globaly2 = cosglobalang;
        globalxpanning = (globalposx<<20);
        globalypanning = -(globalposy<<20);
    }
    else
    {
        j = sec->wallptr;
        ox = wall[wall[j].point2].x - wall[j].x;
        oy = wall[wall[j].point2].y - wall[j].y;
        i = nsqrtasm(ox*ox+oy*oy); if (i == 0) i = 1024; else i = 1048576/i;
        globalx1 = mulscale10(dmulscale10(ox,singlobalang,-oy,cosglobalang),i);
        globaly1 = mulscale10(dmulscale10(ox,cosglobalang,oy,singlobalang),i);
        globalx2 = -globalx1;
        globaly2 = -globaly1;

        ox = ((wall[j].x-globalposx)<<6); oy = ((wall[j].y-globalposy)<<6);
        i = dmulscale14(oy,cosglobalang,-ox,singlobalang);
        j = dmulscale14(ox,cosglobalang,oy,singlobalang);
        ox = i; oy = j;
        globalxpanning = globalx1*ox - globaly1*oy;
        globalypanning = globaly2*ox + globalx2*oy;
    }
    globalx2 = mulscale16(globalx2,viewingrangerecip);
    globaly1 = mulscale16(globaly1,viewingrangerecip);
    globalxshift = (8-(picsiz[globalpicnum]&15));
    globalyshift = (8-(picsiz[globalpicnum]>>4));
    if (globalorientation&8) { globalxshift++; globalyshift++; }

    if ((globalorientation&0x4) > 0)
    {
        i = globalxpanning; globalxpanning = globalypanning; globalypanning = i;
        i = globalx2; globalx2 = -globaly1; globaly1 = -i;
        i = globalx1; globalx1 = globaly2; globaly2 = i;
    }
    if ((globalorientation&0x10) > 0) globalx1 = -globalx1, globaly1 = -globaly1, globalxpanning = -globalxpanning;
    if ((globalorientation&0x20) > 0) globalx2 = -globalx2, globaly2 = -globaly2, globalypanning = -globalypanning;
    globalx1 <<= globalxshift; globaly1 <<= globalxshift;
    globalx2 <<= globalyshift;  globaly2 <<= globalyshift;
    globalxpanning <<= globalxshift; globalypanning <<= globalyshift;
    globalxpanning += (((int32_t)sec->ceilingxpanning)<<24);
    globalypanning += (((int32_t)sec->ceilingypanning)<<24);
    globaly1 = (-globalx1-globaly1)*halfxdimen;
    globalx2 = (globalx2-globaly2)*halfxdimen;

    sethlinesizes(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4,globalbufplc);

    globalx2 += globaly2*(x1-1);
    globaly1 += globalx1*(x1-1);
    globalx1 = mulscale16(globalx1,globalzd);
    globalx2 = mulscale16(globalx2,globalzd);
    globaly1 = mulscale16(globaly1,globalzd);
    globaly2 = mulscale16(globaly2,globalzd);
    globvis = klabs(mulscale10(globvis,globalzd));

    if (!(globalorientation&0x180))
    {
        y1 = umost[x1]; y2 = y1;
        for (x=x1; x<=x2; x++)
        {
            twall = umost[x]-1; bwall = min(uplc[x],dmost[x]);
            if (twall < bwall-1)
            {
                if (twall >= y2)
                {
                    while (y1 < y2-1) hline(x-1,++y1);
                    y1 = twall;
                }
                else
                {
                    while (y1 < twall) hline(x-1,++y1);
                    while (y1 > twall) lastx[y1--] = x;
                }
                while (y2 > bwall) hline(x-1,--y2);
                while (y2 < bwall) lastx[y2++] = x;
            }
            else
            {
                while (y1 < y2-1) hline(x-1,++y1);
                if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
                y1 = umost[x+1]; y2 = y1;
            }
            globalx2 += globaly2; globaly1 += globalx1;
        }
        while (y1 < y2-1) hline(x2,++y1);
        faketimerhandler();
        return;
    }

    switch (globalorientation&0x180)
    {
    case 128:
        msethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 256:
        settransnormal();
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 384:
        settransreverse();
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    }

    y1 = umost[x1]; y2 = y1;
    for (x=x1; x<=x2; x++)
    {
        twall = umost[x]-1; bwall = min(uplc[x],dmost[x]);
        if (twall < bwall-1)
        {
            if (twall >= y2)
            {
                while (y1 < y2-1) slowhline(x-1,++y1);
                y1 = twall;
            }
            else
            {
                while (y1 < twall) slowhline(x-1,++y1);
                while (y1 > twall) lastx[y1--] = x;
            }
            while (y2 > bwall) slowhline(x-1,--y2);
            while (y2 < bwall) lastx[y2++] = x;
        }
        else
        {
            while (y1 < y2-1) slowhline(x-1,++y1);
            if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
            y1 = umost[x+1]; y2 = y1;
        }
        globalx2 += globaly2; globaly1 += globalx1;
    }
    while (y1 < y2-1) slowhline(x2,++y1);
    faketimerhandler();
}


//
// florscan (internal)
//
static void florscan(int32_t x1, int32_t x2, int32_t sectnum)
{
    int32_t i, j, ox, oy, x, y1, y2, twall, bwall;
    sectortype *sec;

    sec = &sector[sectnum];
    if (palookup[sec->floorpal] != globalpalwritten)
    {
        globalpalwritten = palookup[sec->floorpal];
        if (!globalpalwritten) globalpalwritten = palookup[globalpal];  // JBF: fixes null-pointer crash
        setpalookupaddress(globalpalwritten);
    }

    globalzd = globalposz-sec->floorz;
    if (globalzd > 0) return;
    globalpicnum = sec->floorpicnum;
    if ((unsigned)globalpicnum >= (unsigned)MAXTILES) globalpicnum = 0;
    setgotpic(globalpicnum);
    if ((tilesizx[globalpicnum] <= 0) || (tilesizy[globalpicnum] <= 0)) return;
    if (picanm[globalpicnum]&192) globalpicnum += animateoffs((int16_t)globalpicnum,(int16_t)sectnum);

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);
    globalbufplc = waloff[globalpicnum];

    globalshade = (int32_t)sec->floorshade;
    globvis = globalcisibility;
    if (sec->visibility != 0) globvis = mulscale4(globvis,(int32_t)((uint8_t)(sec->visibility+16)));
    globalorientation = (int32_t)sec->floorstat;


    if ((globalorientation&64) == 0)
    {
        globalx1 = singlobalang; globalx2 = singlobalang;
        globaly1 = cosglobalang; globaly2 = cosglobalang;
        globalxpanning = (globalposx<<20);
        globalypanning = -(globalposy<<20);
    }
    else
    {
        j = sec->wallptr;
        ox = wall[wall[j].point2].x - wall[j].x;
        oy = wall[wall[j].point2].y - wall[j].y;
        i = nsqrtasm(ox*ox+oy*oy); if (i == 0) i = 1024; else i = 1048576/i;
        globalx1 = mulscale10(dmulscale10(ox,singlobalang,-oy,cosglobalang),i);
        globaly1 = mulscale10(dmulscale10(ox,cosglobalang,oy,singlobalang),i);
        globalx2 = -globalx1;
        globaly2 = -globaly1;

        ox = ((wall[j].x-globalposx)<<6); oy = ((wall[j].y-globalposy)<<6);
        i = dmulscale14(oy,cosglobalang,-ox,singlobalang);
        j = dmulscale14(ox,cosglobalang,oy,singlobalang);
        ox = i; oy = j;
        globalxpanning = globalx1*ox - globaly1*oy;
        globalypanning = globaly2*ox + globalx2*oy;
    }
    globalx2 = mulscale16(globalx2,viewingrangerecip);
    globaly1 = mulscale16(globaly1,viewingrangerecip);
    globalxshift = (8-(picsiz[globalpicnum]&15));
    globalyshift = (8-(picsiz[globalpicnum]>>4));
    if (globalorientation&8) { globalxshift++; globalyshift++; }

    if ((globalorientation&0x4) > 0)
    {
        i = globalxpanning; globalxpanning = globalypanning; globalypanning = i;
        i = globalx2; globalx2 = -globaly1; globaly1 = -i;
        i = globalx1; globalx1 = globaly2; globaly2 = i;
    }
    if ((globalorientation&0x10) > 0) globalx1 = -globalx1, globaly1 = -globaly1, globalxpanning = -globalxpanning;
    if ((globalorientation&0x20) > 0) globalx2 = -globalx2, globaly2 = -globaly2, globalypanning = -globalypanning;
    globalx1 <<= globalxshift; globaly1 <<= globalxshift;
    globalx2 <<= globalyshift;  globaly2 <<= globalyshift;
    globalxpanning <<= globalxshift; globalypanning <<= globalyshift;
    globalxpanning += (((int32_t)sec->floorxpanning)<<24);
    globalypanning += (((int32_t)sec->floorypanning)<<24);
    globaly1 = (-globalx1-globaly1)*halfxdimen;
    globalx2 = (globalx2-globaly2)*halfxdimen;

    sethlinesizes(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4,globalbufplc);

    globalx2 += globaly2*(x1-1);
    globaly1 += globalx1*(x1-1);
    globalx1 = mulscale16(globalx1,globalzd);
    globalx2 = mulscale16(globalx2,globalzd);
    globaly1 = mulscale16(globaly1,globalzd);
    globaly2 = mulscale16(globaly2,globalzd);
    globvis = klabs(mulscale10(globvis,globalzd));

    if (!(globalorientation&0x180))
    {
        y1 = max(dplc[x1],umost[x1]); y2 = y1;
        for (x=x1; x<=x2; x++)
        {
            twall = max(dplc[x],umost[x])-1; bwall = dmost[x];
            if (twall < bwall-1)
            {
                if (twall >= y2)
                {
                    while (y1 < y2-1) hline(x-1,++y1);
                    y1 = twall;
                }
                else
                {
                    while (y1 < twall) hline(x-1,++y1);
                    while (y1 > twall) lastx[y1--] = x;
                }
                while (y2 > bwall) hline(x-1,--y2);
                while (y2 < bwall) lastx[y2++] = x;
            }
            else
            {
                while (y1 < y2-1) hline(x-1,++y1);
                if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
                y1 = max(dplc[x+1],umost[x+1]); y2 = y1;
            }
            globalx2 += globaly2; globaly1 += globalx1;
        }
        while (y1 < y2-1) hline(x2,++y1);
        faketimerhandler();
        return;
    }

    switch (globalorientation&0x180)
    {
    case 128:
        msethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 256:
        settransnormal();
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 384:
        settransreverse();
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    }

    y1 = max(dplc[x1],umost[x1]); y2 = y1;
    for (x=x1; x<=x2; x++)
    {
        twall = max(dplc[x],umost[x])-1; bwall = dmost[x];
        if (twall < bwall-1)
        {
            if (twall >= y2)
            {
                while (y1 < y2-1) slowhline(x-1,++y1);
                y1 = twall;
            }
            else
            {
                while (y1 < twall) slowhline(x-1,++y1);
                while (y1 > twall) lastx[y1--] = x;
            }
            while (y2 > bwall) slowhline(x-1,--y2);
            while (y2 < bwall) lastx[y2++] = x;
        }
        else
        {
            while (y1 < y2-1) slowhline(x-1,++y1);
            if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
            y1 = max(dplc[x+1],umost[x+1]); y2 = y1;
        }
        globalx2 += globaly2; globaly1 += globalx1;
    }
    while (y1 < y2-1) slowhline(x2,++y1);
    faketimerhandler();
}


//
// wallscan (internal)
//
static void wallscan(int32_t x1, int32_t x2, int16_t *uwal, int16_t *dwal, int32_t *swal, int32_t *lwal)
{
    int32_t x, xnice, ynice;
    intptr_t fpalookup;
    int32_t y1ve[4], y2ve[4], tsizx, tsizy;
#ifndef ENGINE_USING_A_C
    char bad;
    int32_t i, u4, d4, z;
#endif
#ifdef YAX_ENABLE
    if (g_nodraw)
        return;
#endif
    if (x2 >= xdim) x2 = xdim-1;

    tsizx = tilesizx[globalpicnum];
    tsizy = tilesizy[globalpicnum];
    setgotpic(globalpicnum);
    if ((tsizx <= 0) || (tsizy <= 0)) return;
    if ((uwal[x1] > ydimen) && (uwal[x2] > ydimen)) return;
    if ((dwal[x1] < 0) && (dwal[x2] < 0)) return;

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    xnice = (pow2long[picsiz[globalpicnum]&15] == tsizx);
    if (xnice) tsizx--;
    ynice = (pow2long[picsiz[globalpicnum]>>4] == tsizy);
    if (ynice) tsizy = (picsiz[globalpicnum]>>4);

    fpalookup = FP_OFF(palookup[globalpal]);

    setupvlineasm(globalshiftval);

#ifndef ENGINE_USING_A_C

    x = x1;
    while ((umost[x] > dmost[x]) && (x <= x2)) x++;

    for (; (x<=x2)&&((x+frameoffset)&3); x++)
    {
        y1ve[0] = max(uwal[x],umost[x]);
        y2ve[0] = min(dwal[x],dmost[x]);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);

        bufplce[0] = lwal[x] + globalxpanning;
        if (bufplce[0] >= tsizx) { if (xnice == 0) bufplce[0] %= tsizx; else bufplce[0] &= tsizx; }
        if (ynice == 0) bufplce[0] *= tsizy; else bufplce[0] <<= tsizy;

        vince[0] = swal[x]*globalyscale;
        vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);

        vlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0]+waloff[globalpicnum],x+frameoffset+ylookup[y1ve[0]]);
    }
    for (; x<=x2-3; x+=4)
    {
        bad = 0;
        for (z=3; z>=0; z--)
        {
            y1ve[z] = max(uwal[x+z],umost[x+z]);
            y2ve[z] = min(dwal[x+z],dmost[x+z])-1;
            if (y2ve[z] < y1ve[z]) { bad += pow2char[z]; continue; }

            i = lwal[x+z] + globalxpanning;
            if (i >= tsizx) { if (xnice == 0) i %= tsizx; else i &= tsizx; }
            if (ynice == 0) i *= tsizy; else i <<= tsizy;
            bufplce[z] = waloff[globalpicnum]+i;

            vince[z] = swal[x+z]*globalyscale;
            vplce[z] = globalzd + vince[z]*(y1ve[z]-globalhoriz+1);
        }
        if (bad == 15) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);
        palookupoffse[3] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x+3],globvis),globalshade)<<8);

        if ((palookupoffse[0] == palookupoffse[3]) && ((bad&0x9) == 0))
        {
            palookupoffse[1] = palookupoffse[0];
            palookupoffse[2] = palookupoffse[0];
        }
        else
        {
            palookupoffse[1] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x+1],globvis),globalshade)<<8);
            palookupoffse[2] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x+2],globvis),globalshade)<<8);
        }

        u4 = max(max(y1ve[0],y1ve[1]),max(y1ve[2],y1ve[3]));
        d4 = min(min(y2ve[0],y2ve[1]),min(y2ve[2],y2ve[3]));

        if ((bad != 0) || (u4 >= d4))
        {
            if (!(bad&1)) prevlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0],vplce[0],bufplce[0],ylookup[y1ve[0]]+x+frameoffset+0);
            if (!(bad&2)) prevlineasm1(vince[1],palookupoffse[1],y2ve[1]-y1ve[1],vplce[1],bufplce[1],ylookup[y1ve[1]]+x+frameoffset+1);
            if (!(bad&4)) prevlineasm1(vince[2],palookupoffse[2],y2ve[2]-y1ve[2],vplce[2],bufplce[2],ylookup[y1ve[2]]+x+frameoffset+2);
            if (!(bad&8)) prevlineasm1(vince[3],palookupoffse[3],y2ve[3]-y1ve[3],vplce[3],bufplce[3],ylookup[y1ve[3]]+x+frameoffset+3);
            continue;
        }

        if (u4 > y1ve[0]) vplce[0] = prevlineasm1(vince[0],palookupoffse[0],u4-y1ve[0]-1,vplce[0],bufplce[0],ylookup[y1ve[0]]+x+frameoffset+0);
        if (u4 > y1ve[1]) vplce[1] = prevlineasm1(vince[1],palookupoffse[1],u4-y1ve[1]-1,vplce[1],bufplce[1],ylookup[y1ve[1]]+x+frameoffset+1);
        if (u4 > y1ve[2]) vplce[2] = prevlineasm1(vince[2],palookupoffse[2],u4-y1ve[2]-1,vplce[2],bufplce[2],ylookup[y1ve[2]]+x+frameoffset+2);
        if (u4 > y1ve[3]) vplce[3] = prevlineasm1(vince[3],palookupoffse[3],u4-y1ve[3]-1,vplce[3],bufplce[3],ylookup[y1ve[3]]+x+frameoffset+3);

        if (d4 >= u4) vlineasm4(d4-u4+1,ylookup[u4]+x+frameoffset);

        i = x+frameoffset+ylookup[d4+1];
        if (y2ve[0] > d4) prevlineasm1(vince[0],palookupoffse[0],y2ve[0]-d4-1,vplce[0],bufplce[0],i+0);
        if (y2ve[1] > d4) prevlineasm1(vince[1],palookupoffse[1],y2ve[1]-d4-1,vplce[1],bufplce[1],i+1);
        if (y2ve[2] > d4) prevlineasm1(vince[2],palookupoffse[2],y2ve[2]-d4-1,vplce[2],bufplce[2],i+2);
        if (y2ve[3] > d4) prevlineasm1(vince[3],palookupoffse[3],y2ve[3]-d4-1,vplce[3],bufplce[3],i+3);
    }
    for (; x<=x2; x++)
    {
        y1ve[0] = max(uwal[x],umost[x]);
        y2ve[0] = min(dwal[x],dmost[x]);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);

        bufplce[0] = lwal[x] + globalxpanning;
        if (bufplce[0] >= tsizx) { if (xnice == 0) bufplce[0] %= tsizx; else bufplce[0] &= tsizx; }
        if (ynice == 0) bufplce[0] *= tsizy; else bufplce[0] <<= tsizy;

        vince[0] = swal[x]*globalyscale;
        vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);

        vlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0]+waloff[globalpicnum],x+frameoffset+ylookup[y1ve[0]]);
    }

#else   // ENGINE_USING_A_C

    for (x=x1; x<=x2; x++)
    {
        y1ve[0] = max(uwal[x],umost[x]);
        y2ve[0] = min(dwal[x],dmost[x]);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup+(getpalookup((int32_t)mulscale16(swal[x],globvis),globalshade)<<8);

        bufplce[0] = lwal[x] + globalxpanning;
        if (bufplce[0] >= tsizx) { if (xnice == 0) bufplce[0] %= tsizx; else bufplce[0] &= tsizx; }
        if (ynice == 0) bufplce[0] *= tsizy; else bufplce[0] <<= tsizy;

        vince[0] = swal[x]*globalyscale;
        vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);

        vlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0]+waloff[globalpicnum],x+frameoffset+ylookup[y1ve[0]]);
    }

#endif

    faketimerhandler();
}


//
// transmaskvline (internal)
//
static void transmaskvline(int32_t x)
{
    int32_t vplc, vinc, i;
    intptr_t palookupoffs;
    intptr_t bufplc,p;
    int16_t y1v, y2v;

    if ((x < 0) || (x >= xdimen)) return;

    y1v = max(uwall[x],startumost[x+windowx1]-windowy1);
    y2v = min(dwall[x],startdmost[x+windowx1]-windowy1);
    y2v--;
    if (y2v < y1v) return;

    palookupoffs = FP_OFF(palookup[globalpal]) + (getpalookup((int32_t)mulscale16(swall[x],globvis),globalshade)<<8);

    vinc = swall[x]*globalyscale;
    vplc = globalzd + vinc*(y1v-globalhoriz+1);

    i = lwall[x]+globalxpanning;
    if (i >= tilesizx[globalpicnum]) i %= tilesizx[globalpicnum];
    bufplc = waloff[globalpicnum]+i*tilesizy[globalpicnum];

    p = ylookup[y1v]+x+frameoffset;

    tvlineasm1(vinc,palookupoffs,y2v-y1v,vplc,bufplc,p);
}


//
// transmaskvline2 (internal)
//
#ifndef ENGINE_USING_A_C
static void transmaskvline2(int32_t x)
{
    int32_t i, y1, y2, x2;
    int16_t y1ve[2], y2ve[2];

    uintptr_t p;

    if ((x < 0) || (x >= xdimen)) return;
    if (x == xdimen-1) { transmaskvline(x); return; }

    x2 = x+1;

    y1ve[0] = max(uwall[x],startumost[x+windowx1]-windowy1);
    y2ve[0] = min(dwall[x],startdmost[x+windowx1]-windowy1)-1;
    if (y2ve[0] < y1ve[0]) { transmaskvline(x2); return; }
    y1ve[1] = max(uwall[x2],startumost[x2+windowx1]-windowy1);
    y2ve[1] = min(dwall[x2],startdmost[x2+windowx1]-windowy1)-1;
    if (y2ve[1] < y1ve[1]) { transmaskvline(x); return; }

    palookupoffse[0] = FP_OFF(palookup[globalpal]) + (getpalookup((int32_t)mulscale16(swall[x],globvis),globalshade)<<8);
    palookupoffse[1] = FP_OFF(palookup[globalpal]) + (getpalookup((int32_t)mulscale16(swall[x2],globvis),globalshade)<<8);

    setuptvlineasm2(globalshiftval,palookupoffse[0],palookupoffse[1]);

    vince[0] = swall[x]*globalyscale;
    vince[1] = swall[x2]*globalyscale;
    vplce[0] = globalzd + vince[0]*(y1ve[0]-globalhoriz+1);
    vplce[1] = globalzd + vince[1]*(y1ve[1]-globalhoriz+1);

    i = lwall[x] + globalxpanning;
    if (i >= tilesizx[globalpicnum]) i %= tilesizx[globalpicnum];
    bufplce[0] = waloff[globalpicnum]+i*tilesizy[globalpicnum];

    i = lwall[x2] + globalxpanning;
    if (i >= tilesizx[globalpicnum]) i %= tilesizx[globalpicnum];
    bufplce[1] = waloff[globalpicnum]+i*tilesizy[globalpicnum];

    y1 = max(y1ve[0],y1ve[1]);
    y2 = min(y2ve[0],y2ve[1]);

    p = x+frameoffset;

    if (y1ve[0] != y1ve[1])
    {
        if (y1ve[0] < y1)
            vplce[0] = tvlineasm1(vince[0],palookupoffse[0],y1-y1ve[0]-1,vplce[0],bufplce[0],ylookup[y1ve[0]]+p);
        else
            vplce[1] = tvlineasm1(vince[1],palookupoffse[1],y1-y1ve[1]-1,vplce[1],bufplce[1],ylookup[y1ve[1]]+p+1);
    }

    if (y2 > y1)
    {
        asm1 = vince[1];
        asm2 = ylookup[y2]+p+1;
        tvlineasm2(vplce[1],vince[0],bufplce[0],bufplce[1],vplce[0],ylookup[y1]+p);
    }
    else
    {
        asm1 = vplce[0];
        asm2 = vplce[1];
    }

    if (y2ve[0] > y2ve[1])
        tvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y2-1,asm1,bufplce[0],ylookup[y2+1]+p);
    else if (y2ve[0] < y2ve[1])
        tvlineasm1(vince[1],palookupoffse[1],y2ve[1]-y2-1,asm2,bufplce[1],ylookup[y2+1]+p+1);

    faketimerhandler();
}
#endif

//
// transmaskwallscan (internal)
//
static inline void transmaskwallscan(int32_t x1, int32_t x2)
{
    int32_t x;

    setgotpic(globalpicnum);
    if ((tilesizx[globalpicnum] <= 0) || (tilesizy[globalpicnum] <= 0)) return;

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    setuptvlineasm(globalshiftval);

    x = x1;
    while ((startumost[x+windowx1] > startdmost[x+windowx1]) && (x <= x2)) x++;
#ifndef ENGINE_USING_A_C
    if ((x <= x2) && (x&1)) transmaskvline(x), x++;
    while (x < x2) transmaskvline2(x), x += 2;
#endif
    while (x <= x2) transmaskvline(x), x++;
    faketimerhandler();
}


////////// NON-power-of-two replacements for mhline/thline, adapted from a.c //////////
#if defined(__GNUC__) && defined(__i386__) && !defined(NOASM)
// from pragmas.h
# define ourdivscale32(d,b) \
        ({ int32_t __d=(d), __b=(b), __r; \
           __asm__ __volatile__ ("xorl %%eax, %%eax; idivl %%ebx" \
                : "=a" (__r), "=d" (__d) : "d" (__d), "b" (__b) : "cc"); \
         __r; })

#else
# define ourdivscale32(d,b) divscale32(d,b)
#endif

// cntup16>>16 iterations
static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, char *p)
{
    char ch;

    const char *const buf = (char *)bufplc;
    const char *const pal = (char *)asm3;

    const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
    const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
    const uint32_t yspan = globalyspan;
    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    for (cntup16>>=16; cntup16>0; cntup16--)
    {
        ch = buf[(bx/xdiv)*yspan + by/ydiv];

        if (ch != 255) *p = pal[ch];
        bx += xinc;
        by += yinc;
        p++;
    }
}

// cntup16>>16 iterations
static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, char *p)
{
    char ch;

    const char *const buf = (char *)bufplc;
    const char *const pal = (char *)asm3;
    const char *const trans = transluc;

    const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
    const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
    const uint32_t yspan = globalyspan;
    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    if (globalorientation&512)
    {
        for (cntup16>>=16; cntup16>0; cntup16--)
        {
            ch = buf[(bx/xdiv)*yspan + by/ydiv];
            if (ch != 255) *p = trans[(*p)|(pal[ch]<<8)];
            bx += xinc;
            by += yinc;
            p++;
        }
    }
    else
    {
        for (cntup16>>=16; cntup16>0; cntup16--)
        {
            ch = buf[(bx/xdiv)*yspan + by/ydiv];
            if (ch != 255) *p = trans[((*p)<<8)|pal[ch]];
            bx += xinc;
            by += yinc;
            p++;
        }
    }
}
////////// END non-power-of-two replacements //////////

//
// ceilspritehline (internal)
//
static inline void ceilspritehline(int32_t x2, int32_t y)
{
    int32_t x1, v, bx, by;

    //x = x1 + (x2-x1)t + (y1-y2)u  ~  x = 160v
    //y = y1 + (y2-y1)t + (x2-x1)u  ~  y = (scrx-160)v
    //z = z1 = z2                   ~  z = posz + (scry-horiz)v

    x1 = lastx[y]; if (x2 < x1) return;

    v = mulscale20(globalzd,horizlookup[y-globalhoriz+horizycent]);
    bx = mulscale14(globalx2*x1+globalx1,v) + globalxpanning;
    by = mulscale14(globaly2*x1+globaly1,v) + globalypanning;
    asm1 = mulscale14(globalx2,v);
    asm2 = mulscale14(globaly2,v);

    asm3 = FP_OFF(palookup[globalpal]) + (getpalookup((int32_t)mulscale28(klabs(v),globvis),globalshade)<<8);

    if (globalispow2)
    {
        if ((globalorientation&2) == 0)
            mhline(globalbufplc,bx,(x2-x1)<<16,0L,by,ylookup[y]+x1+frameoffset);
        else
        {
            thline(globalbufplc,bx,(x2-x1)<<16,0L,by,ylookup[y]+x1+frameoffset);
        }
    }
    else
    {
        if ((globalorientation&2) == 0)
            nonpow2_mhline(globalbufplc,bx,(x2-x1)<<16,0L,by,(char *)(ylookup[y]+x1+frameoffset));
        else
        {
            nonpow2_thline(globalbufplc,bx,(x2-x1)<<16,0L,by,(char *)(ylookup[y]+x1+frameoffset));
        }
    }
}


//
// ceilspritescan (internal)
//
static inline void ceilspritescan(int32_t x1, int32_t x2)
{
    int32_t x, y1, y2, twall, bwall;

    y1 = uwall[x1]; y2 = y1;
    for (x=x1; x<=x2; x++)
    {
        twall = uwall[x]-1; bwall = dwall[x];
        if (twall < bwall-1)
        {
            if (twall >= y2)
            {
                while (y1 < y2-1) ceilspritehline(x-1,++y1);
                y1 = twall;
            }
            else
            {
                while (y1 < twall) ceilspritehline(x-1,++y1);
                while (y1 > twall) lastx[y1--] = x;
            }
            while (y2 > bwall) ceilspritehline(x-1,--y2);
            while (y2 < bwall) lastx[y2++] = x;
        }
        else
        {
            while (y1 < y2-1) ceilspritehline(x-1,++y1);
            if (x == x2) break;
            y1 = uwall[x+1]; y2 = y1;
        }
    }
    while (y1 < y2-1) ceilspritehline(x2,++y1);
    faketimerhandler();
}

////////// translucent slope vline, based on a-c.c's slopevlin //////////
static int32_t gglogx, gglogy, ggpinc;
static char *ggbuf, *ggpal;

static void setupslopevlin_alsotrans(int32_t logylogx, intptr_t bufplc, int32_t pinc)
{
    setupslopevlin(logylogx, bufplc, pinc);
    gglogx = (logylogx&255); gglogy = (logylogx>>8);
    ggbuf = (char *)bufplc; ggpinc = pinc;
    ggpal = palookup[globalpal] + (getpalookup(0,globalshade)<<8);
}

// cnt iterations
static void tslopevlin(uint8_t *p, int32_t i, const intptr_t *slopalptr, int32_t cnt, int32_t bx, int32_t by)
{
    const char *const buf = ggbuf;
    const char *const pal = ggpal;
    const char *const trans = transluc;
    const int32_t bzinc = (asm1>>3), pinc = ggpinc;

    const int32_t transmode = (globalorientation&128);
    const int32_t xtou = globalx3, ytov = globaly3;
    const int32_t logx = gglogx, logy = gglogy;

    int32_t bz = asm3;

    do
    {
        uint8_t ch;
        uint32_t u, v;

        i = krecipasm(bz>>6); bz += bzinc;
        u = bx + xtou*i;
        v = by + ytov*i;
        ch = *(uint8_t *)(slopalptr[0] + buf[((u>>(32-logx))<<logy)+(v>>(32-logy))]);

        if (transmode)
        {
            if (ch != 255) *p = trans[*p|(pal[ch]<<8)];
        }
        else
        {
            if (ch != 255) *p = trans[(*p<<8)|pal[ch]];
        }

        slopalptr--;
        p += pinc;
    }
    while (--cnt);
}

//
// grouscan (internal)
//
#define BITSOFPRECISION 3  //Don't forget to change this in A.ASM also!
static void grouscan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat)
{
    int32_t i, l, x, y, dx, dy, wx, wy, y1, y2, daz;
    int32_t daslope, dasqr;
    int32_t shoffs, shinc, m1, m2;
    intptr_t *mptr1, *mptr2, *nptr1, *nptr2,j;
    walltype *wal;
    sectortype *sec;

    sec = &sector[sectnum];

    if (dastat == 0)
    {
        if (globalposz <= getceilzofslope(sectnum,globalposx,globalposy))
            return;  //Back-face culling
        globalorientation = sec->ceilingstat;
        globalpicnum = sec->ceilingpicnum;
        globalshade = sec->ceilingshade;
        globalpal = sec->ceilingpal;
        daslope = sec->ceilingheinum;
        daz = sec->ceilingz;
    }
    else
    {
        if (globalposz >= getflorzofslope(sectnum,globalposx,globalposy))
            return;  //Back-face culling
        globalorientation = sec->floorstat;
        globalpicnum = sec->floorpicnum;
        globalshade = sec->floorshade;
        globalpal = sec->floorpal;
        daslope = sec->floorheinum;
        daz = sec->floorz;
    }

    if ((picanm[globalpicnum]&192) != 0) globalpicnum += animateoffs(globalpicnum,sectnum);
    setgotpic(globalpicnum);
    if ((tilesizx[globalpicnum] <= 0) || (tilesizy[globalpicnum] <= 0)) return;
    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    wal = &wall[sec->wallptr];
    wx = wall[wal->point2].x - wal->x;
    wy = wall[wal->point2].y - wal->y;
    dasqr = krecipasm(nsqrtasm(wx*wx+wy*wy));
    i = mulscale21(daslope,dasqr);
    wx *= i; wy *= i;

    globalx = -mulscale19(singlobalang,xdimenrecip);
    globaly = mulscale19(cosglobalang,xdimenrecip);
    globalx1 = (globalposx<<8);
    globaly1 = -(globalposy<<8);
    i = (dax1-halfxdimen)*xdimenrecip;
    globalx2 = mulscale16(cosglobalang<<4,viewingrangerecip) - mulscale27(singlobalang,i);
    globaly2 = mulscale16(singlobalang<<4,viewingrangerecip) + mulscale27(cosglobalang,i);
    globalzd = (xdimscale<<9);
    globalzx = -dmulscale17(wx,globaly2,-wy,globalx2) + mulscale10(1-globalhoriz,globalzd);
    globalz = -dmulscale25(wx,globaly,-wy,globalx);

    if (globalorientation&64)  //Relative alignment
    {
        dx = mulscale14(wall[wal->point2].x-wal->x,dasqr);
        dy = mulscale14(wall[wal->point2].y-wal->y,dasqr);

        i = nsqrtasm(daslope*daslope+16777216);

        x = globalx; y = globaly;
        globalx = dmulscale16(x,dx,y,dy);
        globaly = mulscale12(dmulscale16(-y,dx,x,dy),i);

        x = ((wal->x-globalposx)<<8); y = ((wal->y-globalposy)<<8);
        globalx1 = dmulscale16(-x,dx,-y,dy);
        globaly1 = mulscale12(dmulscale16(-y,dx,x,dy),i);

        x = globalx2; y = globaly2;
        globalx2 = dmulscale16(x,dx,y,dy);
        globaly2 = mulscale12(dmulscale16(-y,dx,x,dy),i);
    }
    if (globalorientation&0x4)
    {
        i = globalx; globalx = -globaly; globaly = -i;
        i = globalx1; globalx1 = globaly1; globaly1 = i;
        i = globalx2; globalx2 = -globaly2; globaly2 = -i;
    }
    if (globalorientation&0x10) { globalx1 = -globalx1, globalx2 = -globalx2, globalx = -globalx; }
    if (globalorientation&0x20) { globaly1 = -globaly1, globaly2 = -globaly2, globaly = -globaly; }

    daz = dmulscale9(wx,globalposy-wal->y,-wy,globalposx-wal->x) + ((daz-globalposz)<<8);
    globalx2 = mulscale20(globalx2,daz); globalx = mulscale28(globalx,daz);
    globaly2 = mulscale20(globaly2,-daz); globaly = mulscale28(globaly,-daz);

    i = 8-(picsiz[globalpicnum]&15); j = 8-(picsiz[globalpicnum]>>4);
    if (globalorientation&8) { i++; j++; }
    globalx1 <<= (i+12); globalx2 <<= i; globalx <<= i;
    globaly1 <<= (j+12); globaly2 <<= j; globaly <<= j;

    if (dastat == 0)
    {
        globalx1 += (((int32_t)sec->ceilingxpanning)<<24);
        globaly1 += (((int32_t)sec->ceilingypanning)<<24);
    }
    else
    {
        globalx1 += (((int32_t)sec->floorxpanning)<<24);
        globaly1 += (((int32_t)sec->floorypanning)<<24);
    }

    asm1 = -(globalzd>>(16-BITSOFPRECISION));

    globvis = globalvisibility;
    if (sec->visibility != 0) globvis = mulscale4(globvis,(int32_t)((uint8_t)(sec->visibility+16)));
    globvis = mulscale13(globvis,daz);
    globvis = mulscale16(globvis,xdimscale);
    j = FP_OFF(palookup[globalpal]);

    setupslopevlin_alsotrans(((int32_t)(picsiz[globalpicnum]&15))+(((int32_t)(picsiz[globalpicnum]>>4))<<8),
                             waloff[globalpicnum],-ylookup[1]);

    l = (globalzd>>16);

    shinc = mulscale16(globalz,xdimenscale);
    if (shinc > 0) shoffs = (4<<15); else shoffs = ((16380-ydimen)<<15);    // JBF: was 2044
    if (dastat == 0) y1 = umost[dax1]; else y1 = max(umost[dax1],dplc[dax1]);
    m1 = mulscale16(y1,globalzd) + (globalzx>>6);
    //Avoid visibility overflow by crossing horizon
    if (globalzd > 0) m1 += (globalzd>>16); else m1 -= (globalzd>>16);
    m2 = m1+l;
    mptr1 = (intptr_t *)&slopalookup[y1+(shoffs>>15)]; mptr2 = mptr1+1;

    for (x=dax1; x<=dax2; x++)
    {
        if (dastat == 0) { y1 = umost[x]; y2 = min(dmost[x],uplc[x])-1; }
        else { y1 = max(umost[x],dplc[x]); y2 = dmost[x]-1; }
        if (y1 <= y2)
        {
            nptr1 = (intptr_t *)&slopalookup[y1+(shoffs>>15)];
            nptr2 = (intptr_t *)&slopalookup[y2+(shoffs>>15)];
            while (nptr1 <= mptr1)
            {
                *mptr1-- = j + (getpalookup((int32_t)mulscale24(krecipasm(m1),globvis),globalshade)<<8);
                m1 -= l;
            }
            while (nptr2 >= mptr2)
            {
                *mptr2++ = j + (getpalookup((int32_t)mulscale24(krecipasm(m2),globvis),globalshade)<<8);
                m2 += l;
            }

            globalx3 = (globalx2>>10);
            globaly3 = (globaly2>>10);
            asm3 = mulscale16(y2,globalzd) + (globalzx>>6);
            if ((globalorientation&256)==0)
                slopevlin(ylookup[y2]+x+frameoffset,krecipasm(asm3>>3),(intptr_t)nptr2,y2-y1+1,globalx1,globaly1);
            else
                tslopevlin((uint8_t *)(ylookup[y2]+x+frameoffset),0,nptr2,y2-y1+1,globalx1,globaly1);

            if ((x&15) == 0) faketimerhandler();
        }
        globalx2 += globalx;
        globaly2 += globaly;
        globalzx += globalz;
        shoffs += shinc;
    }
}


//
// parascan (internal)
//
static void parascan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat, int32_t bunch)
{
    sectortype *sec;
    int32_t j, k, l, m, n, x, z, wallnum, nextsectnum, globalhorizbak;
    int16_t *topptr, *botptr;

    int32_t dapyscale;
    int16_t dapskybits;
    static const int16_t zeropskyoff[MAXPSKYTILES];
    const int16_t *dapskyoff;

    UNREFERENCED_PARAMETER(dax1);
    UNREFERENCED_PARAMETER(dax2);

    sectnum = thesector[bunchfirst[bunch]]; sec = &sector[sectnum];

    globalhorizbak = globalhoriz;