Subversion Repositories eduke32

Rev

Rev 5056 | Blame | Compare with Previous | Last modification | View Log | RSS feed

// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
//
// This file has been modified from Ken Silverman's original release
// by Jonathon Fowler (jf@jonof.id.au)


#define ENGINE

#if (PNG_LIBPNG_VER > 10599)
# include <string.h>
#endif
#include "compat.h"
#include "build.h"
#include "editor.h"
#include "pragmas.h"
#include "cache1d.h"
#include "a.h"
#include "osd.h"
#include "crc32.h"
#include "xxhash.h"
#include "lz4.h"

#include "baselayer.h"
#include "scriptfile.h"

#ifdef USE_OPENGL
# include "glbuild.h"
# include "mdsprite.h"
# ifdef POLYMER
#  include "polymer.h"
# endif
# include "hightile.h"
# include "polymost.h"
# ifdef _WIN32
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
# endif
#endif

#ifdef USE_LIBPNG
//# include <setjmp.h>
# include <png.h>
#endif

#include <math.h>  // pow

#include "engine_priv.h"

#ifdef LUNATIC
# include "lunatic.h"
L_State g_engState;
#endif

#define CACHEAGETIME 16

//////////
// Compilation switches for optional/extended engine features

#if !defined(__arm__) && !defined(GEKKO)
# define HIGH_PRECISION_SPRITE
#endif

#if !defined EDUKE32_TOUCH_DEVICES && !defined GEKKO && !defined __OPENDINGUX__
// Handle absolute z difference of floor/ceiling to camera >= 1<<24.
// Also: higher precision view-relative x and y for drawvox().
# define CLASSIC_Z_DIFF_64
#endif

#define MULTI_COLUMN_VLINE
//#define DEBUG_TILESIZY_512
//#define DEBUG_TILEOFFSETS
//////////

#ifdef LUNATIC
# if !defined DEBUG_MAIN_ARRAYS
LUNATIC_EXTERN const int32_t engine_main_arrays_are_static = 0;  // for Lunatic
# else
LUNATIC_EXTERN const int32_t engine_main_arrays_are_static = 1;
# endif

#if MAXSECTORS==MAXSECTORSV8
LUNATIC_EXTERN const int32_t engine_v8 = 1;
#else
LUNATIC_EXTERN const int32_t engine_v8 = 0;
#endif
#endif

#ifdef DEBUGGINGAIDS
float debug1, debug2;
#endif

int32_t mapversion=7; // JBF 20040211: default mapversion to 7
int32_t g_loadedMapVersion = -1;  // -1: none (e.g. started new)
usermaphack_t g_loadedMapHack;  // used only for the MD4 part

int32_t compare_usermaphacks(const void *a, const void *b)
{
    return Bmemcmp(((usermaphack_t*) a)->md4, ((usermaphack_t*) b)->md4, 16);
}
usermaphack_t *usermaphacks;
int32_t num_usermaphacks;

static int32_t get_mapversion(void);

// Handle nonpow2-ysize walls the old way?
static inline int32_t oldnonpow2(void)
{
#if !defined CLASSIC_NONPOW2_YSIZE_WALLS
    return 1;
#else
    return (g_loadedMapVersion < 10);
#endif
}

static void drawpixel_safe(void *s, char a)
{
#if defined __GNUC__
    if (__builtin_expect((intptr_t)s >= frameplace && (intptr_t)s < frameplace+bytesperline*ydim, 1))
#else
    if ((intptr_t)s >= frameplace && (intptr_t)s < frameplace+bytesperline*ydim)
#endif
        drawpixel(s, a);
#ifdef DEBUGGINGAIDS
    else
    {
        const char c = editorcolors[15];

        drawpixel((intptr_t *)frameplace, c);
        drawpixel((intptr_t *)frameplace+1, c);
        drawpixel((intptr_t *)frameplace+2, c);
        drawpixel((intptr_t *)frameplace+bytesperline, c);
        drawpixel((intptr_t *)frameplace+bytesperline+1, c);
        drawpixel((intptr_t *)frameplace+bytesperline+2, c);
        drawpixel((intptr_t *)frameplace+2*bytesperline, c);
        drawpixel((intptr_t *)frameplace+2*bytesperline+1, c);
        drawpixel((intptr_t *)frameplace+2*bytesperline+2, c);
    }
#endif
}

//void loadvoxel(int32_t voxindex) { UNREFERENCED_PARAMATER(voxindex); }
int16_t tiletovox[MAXTILES];
int32_t usevoxels = 1;
#ifdef USE_OPENGL
static char *voxfilenames[MAXVOXELS], g_haveVoxels=0;  // for deferred voxel->model conversion
#endif
//#define kloadvoxel loadvoxel

int32_t novoxmips = 1;
int32_t editorgridextent = 131072;

//These variables need to be copied into BUILD
#define MAXXSIZ 256
#define MAXYSIZ 256
#define MAXZSIZ 255
#define MAXVOXMIPS 5
#ifdef EDUKE32_TOUCH_DEVICES
# define DISTRECIPSIZ (65536+256)
#else
# define DISTRECIPSIZ 131072
#endif
intptr_t voxoff[MAXVOXELS][MAXVOXMIPS]; // used in KenBuild
static char voxlock[MAXVOXELS][MAXVOXMIPS];
int32_t voxscale[MAXVOXELS];

static int32_t ggxinc[MAXXSIZ+1], ggyinc[MAXXSIZ+1];
static int32_t lowrecip[1024], nytooclose;
static const int32_t nytoofar = DISTRECIPSIZ*16384ull - 1048576;
static uint32_t *distrecip;

static int32_t *lookups = NULL;
static int32_t dommxoverlay = 1, beforedrawrooms = 1;
int32_t indrawroomsandmasks = 0;

static int32_t oxdimen = -1, oviewingrange = -1, oxyaspect = -1;

// r_usenewaspect is the cvar, newaspect_enable to trigger the new behaviour in the code
int32_t r_usenewaspect = 1, newaspect_enable=0;
uint32_t r_screenxy = 0;

int32_t globalflags;

int32_t curbrightness = 0, gammabrightness = 0;

float vid_gamma = DEFAULT_GAMMA;
float vid_contrast = DEFAULT_CONTRAST;
float vid_brightness = DEFAULT_BRIGHTNESS;

//Textured Map variables
static char globalpolytype;
static int16_t **dotp1, **dotp2;

static int8_t tempbuf[MAXWALLS];

// referenced from asm
#if !defined(NOASM) && defined __cplusplus
extern "C" {
#endif
int32_t ebpbak, espbak;
int32_t reciptable[2048], fpuasm;
intptr_t asm1, asm2, asm3, asm4, palookupoffse[4];
uint32_t vplce[4];
int32_t vince[4];
intptr_t bufplce[4];
int32_t globaltilesizy;
int32_t globalx1, globaly2, globalx3, globaly3;
#if !defined(NOASM) && defined __cplusplus
}
#endif

int32_t sloptable[16384];
static intptr_t slopalookup[16384];    // was 2048
#if defined(USE_OPENGL)
palette_t palookupfog[MAXPALOOKUPS];
#endif

// For every pal number, whether tsprite pal should not be taken over from
// floor pal.
// NOTE: g_noFloorPal[0] is irrelevant as it's never checked.
int8_t g_noFloorPal[MAXPALOOKUPS];

static void *pic = NULL;

// The tile file number (tilesXXX <- this) of each tile:
// 0 <= . < MAXARTFILES_BASE: tile is in a "base" ART file
// MAXARTFILES_BASE <= . < MAXARTFILES_TOTAL: tile is in a map-specific ART file
static uint8_t tilefilenum[MAXTILES];
EDUKE32_STATIC_ASSERT(MAXARTFILES_TOTAL <= 256);

static int32_t tilefileoffs[MAXTILES];
static int32_t lastageclock;

// Backup tilefilenum[] and tilefileoffs[]. These get allocated only when
// necessary (have per-map ART files).
static uint8_t *g_bakTileFileNum;
static int32_t *g_bakTileFileOffs;
static vec2_t *g_bakTileSiz;
static picanm_t *g_bakPicAnm;
// NOTE: picsiz[] is not backed up, but recalculated when necessary.

//static int32_t artsize = 0;
static int32_t cachesize = 0;

// Whole ART file contents loaded from ZIPs in memory.
static char *artptrs[MAXARTFILES_TOTAL];

static int32_t no_radarang2 = 0;
static int16_t radarang[1280], *radarang2;

uint16_t ATTRIBUTE((used)) sqrtable[4096], ATTRIBUTE((used)) shlookup[4096+256];
const char pow2char[8] = {1,2,4,8,16,32,64,128};
const int32_t pow2long[32] =
{
    1, 2, 4, 8,
    16, 32, 64, 128,
    256, 512, 1024, 2048,
    4096, 8192, 16384, 32768,
    65536, 131072, 262144, 524288,
    1048576, 2097152, 4194304, 8388608,
    16777216, 33554432, 67108864, 134217728,
    268435456, 536870912, 1073741824, 2147483647
};

char britable[16][256]; // JBF 20040207: full 8bit precision

extern char textfont[2048], smalltextfont[2048];

static char kensmessage[128];
const char *engineerrstr = "No error";

int32_t showfirstwall=0;
int32_t showheightindicators=1;
int32_t circlewall=-1;

int32_t whitecol;

#ifdef POLYMER
static int16_t maphacklightcnt=0;
static int16_t maphacklight[PR_MAXLIGHTS];
#endif

// forward refs
#ifdef __cplusplus
extern "C" {
#endif
void setblendtab(int32_t blend, const char *tab);
#ifdef LUNATIC
extern const char *(getblendtab)(int32_t blend);
int32_t setpalookup(int32_t palnum, const uint8_t *shtab);
#endif
void setup_sideview_sincos(void);
int32_t getscreenvdisp(int32_t bz, int32_t zoome);
void screencoords(int32_t *xres, int32_t *yres, int32_t x, int32_t y, int32_t zoome);
int32_t scalescreeny(int32_t sy);
#ifdef YAX_ENABLE
void yax_tweakpicnums(int32_t bunchnum, int32_t cf, int32_t restore);
#endif
int32_t getinvdisplacement(int32_t *dx, int32_t *dy, int32_t dz);
#ifdef __cplusplus
}
#endif

static void scansector(int16_t startsectnum);
static void draw_rainbow_background(void);

int16_t editstatus = 0;
static int32_t global100horiz;  // (-100..300)-scale horiz (the one passed to drawrooms)


////////// YAX //////////

int32_t numgraysects = 0;
uint8_t graysectbitmap[MAXSECTORS>>3];
uint8_t graywallbitmap[MAXWALLS>>3];
int32_t autogray = 0, showinnergray = 1;

//#define YAX_DEBUG_YMOSTS

#ifdef YAX_DEBUG
// XXX: This could be replaced with the use of gethiticks().
double u64tickspersec;
#endif
#ifdef ENGINE_SCREENSHOT_DEBUG
int32_t engine_screenshot = 0;
#endif

int32_t get_alwaysshowgray(void)
{
    return showinnergray || !(editorzrange[0]==INT32_MIN && editorzrange[1]==INT32_MAX);
}

void yax_updategrays(int32_t posze)
{
    int32_t i, j, k=1;
#ifdef YAX_ENABLE
    int32_t mingoodz=INT32_MAX, maxgoodz=INT32_MIN;
#else
    UNREFERENCED_PARAMETER(posze);
#endif

    Bmemset(graysectbitmap, 0, sizeof(graysectbitmap));
    Bmemset(graywallbitmap, 0, sizeof(graywallbitmap));

    for (i=0; i<numsectors; i++)
    {
#ifdef YAX_ENABLE
        int16_t cb, fb;

        yax_getbunches(i, &cb, &fb);
        // update grayouts due to yax  --v-- has to be half-open  --v--
        // because only one level should v  be ever active          v
        k = ((cb<0 || sector[i].ceilingz < posze) && (fb<0 || posze <= sector[i].floorz));
        if (autogray && (cb>=0 || fb>=0) && (sector[i].ceilingz <= posze && posze <= sector[i].floorz))
        {
            mingoodz = min(mingoodz, sector[i].ceilingz);
            maxgoodz = max(maxgoodz, sector[i].floorz);
        }
#endif
        // update grayouts due to editorzrange
        k &= (sector[i].ceilingz >= editorzrange[0] && sector[i].floorz <= editorzrange[1]);

        if (!k)  // outside bounds, gray out!
            graysectbitmap[i>>3] |= (1<<(i&7));
    }

#ifdef YAX_ENABLE
    if (autogray && mingoodz<=maxgoodz)
    {
        for (i=0; i<numsectors; i++)
            if (!(mingoodz <= sector[i].ceilingz && sector[i].floorz <= maxgoodz))
                graysectbitmap[i>>3] |= (1<<(i&7));
    }
#endif

    numgraysects = 0;
    for (i=0; i<numsectors; i++)
    {
        if (graysectbitmap[i>>3]&(1<<(i&7)))
        {
            numgraysects++;
            for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                graywallbitmap[j>>3] |= (1<<(j&7));
        }
    }
}


#if !defined YAX_ENABLE
# warning Non-TROR builds are supported only for debugging. Expect savegame breakage etc...
#endif

#ifdef YAX_ENABLE
// all references to floor/ceiling bunchnums should be through the
// get/set functions!

int32_t g_nodraw = 0;
int32_t scansector_retfast = 0;
static int32_t scansector_collectsprites = 1;
int32_t yax_globalcf = -1, yax_nomaskpass=0, yax_nomaskdidit;  // engine internal
int32_t r_tror_nomaskpass = 1;  // cvar
int32_t yax_globallev = YAX_MAXDRAWS;
int32_t yax_globalbunch = -1;

// duplicated tsprites
//  [i]:
//   i==MAXDRAWS: base level
//   i<MAXDRAWS: MAXDRAWS-i-1 is level towards ceiling
//   i>MAXDRAWS: i-MAXDRAWS-1 is level towards floor
static int16_t yax_spritesortcnt[1 + 2*YAX_MAXDRAWS];
static uint16_t yax_tsprite[1 + 2*YAX_MAXDRAWS][MAXSPRITESONSCREEN];
static uint8_t yax_tsprfrombunch[1 + 2*YAX_MAXDRAWS][MAXSPRITESONSCREEN];

// drawn sectors
uint8_t yax_gotsector[MAXSECTORS>>3];  // engine internal

# if !defined NEW_MAP_FORMAT
// Game-time YAX data structures, V7-V9 map formats.
int16_t yax_bunchnum[MAXSECTORS][2];
int16_t yax_nextwall[MAXWALLS][2];

static inline int32_t yax_islockededge(int32_t line, int32_t cf)
{
    return !!(wall[line].cstat&(YAX_NEXTWALLBIT(cf)));
}

#define YAX_PTRBUNCHNUM(Ptr, Sect, Cf) (*(&Ptr[Sect].ceilingxpanning + 8*Cf))
#define YAX_BUNCHNUM(Sect, Cf) YAX_PTRBUNCHNUM(sector, Sect, Cf)

//// bunch getters/setters
int16_t yax_getbunch(int16_t i, int16_t cf)
{
    if (editstatus==0)
        return yax_bunchnum[i][cf];

    if (((*(&sector[i].ceilingstat + cf))&YAX_BIT)==0)
        return -1;

    return YAX_BUNCHNUM(i, cf);
}
# else
#  define YAX_PTRBUNCHNUM(Ptr, Sect, Cf) (*((Cf) ? &(Ptr)[Sect].floorbunch : &(Ptr)[Sect].ceilingbunch))
#  define YAX_BUNCHNUM(Sect, Cf) YAX_PTRBUNCHNUM(sector, Sect, Cf)

#  if !defined NEW_MAP_FORMAT
static inline int32_t yax_islockededge(int32_t line, int32_t cf)
{
    return (yax_getnextwall(line, cf) >= 0);
}
#  endif
# endif

// bunchnum: -1: also clear yax-nextwalls (forward and reverse)
//           -2: don't clear reverse yax-nextwalls
//           -3: don't clear either forward or reverse yax-nextwalls
void yax_setbunch(int16_t i, int16_t cf, int16_t bunchnum)
{
    if (editstatus==0)
    {
#ifdef NEW_MAP_FORMAT
        YAX_BUNCHNUM(i, cf) = bunchnum;
#else
        yax_bunchnum[i][cf] = bunchnum;
#endif
        return;
    }

    if (bunchnum < 0)
    {
        int32_t j;
        int16_t ynw;

        if (bunchnum > -3)
        {
            // TODO: for in-game too?
            for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
            {
                ynw = yax_getnextwall(j, cf);
                if (ynw >= 0)
                {
                    if (bunchnum > -2)
                        yax_setnextwall(ynw, !cf, -1);
                    yax_setnextwall(j, cf, -1);
                }
            }
        }

#if !defined NEW_MAP_FORMAT
        *(&sector[i].ceilingstat + cf) &= ~YAX_BIT;
        // NOTE: Don't reset xpanning-as-index, since we can be called from
        // e.g. Mapster32's "Inner loop made into new sector" functionality.
//        YAX_BUNCHNUM(i, cf) = 0;
#else
        YAX_BUNCHNUM(i, cf) = -1;
#endif
        return;
    }

#if !defined NEW_MAP_FORMAT
    *(&sector[i].ceilingstat + cf) |= YAX_BIT;
#endif
    YAX_BUNCHNUM(i, cf) = bunchnum;
}

void yax_setbunches(int16_t i, int16_t cb, int16_t fb)
{
    yax_setbunch(i, YAX_CEILING, cb);
    yax_setbunch(i, YAX_FLOOR, fb);
}

# if !defined NEW_MAP_FORMAT
//// nextwall getters/setters
int16_t yax_getnextwall(int16_t wal, int16_t cf)
{
    if (editstatus==0)
        return yax_nextwall[wal][cf];

    if (!yax_islockededge(wal, cf))
        return -1;

    return YAX_NEXTWALL(wal, cf);
}

// unchecked!
void yax_setnextwall(int16_t wal, int16_t cf, int16_t thenextwall)
{
    if (editstatus==0)
    {
        yax_nextwall[wal][cf] = thenextwall;
        return;
    }

    if (thenextwall >= 0)
    {
        wall[wal].cstat |= YAX_NEXTWALLBIT(cf);
        YAX_NEXTWALL(wal, cf) = thenextwall;
    }
    else
    {
        wall[wal].cstat &= ~YAX_NEXTWALLBIT(cf);
        YAX_NEXTWALL(wal, cf) = YAX_NEXTWALLDEFAULT(cf);
    }
}
# endif

// make one step in the vertical direction, and if the wall we arrive at
// is red, return its nextsector.
int16_t yax_vnextsec(int16_t line, int16_t cf)
{
    int16_t const ynw = yax_getnextwall(line, cf);
    return (ynw < 0) ? -1 : wall[ynw].nextsector;
}


//// in-struct --> array transfer (only resetstat==0); list construction
// resetstat:  0: reset and read data from structs and construct linked lists etc.
//             1: only reset
//             2: read data from game-time arrays and construct linked lists etc.
void yax_update(int32_t resetstat)
{
    int32_t i;
#if !defined NEW_MAP_FORMAT
    int32_t j;
    const int32_t oeditstatus=editstatus;
#endif
    int16_t cb, fb;

    if (resetstat != 2)
        numyaxbunches = 0;

    for (i=0; i<MAXSECTORS; i++)
    {
#if !defined NEW_MAP_FORMAT
        if (resetstat != 2 || i>=numsectors)
            yax_bunchnum[i][0] = yax_bunchnum[i][1] = -1;
#endif
        nextsectbunch[0][i] = nextsectbunch[1][i] = -1;
    }
    for (i=0; i<YAX_MAXBUNCHES; i++)
        headsectbunch[0][i] = headsectbunch[1][i] = -1;
#if !defined NEW_MAP_FORMAT
    for (i=0; i<MAXWALLS; i++)
        if (resetstat != 2 || i>=numwalls)
            yax_nextwall[i][0] = yax_nextwall[i][1] = -1;
#endif

    if (resetstat==1)
        return;

    // Constuct singly linked list of sectors-of-bunch.

#if !defined NEW_MAP_FORMAT
    // Read bunchnums directly from the sector struct in yax_[gs]etbunch{es}!
    editstatus = (resetstat==0);
    // NOTE: Use oeditstatus to check for in-gamedness from here on!
#endif

    if (resetstat==0)
    {
        // make bunchnums consecutive
        uint8_t *const havebunch = (uint8_t *)tempbuf;
        uint8_t *const bunchmap = havebunch + (YAX_MAXBUNCHES>>3);
        int32_t dasub = 0;

        Bmemset(havebunch, 0, YAX_MAXBUNCHES>>3);
        for (i=0; i<numsectors; i++)
        {
            yax_getbunches(i, &cb, &fb);
            if (cb>=0)
                havebunch[cb>>3] |= (1<<(cb&7));
            if (fb>=0)
                havebunch[fb>>3] |= (1<<(fb&7));
        }

        for (i=0; i<YAX_MAXBUNCHES; i++)
        {
            if ((havebunch[i>>3]&(1<<(i&7)))==0)
            {
                bunchmap[i] = 255;
                dasub++;
                continue;
            }

            bunchmap[i] = i-dasub;
        }

        for (i=0; i<numsectors; i++)
        {
            yax_getbunches(i, &cb, &fb);
            if (cb>=0)
                yax_setbunch(i, YAX_CEILING, bunchmap[cb]);
            if (fb>=0)
                yax_setbunch(i, YAX_FLOOR, bunchmap[fb]);
        }
    }

    // In-struct --> array transfer (resetstat==0 and !defined NEW_MAP_FORMAT)
    // and list construction.
    for (i=numsectors-1; i>=0; i--)
    {
        yax_getbunches(i, &cb, &fb);
#if !defined NEW_MAP_FORMAT
        if (resetstat==0)
        {
            yax_bunchnum[i][0] = cb;
            yax_bunchnum[i][1] = fb;
        }
#endif

        if (cb >= 0)
        {
#if !defined NEW_MAP_FORMAT
            if (resetstat==0)
                for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                {
                    if (yax_islockededge(j,YAX_CEILING))
                    {
                        yax_nextwall[j][0] = YAX_NEXTWALL(j,0);
                        if (oeditstatus==0)
                            YAX_NEXTWALL(j,0) = 0;  // reset lotag!
                    }
                }
#endif
            if (headsectbunch[0][cb] == -1)
            {
                headsectbunch[0][cb] = i;
                // not duplicated in floors, since every extended ceiling
                // must have a corresponding floor:
                if (resetstat==0)
                    numyaxbunches++;
            }
            else
            {
                int32_t tmpsect = headsectbunch[0][cb];
                headsectbunch[0][cb] = i;
                nextsectbunch[0][i] = tmpsect;
            }
        }

        if (fb >= 0)
        {
#if !defined NEW_MAP_FORMAT
            if (resetstat==0)
                for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                {
                    if (yax_islockededge(j,YAX_FLOOR))
                    {
                        yax_nextwall[j][1] = YAX_NEXTWALL(j,1);
                        if (oeditstatus==0)
                            YAX_NEXTWALL(j,1) = -1;  // reset extra!
                    }
                }
#endif
            if (headsectbunch[1][fb] == -1)
                headsectbunch[1][fb] = i;
            else
            {
                int32_t tmpsect = headsectbunch[1][fb];
                headsectbunch[1][fb] = i;
                nextsectbunch[1][i] = tmpsect;
            }
        }
    }

#if !defined NEW_MAP_FORMAT
    editstatus = oeditstatus;
#else
    mapversion = get_mapversion();
#endif
}

int32_t yax_getneighborsect(int32_t x, int32_t y, int32_t sectnum, int32_t cf)
{
    int16_t bunchnum = yax_getbunch(sectnum, cf);
    int32_t i;

    if (bunchnum < 0)
        return -1;

    for (SECTORS_OF_BUNCH(bunchnum, !cf, i))
        if (inside(x, y, i)==1)
            return i;

    return -1;
}

// indexed as a list:
static int16_t bunches[2][YAX_MAXBUNCHES];
// indexed with bunchnums directly:
static int16_t bunchsec[YAX_MAXBUNCHES], bunchdist[YAX_MAXBUNCHES];

static int32_t ymostallocsize = 0;  // numyaxbunches*xdimen (no sizeof(int16_t) here!)
static int16_t *yumost=NULL, *ydmost=NULL;  // used as if [numyaxbunches][xdimen]
uint8_t haveymost[YAX_MAXBUNCHES>>3];

// adapted from build.c
static void yax_getclosestpointonwall(int32_t dawall, int32_t *closestx, int32_t *closesty)
{
    int64_t i, j, wx,wy, wx2,wy2, dx, dy;

    wx = wall[dawall].x;
    wy = wall[dawall].y;
    wx2 = wall[wall[dawall].point2].x;
    wy2 = wall[wall[dawall].point2].y;

    dx = wx2 - wx;
    dy = wy2 - wy;
    i = dx*(globalposx-wx) + dy*(globalposy-wy);
    if (i <= 0) { *closestx = wx; *closesty = wy; return; }
    j = dx*dx + dy*dy;
    if (i >= j) { *closestx = wx2; *closesty = wy2; return; }
    i=((i<<15)/j)<<15;
    *closestx = wx + ((dx*i)>>30);
    *closesty = wy + ((dy*i)>>30);
}

static inline int32_t yax_walldist(int32_t w)
{
    int32_t closestx, closesty;

    yax_getclosestpointonwall(w, &closestx, &closesty);
    return klabs(closestx-globalposx) + klabs(closesty-globalposy);

//    return klabs(wall[w].x-globalposx) + klabs(wall[w].y-globalposy);
}

// calculate distances to bunches and best start-drawing sectors
static void yax_scanbunches(int32_t bbeg, int32_t numhere, const uint8_t *lastgotsector)
{
    int32_t bnchcnt, bunchnum, j, k;
    int32_t startwall, endwall;

    UNREFERENCED_PARAMETER(lastgotsector);

    scansector_retfast = 1;
    scansector_collectsprites = 0;

    for (bnchcnt=bbeg; bnchcnt<bbeg+numhere; bnchcnt++)
    {
        int32_t walldist, bestsec=-1;
        int32_t bestwalldist=INT32_MAX, bestbestdist=INT32_MAX;

        bunchnum = bunches[yax_globalcf][bnchcnt];

        for (SECTORS_OF_BUNCH(bunchnum,!yax_globalcf, k))
        {
            int32_t checkthisec = 0;

            if (inside(globalposx, globalposy, k)==1)
            {
                bestsec = k;
                bestbestdist = 0;
                break;
            }

            startwall = sector[k].wallptr;
            endwall = startwall+sector[k].wallnum;

            for (j=startwall; j<endwall; j++)
            {
/*
                if ((w=yax_getnextwall(j,!yax_globalcf))>=0)
                    if ((ns=wall[w].nextsector)>=0)
                        if ((lastgotsector[ns>>3]&(1<<(ns&7)))==0)
                            continue;
*/

                walldist = yax_walldist(j);
                if (walldist < bestwalldist)
                {
                    checkthisec = 1;
                    bestwalldist = walldist;
                }
            }

            if (checkthisec)
            {
                numscans = numbunches = 0;
                if (getrendermode() == REND_CLASSIC)
                    scansector(k);
#ifdef USE_OPENGL
                else
                    polymost_scansector(k);
#endif
                if (numbunches > 0)
                {
                    bestsec = k;
                    bestbestdist = bestwalldist;
                }
            }
        }

        bunchsec[bunchnum] = bestsec;
        bunchdist[bunchnum] = bestbestdist;
    }

    scansector_collectsprites = 1;
    scansector_retfast = 0;
}

static int yax_cmpbunches(const void *b1, const void *b2)
{
    return (bunchdist[*(int16_t *)b2] - bunchdist[*(int16_t *)b1]);
}


void yax_tweakpicnums(int32_t bunchnum, int32_t cf, int32_t restore)
{
    // for polymer, this is called before polymer_drawrooms() with restore==0
    // and after polymer_drawmasks() with restore==1

    int32_t i, dastat;
    static int16_t opicnum[2][MAXSECTORS];
#ifdef DEBUGGINGAIDS
    static uint8_t expect_restore[2][YAX_MAXBUNCHES];

    // must call this with restore == 0, 1,  0, 1,  0, 1,  ...
    Bassert(expect_restore[cf][bunchnum] == restore);
    expect_restore[cf][bunchnum] = !expect_restore[cf][bunchnum];
#endif

    for (SECTORS_OF_BUNCH(bunchnum, cf, i))
    {
        dastat = (SECTORFLD(i,stat, cf)&(128+256));

        // only consider non-masked ceilings/floors
        if (dastat==0 || (restore==1 && opicnum[cf][i]&0x8000))
        {
            if (!restore)
            {
                opicnum[cf][i] = SECTORFLD(i,picnum, cf);
                if (editstatus && showinvisibility)
                    SECTORFLD(i,picnum, cf) = MAXTILES-1;
                else //if ((dastat&(128+256))==0)
                    SECTORFLD(i,picnum, cf) = 13; //FOF;
            }
            else
            {
                SECTORFLD(i,picnum, cf) = opicnum[cf][i];
            }
#ifdef POLYMER
            // will be called only in editor
            if (getrendermode() == REND_POLYMER)
            {
                if (!restore)
                {
                    SECTORFLD(i,stat, cf) |= 128;
                    opicnum[cf][i] |= 0x8000;
                }
                else
                {
                    SECTORFLD(i,stat, cf) &= ~128;
                    SECTORFLD(i,picnum, cf) &= 0x7fff;
                    opicnum[cf][i] = 0;
                }
            }
#endif
        }
    }
}

static void yax_copytsprites()
{
    int32_t i, spritenum, gotthrough, sectnum;
    int32_t sortcnt = yax_spritesortcnt[yax_globallev];
    const spritetype *spr;

    for (i=0; i<sortcnt; i++)
    {
        spritenum = yax_tsprite[yax_globallev][i];

        gotthrough = spritenum&(MAXSPRITES|(MAXSPRITES<<1));

        spritenum &= MAXSPRITES-1;
        spr = &sprite[spritenum];
        sectnum = spr->sectnum;

        if (gotthrough == (MAXSPRITES|(MAXSPRITES<<1)))
        {
            if (yax_globalbunch != yax_tsprfrombunch[yax_globallev][i])
                continue;
        }
        else
        {
            int32_t cf = -1;

            if (gotthrough == MAXSPRITES)
                cf = YAX_CEILING;  // sprite got here through the ceiling of lower sector
            else if (gotthrough == (MAXSPRITES<<1))
                cf = YAX_FLOOR;  // sprite got here through the floor of upper sector

            if (cf != -1)
            {
                if ((yax_globallev-YAX_MAXDRAWS)*(-1 + 2*cf) > 0)
                    if (yax_getbunch(sectnum, cf) != yax_globalbunch)
                        continue;

                sectnum = yax_getneighborsect(spr->x, spr->y, sectnum, cf);
                if (sectnum < 0)
                    continue;
            }
        }

        if (spritesortcnt >= MAXSPRITESONSCREEN)
            break;

        Bmemcpy(&tsprite[spritesortcnt], spr, sizeof(spritetype));
        tsprite[spritesortcnt].owner = spritenum;

        tsprite[spritesortcnt].sectnum = sectnum;  // potentially tweak sectnum!
        spritesortcnt++;
    }
}


void yax_preparedrawrooms(void)
{
    if (getrendermode() == REND_POLYMER || numyaxbunches==0)
        return;

    g_nodraw = 1;
    Bmemset(yax_spritesortcnt, 0, sizeof(yax_spritesortcnt));
    Bmemset(haveymost, 0, (numyaxbunches+7)>>3);

    if (getrendermode() == REND_CLASSIC && ymostallocsize < xdimen*numyaxbunches)
    {
        ymostallocsize = xdimen*numyaxbunches;
        yumost = (int16_t *)Xrealloc(yumost, ymostallocsize*sizeof(int16_t));
        ydmost = (int16_t *)Xrealloc(ydmost, ymostallocsize*sizeof(int16_t));
    }
}

void yax_drawrooms(void (*SpriteAnimFunc)(int32_t,int32_t,int32_t,int32_t),
                   int16_t sectnum, int32_t didmirror, int32_t smoothr)
{
    static uint8_t havebunch[YAX_MAXBUNCHES>>3];

    const int32_t horiz = global100horiz;

    int32_t i, j, k, lev, cf, nmp;
    int32_t bnchcnt, bnchnum[2] = {0,0}, maxlev[2];
    int16_t ourbunch[2] = {-1,-1}, osectnum=sectnum;
    int32_t bnchbeg[YAX_MAXDRAWS][2], bnchend[YAX_MAXDRAWS][2];
    int32_t bbeg, numhere;

    // original (1st-draw) and accumulated ('per-level') gotsector bitmaps
    static uint8_t ogotsector[MAXSECTORS>>3], lgotsector[MAXSECTORS>>3];
#ifdef YAX_DEBUG
    uint64_t t;
#endif

    if (getrendermode() == REND_POLYMER || numyaxbunches==0)
    {
#ifdef ENGINE_SCREENSHOT_DEBUG
        engine_screenshot = 0;
#endif
        return;
    }

    // if we're here, there was just a drawrooms() call with g_nodraw=1

    Bmemcpy(ogotsector, gotsector, (numsectors+7)>>3);

    if (sectnum >= 0)
        yax_getbunches(sectnum, &ourbunch[0], &ourbunch[1]);
    Bmemset(&havebunch, 0, (numyaxbunches+7)>>3);

    // first scan all bunches above, then all below...
    for (cf=0; cf<2; cf++)
    {
        yax_globalcf = cf;

        if (cf==1)
        {
            sectnum = osectnum;
            Bmemcpy(gotsector, ogotsector, (numsectors+7)>>3);
        }

        for (lev=0; /*lev<YAX_MAXDRAWS*/; lev++)
        {
            yax_globallev = YAX_MAXDRAWS + (-1 + 2*cf)*(lev+1);

            bbeg = bnchbeg[lev][cf] = bnchend[lev][cf] = bnchnum[cf];
            numhere = 0;

            for (i=0; i<numsectors; i++)
            {
                if (!(gotsector[i>>3]&(1<<(i&7))))
                    continue;

                j = yax_getbunch(i, cf);
                if (j >= 0 && !(havebunch[j>>3]&(1<<(j&7))))
                {
                    if (getrendermode() == REND_CLASSIC && (haveymost[j>>3]&(1<<(j&7)))==0)
                    {
                        yaxdebug("%s, l %d: skipped bunch %d (no *most)", cf?"v":"^", lev, j);
                        continue;
                    }

                    if ((SECTORFLD(i,stat, cf)&2) ||
                            (cf==0 && globalposz > sector[i].ceilingz) ||
                            (cf==1 && globalposz < sector[i].floorz))
                    {
                        havebunch[j>>3] |= (1<<(j&7));
                        bunches[cf][bnchnum[cf]++] = j;
                        bnchend[lev][cf]++;
                        numhere++;
                    }
                }
            }

            if (numhere > 0)
            {
                // found bunches -- need to fake-draw

                yax_scanbunches(bbeg, numhere, (uint8_t *)gotsector);

                qsort(&bunches[cf][bbeg], numhere, sizeof(int16_t), &yax_cmpbunches);

                if (numhere > 1 && lev != YAX_MAXDRAWS-1)
                    Bmemset(lgotsector, 0, (numsectors+7)>>3);

                for (bnchcnt=bbeg; bnchcnt < bbeg+numhere; bnchcnt++)
                {
                    j = bunches[cf][bnchcnt];  // the actual bunchnum...
                    yax_globalbunch = j;
#ifdef YAX_DEBUG
                    t=getu64ticks();
#endif
                    k = bunchsec[j];

                    if (k < 0)
                    {
                        yaxprintf("%s, l %d: skipped bunch %d\n", cf?"v":"^", lev, j);
                        continue;
                    }

                    if (lev != YAX_MAXDRAWS-1)
                    {
#ifdef YAX_DEBUG
                        int32_t odsprcnt = yax_spritesortcnt[yax_globallev];
#endif
                        // +MAXSECTORS: force
                        drawrooms(globalposx,globalposy,globalposz,globalang,horiz,k+MAXSECTORS);
                        if (numhere > 1)
                            for (i=0; i<(numsectors+7)>>3; i++)
                                lgotsector[i] |= gotsector[i];

                        yaxdebug("l%d: faked (bn %2d) sec %4d,%3d dspr, ob=[%2d,%2d], sn=%4d, %.3f ms",
                                 yax_globallev-YAX_MAXDRAWS, j, k, yax_spritesortcnt[yax_globallev]-odsprcnt,
                                 ourbunch[0],ourbunch[1],sectnum,
                                 (double)(1000*(getu64ticks()-t))/u64tickspersec);
                    }

                    if (ourbunch[cf]==j)
                    {
                        ourbunch[cf] = yax_getbunch(k, cf);
                        sectnum = k;
                    }
                }

                if (numhere > 1 && lev != YAX_MAXDRAWS-1)
                    Bmemcpy(gotsector, lgotsector, (numsectors+7)>>3);
            }

            if (numhere==0 || lev==YAX_MAXDRAWS-1)
            {
                // no new bunches or max level reached
                maxlev[cf] = lev - (numhere==0);
                break;
            }
        }
    }

//    yax_globalcf = -1;

    // now comes the real drawing!
    g_nodraw = 0;
    scansector_collectsprites = 0;

    if (editstatus==1)
    {
        if (getrendermode() == REND_CLASSIC)
        {
            begindrawing();
            draw_rainbow_background();
            enddrawing();
        }
#ifdef USE_OPENGL
        else
        {
            bglClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);
        }
#endif
    }

    for (cf=0; cf<2; cf++)
    {
        yax_globalcf = cf;

        for (lev=maxlev[cf]; lev>=0; lev--)
        {
            yax_globallev = YAX_MAXDRAWS + (-1 + 2*cf)*(lev+1);
            scansector_collectsprites = (lev == YAX_MAXDRAWS-1);

            for (bnchcnt=bnchbeg[lev][cf]; bnchcnt<bnchend[lev][cf]; bnchcnt++)
            {
                j = bunches[cf][bnchcnt];  // the actual bunchnum...
                k = bunchsec[j];  // best start-drawing sector
                yax_globalbunch = j;
#ifdef YAX_DEBUG
                t=getu64ticks();
#endif
                yax_tweakpicnums(j, cf, 0);
                if (k < 0)
                    continue;

                yax_nomaskdidit = 0;
                for (nmp=r_tror_nomaskpass; nmp>=0; nmp--)
                {
                    yax_nomaskpass = nmp;
                    drawrooms(globalposx,globalposy,globalposz,globalang,horiz,k+MAXSECTORS);  // +MAXSECTORS: force

                    if (nmp==1)
                    {
                        yaxdebug("nm1 l%d: DRAWN (bn %2d) sec %4d,          %.3f ms",
                                 yax_globallev-YAX_MAXDRAWS, j, k,
                                 (double)(1000*(getu64ticks()-t))/u64tickspersec);

                        if (!yax_nomaskdidit)
                        {
                            yax_nomaskpass = 0;
                            break;  // no need to draw the same stuff twice
                        }
                        Bmemcpy(yax_gotsector, gotsector, (numsectors+7)>>3);
                    }
                }

                if (!scansector_collectsprites)
                    spritesortcnt = 0;
                yax_copytsprites();
                yaxdebug("nm0 l%d: DRAWN (bn %2d) sec %4d,%3d tspr, %.3f ms",
                         yax_globallev-YAX_MAXDRAWS, j, k, spritesortcnt,
                         (double)(1000*(getu64ticks()-t))/u64tickspersec);

                SpriteAnimFunc(globalposx, globalposy, globalang, smoothr);
                drawmasks();
            }

            if (lev < maxlev[cf])
                for (bnchcnt=bnchbeg[lev+1][cf]; bnchcnt<bnchend[lev+1][cf]; bnchcnt++)
                    yax_tweakpicnums(bunches[cf][bnchcnt], cf, 1);  // restore picnums
        }
    }

#ifdef YAX_DEBUG
    t=getu64ticks();
#endif
    yax_globalcf = -1;
    yax_globalbunch = -1;
    yax_globallev = YAX_MAXDRAWS;
    scansector_collectsprites = 0;

    // draw base level
    drawrooms(globalposx,globalposy,globalposz,globalang,horiz,
              osectnum + MAXSECTORS*didmirror);
//    if (scansector_collectsprites)
//        spritesortcnt = 0;
    yax_copytsprites();
    yaxdebug("DRAWN base level sec %d,%3d tspr, %.3f ms", osectnum,
             spritesortcnt, (double)(1000*(getu64ticks()-t))/u64tickspersec);
    scansector_collectsprites = 1;

    for (cf=0; cf<2; cf++)
        if (maxlev[cf] >= 0)
            for (bnchcnt=bnchbeg[0][cf]; bnchcnt<bnchend[0][cf]; bnchcnt++)
                yax_tweakpicnums(bunches[cf][bnchcnt], cf, 1);  // restore picnums

#ifdef ENGINE_SCREENSHOT_DEBUG
    engine_screenshot = 0;
#endif

#ifdef YAX_DEBUG_YMOSTS
    if (getrendermode() == REND_CLASSIC && numyaxbunches>0)
    {
        char purple = getclosestcol(63, 0, 63);
        char yellow = getclosestcol(63, 63, 0);

        begindrawing();
        for (i=0; i<numyaxbunches; i++)
        {
            int32_t x, x1;

            if ((haveymost[i>>3]&(1<<i&7))==0)
                continue;

            x1 = i*xdimen;

            for (x=x1; x<x1+xdimen; x++)
            {
                if (yumost[x] >= 0 && yumost[x] < ydim && (x&1))
                    *((char *)frameplace + yumost[x]*bytesperline + x-x1) = purple;

                if (ydmost[x]-1 >= 0 && ydmost[x]-1 < ydim && !(x&1))
                    *((char *)frameplace + (ydmost[x]-1)*bytesperline + x-x1) = yellow;
            }
        }
        enddrawing();
    }
#endif
}

#endif  // defined YAX_ENABLE

// must have writable frame buffer, i.e. done begindrawing()
static void draw_rainbow_background(void)
{
    int32_t y, i;
    const int32_t N = 240;  // don't use fullbright colors
    const int32_t numfull=bytesperline/N, numrest=bytesperline%N;

    const char *const src = palookup[0] + 256*18;
    char *dst = (char *)frameplace;

    for (y=0; y<ydim; y++)
    {
        for (i=0; i<numfull; i++)
            Bmemcpy(&dst[N*i], src, N);
        if (numrest > 0)
            Bmemcpy(&dst[N*i], src, numrest);

        dst += bytesperline;
    }
}

//
// setslope
//
void setslope(int32_t sectnum, int32_t cf, int16_t slope)
{
    if (slope==0)
    {
        SECTORFLD(sectnum,stat, cf) &= ~2;
        SECTORFLD(sectnum,heinum, cf) = 0;
    }
    else
    {
        SECTORFLD(sectnum,stat, cf) |= 2;
        SECTORFLD(sectnum,heinum, cf) = slope;
    }
}

////////// editor side view //////////
int32_t m32_sideview = 0;
int32_t m32_sideelev = 256;  // elevation in BUILD degrees, 0..512
int16_t m32_sideang = 200;  // azimuth, 0..2047

int32_t m32_sidecos, m32_sidesin;
int32_t m32_swcnt;
int32_t m32_wallscreenxy[MAXWALLS][2];
int16_t m32_wallsprite[MAXWALLS+MAXSPRITES];
static int32_t m32_sidedist[MAXWALLS+MAXSPRITES];
static vec3_t m32_viewplane;


////// sector-like clipping for sprites //////
#ifdef HAVE_CLIPSHAPE_FEATURE
typedef struct
{
    int16_t numsectors, numwalls;
    tsectortype *sector;
    twalltype *wall;
} mapinfo_t;

static void mapinfo_set(mapinfo_t *bak, mapinfo_t *newmap)
{
    if (bak)
    {
        bak->numsectors = numsectors;
        bak->numwalls = numwalls;
        bak->sector = (tsectortype *)sector;
        bak->wall = (twalltype *)wall;
    }

    if (newmap)
    {
        numsectors = newmap->numsectors;
        numwalls = newmap->numwalls;
        sector = (sectortype *)newmap->sector;
        wall = (walltype *)newmap->wall;
    }
}

static mapinfo_t origmapinfo, clipmapinfo;
static int32_t quickloadboard=0;


#define CM_MAX 256  // must be a power of 2

typedef struct
{
    int16_t qbeg, qend;  // indices into sectq
    int16_t picnum, next;
    int32_t maxdist;
} clipinfo_t;

static int32_t numclipmaps;
static clipinfo_t clipinfo[CM_MAX];

static int32_t numclipsects;  // number in sectq[]
static int16_t *sectoidx, *sectq;  // [numsectors]
static int16_t pictoidx[MAXTILES];  // maps tile num to clipinfo[] index
static int16_t *tempictoidx;

static tsectortype *loadsector;
static twalltype *loadwall, *loadwallinv;
static tspritetype *loadsprite;

// sectoidx bits
#undef CM_NONE
#define CM_NONE (CM_MAX<<1)
#define CM_SOME (CM_NONE-1)
#define CM_OUTER (CM_MAX)   // sector surrounds clipping sector

// sprite -> sector tag mappings
#define CM_XREPEAT floorpal
#define CM_YREPEAT floorxpanning
#define CM_XOFFSET ceilingshade
#define CM_YOFFSET floorshade
#define CM_CSTAT hitag
#define CM_ANG extra
#define CM_FLOORZ(Sec) (*(int32_t *)&sector[Sec].ceilingxpanning)  // ceilingxpanning,ceilingypanning,floorpicnum
#define CM_CEILINGZ(Sec) (*(int32_t *)&sector[Sec].visibility)  // visibility,fogpal,lotag

// backup of original normalized coordinates
#define CM_WALL_X(Wal) (*(int32_t *)&wall[Wal].picnum)  // picnum, overpicnum
#define CM_WALL_Y(Wal) (*(int32_t *)&wall[Wal].lotag)  // lotag, hitag

// don't rotate when applying clipping, for models with rotational symmetry
#define CM_NOROT(Spri) (sprite[Spri].cstat&2)
#define CM_NOROTS(Sect) (sector[Sect].CM_CSTAT&2)


static void clipmapinfo_init()
{
    numclipmaps = 0;
    numclipsects = 0;

    DO_FREE_AND_NULL(sectq);
    DO_FREE_AND_NULL(sectoidx);
    DO_FREE_AND_NULL(tempictoidx);
    DO_FREE_AND_NULL(loadsector);
    DO_FREE_AND_NULL(loadwall);
    DO_FREE_AND_NULL(loadwallinv);
    DO_FREE_AND_NULL(loadsprite);

    // two's complement trick, -1 = 0xff
    Bmemset(&pictoidx, -1, sizeof(pictoidx));
    Bmemset(&clipmapinfo, 0, sizeof(mapinfo_t));

    numsectors = 0;
    numwalls = 0;
}

// loads the clip maps.
// this should be called before any real map is loaded.
int32_t clipmapinfo_load(void)
{
    int32_t i,k,w;

    int32_t lwcp = 0;
    int32_t fi;

    int32_t *fisec = NULL;
    int32_t *fispr = NULL;

    int32_t ournumsectors=0, ournumwalls=0, ournumsprites=0;

    clipmapinfo_init();

    loadsector = (tsectortype *)Xmalloc(MAXSECTORS * sizeof(sectortype));
    loadwall = (twalltype *)Xmalloc(MAXWALLS * sizeof(walltype));
    loadsprite = (tspritetype *)Xmalloc(MAXSPRITES * sizeof(spritetype));

    if (g_clipMapFilesNum)
        fisec = (int32_t *)Xcalloc(g_clipMapFilesNum, sizeof (int32_t));
    if (g_clipMapFilesNum)
        fispr = (int32_t *)Xcalloc(g_clipMapFilesNum, sizeof (int32_t));

    quickloadboard = 1;
    for (fi = 0; fi < g_clipMapFilesNum; ++fi)
    {
        int16_t ang,cs;
        vec3_t tmppos;

        fisec[fi] = ournumsectors;
        fispr[fi] = ournumsprites;

        i = loadboard(g_clipMapFiles[fi], 8, &tmppos, &ang, &cs);
        if (i<0)
            continue;
        // Numsprites will now be set!

        initprintf("Loading clip map: %s\n", g_clipMapFiles[fi]);

        if (ournumsectors+numsectors>MAXSECTORS ||
                ournumwalls+numwalls>MAXWALLS ||
                ournumsprites+Numsprites>MAXSPRITES)
        {
            initprintf("clip map: warning: exceeded limits when loading %s, aborting.\n", g_clipMapFiles[fi]);
            break;
        }

        Bmemcpy(loadsector+ournumsectors, sector, numsectors*sizeof(sectortype));
        Bmemcpy(loadwall+ournumwalls, wall, numwalls*sizeof(walltype));
        Bmemcpy(loadsprite+ournumsprites, sprite, Numsprites*sizeof(spritetype));
        for (i=ournumsectors; i<ournumsectors+numsectors; i++)
            loadsector[i].wallptr += ournumwalls;
        for (i=ournumwalls; i<ournumwalls+numwalls; i++)
        {
            if (loadwall[i].point2>=0)
                loadwall[i].point2 += ournumwalls;
            if (loadwall[i].nextwall>=0)
            {
                loadwall[i].nextwall += ournumwalls;
                loadwall[i].nextsector += ournumsectors;
            }
        }
        for (i=ournumsprites; i<ournumsprites+Numsprites; i++)
            if (loadsprite[i].sectnum>=0)
                loadsprite[i].sectnum += ournumsectors;
        ournumsectors += numsectors;
        ournumwalls += numwalls;
        ournumsprites += Numsprites;

        ++lwcp;
    }
    quickloadboard = 0;

    if (ournumsectors==0 || ournumwalls==0 || ournumsprites==0)  // nothing loaded
    {
        clipmapinfo_init();

        Bfree(fisec);
        Bfree(fispr);

        return -1;
    }

    // shrink
    loadsector = (tsectortype *)Xrealloc(loadsector, ournumsectors*sizeof(sectortype));
    loadwall = (twalltype *)Xrealloc(loadwall, ournumwalls*sizeof(walltype));

    Bmemcpy(sector, loadsector, ournumsectors*sizeof(sectortype));
    Bmemcpy(wall, loadwall, ournumwalls*sizeof(walltype));
    Bmemcpy(sprite, loadsprite, ournumsprites*sizeof(spritetype));
    numsectors = ournumsectors;
    numwalls = ournumwalls;

    //  vvvv    don't use headsprite[sect,stat]!   vvvv

    sectoidx = (int16_t *)Xmalloc(numsectors*sizeof(sectoidx[0]));

    for (i=0; i<numsectors; i++)
        sectoidx[i] = CM_NONE;

    // determine outer sectors
    for (i=0; i<numsectors; i++)
    {
        for (w=sector[i].wallptr; w<sector[i].wallptr+sector[i].wallnum; w++)
            if (wall[w].nextsector<0)
            {
                sectoidx[i] = CM_OUTER;
                break;
            }
    }
    // break connections between outer sectors
    for (i=0; i<numsectors; i++)
    {
        if (sectoidx[i] == CM_OUTER)
            for (w=sector[i].wallptr; w<sector[i].wallptr+sector[i].wallnum; w++)
            {
                k = wall[w].nextwall;
                if (k>=0 && sectoidx[wall[w].nextsector]==CM_OUTER)
                {
                    wall[k].nextwall = wall[k].nextsector = -1;
                    wall[w].nextwall = wall[w].nextsector = -1;
                }
            }
    }

    {
        int16_t ns, outersect;
        int32_t pn,scnt, x,y,z, maxdist;

        sectq = (int16_t *)Xmalloc(numsectors*sizeof(sectq[0]));
        tempictoidx = (int16_t *)Xmalloc(MAXTILES*sizeof(tempictoidx[0]));

        for (i=0; i<MAXTILES; i++)
            tempictoidx[i]=-1;

        // collect sprite picnums
        for (i=0; i<MAXSPRITES && sprite[i].statnum<MAXSTATUS; i++)
        {
            pn = sprite[i].picnum;
            k = sprite[i].sectnum;
            //    -v-  note the <=                         ignore sprites in outer sectors
            if (pn<=0 || pn>=MAXTILES || k<0 || k>=numsectors || (sectoidx[k]&CM_OUTER))
                continue;

            if (numclipmaps >= CM_MAX)
            {
                initprintf("warning: reached max clip map number %d, not processing any more\n", CM_MAX);
                break;
            }

            // chain
            if (pictoidx[pn]>=0)
            {
                if (sectoidx[k]&CM_SOME)
                {
                    for (fi = 0; fi < g_clipMapFilesNum; ++fi)
                        if (k>=fisec[fi])
                            break;
                    initprintf("clip map \"%s\": error: tried to chain picnum %d (sprite %d) in sector %d which"
                               " already belongs to picnum %d.\n", g_clipMapFiles[fi], pn, i-fispr[fi], k-fisec[fi],
                               clipinfo[sectoidx[k]].picnum);
                    clipmapinfo_init();

                    Bfree(fisec);
                    Bfree(fispr);

                    return 2;
                }

                // new one is front
                clipinfo[numclipmaps].next = pictoidx[pn];
                pictoidx[pn] = numclipmaps;
            }
            else
            {
                clipinfo[numclipmaps].next = -1;
                pictoidx[pn] = numclipmaps;
            }

            if (!CM_NOROT(i))
            {
                if (sprite[i].ang!=1536 && sprite[i].ang!=512)
                {
                    for (fi = 0; fi < g_clipMapFilesNum; ++fi)
                        if (i>=fispr[fi])
                            break;
                    initprintf("clip map \"%s\": warning: sprite %d pointing neither northward nor southward. %s will be wrong.\n",
                               g_clipMapFiles[fi], i-fispr[fi], (sprite[i].cstat&48)==32 ? "Scaling and flipping" : "X-flipping");
                }
            }

            clipinfo[numclipmaps].picnum = pn;

            // collect sectors
            scnt = numclipsects;
            sectq[numclipsects++] = k;
            sectoidx[k] = numclipmaps;

            clipinfo[numclipmaps].qbeg = scnt;

            outersect = -1;

            do
            {
                k = sectq[scnt];

                for (w=sector[k].wallptr; w<sector[k].wallptr+sector[k].wallnum; w++)
                {
                    ns = wall[w].nextsector;
                    if (ns>=0)
                    {
                        if (sectoidx[ns]==CM_NONE)
                        {
                            sectoidx[ns] = numclipmaps;
                            sectq[numclipsects++] = ns;
                        }
                        else if (sectoidx[ns]&CM_OUTER)
                        {
                            if (outersect>=0 && ns!=outersect)
                            {
                                for (fi = 0; fi < g_clipMapFilesNum; ++fi)
                                    if (ns>=fisec[fi])
                                        break;
                                initprintf("clip map \"%s\": error: encountered more than one outer sector (%d and %d)"
                                           " for sprite %d.\n", g_clipMapFiles[fi], outersect-fisec[fi], ns-fisec[fi], i-fispr[fi]);
                                clipmapinfo_init();

                                Bfree(fisec);
                                Bfree(fispr);

                                return 3;
                            }

                            outersect = ns;
                            sectoidx[outersect] |= numclipmaps;
                        }
                        else if (sectoidx[ns]!=numclipmaps)
                        {
                            for (fi = 0; fi < g_clipMapFilesNum; ++fi)
                                if (ns>=fisec[fi])
                                    break;
                            initprintf("clip map \"%s\": error: encountered sector %d belonging to index %d"
                                       " while collecting sectors for sprite %d (index %d).\n",
                                       g_clipMapFiles[fi], ns-fisec[fi], sectoidx[ns], i-fispr[fi], numclipmaps);
                            clipmapinfo_init();

                            Bfree(fisec);
                            Bfree(fispr);

                            return 4;
                        }
                    }
                }
            }
            while (++scnt < numclipsects);

            if (outersect==-1)
            {
                initprintf("clip map: INTERNAL ERROR: outersect==-1!\n");
                clipmapinfo_init();

                Bfree(fisec);
                Bfree(fispr);

                return 5;
            }

            sectq[numclipsects++] = outersect;  // last is outer
            clipinfo[numclipmaps].qend = numclipsects-1;

            // normalize
            maxdist = 0;

            for (scnt=clipinfo[numclipmaps].qbeg; scnt<=clipinfo[numclipmaps].qend; scnt++)
            {
                k = sectq[scnt];

                x = sprite[i].x;
                y = sprite[i].y;
                z = sprite[i].z;

                sector[k].floorz -= z;
                sector[k].ceilingz -= z;

                if (scnt==clipinfo[numclipmaps].qbeg)
                {
                    // backup sprite tags since we'll discard sprites later
                    sector[k].CM_XREPEAT = sprite[i].xrepeat;
                    sector[k].CM_YREPEAT = sprite[i].yrepeat;
                    sector[k].CM_XOFFSET = sprite[i].xoffset;
                    sector[k].CM_YOFFSET = sprite[i].yoffset;
                    sector[k].CM_CSTAT = sprite[i].cstat;
                    sector[k].CM_ANG = sprite[i].ang;
                }

                // backup floor and ceiling z
                CM_FLOORZ(k) = sector[k].floorz;
                CM_CEILINGZ(k) = sector[k].ceilingz;

                for (w=sector[k].wallptr; w<sector[k].wallptr+sector[k].wallnum; w++)
                {
                    wall[w].x -= x;
                    wall[w].y -= y;

                    if (scnt!=clipinfo[numclipmaps].qend)
                    {
                        if (CM_NOROT(i))
                        {
                            if (klabs(wall[w].x) > maxdist)
                                maxdist = klabs(wall[w].x);
                            if (klabs(wall[w].y) > maxdist)
                                maxdist = klabs(wall[w].y);
                        }
                        else
                        {
                            int32_t tmp = ksqrt(uhypsq(wall[w].x, wall[w].y));
                            if (tmp > maxdist)
                                maxdist = tmp;
                        }
                    }

                    // aliasing
                    if (wall[w].lotag>0 || wall[w].hitag>0)
                    {
                        int32_t ii;

                        if (wall[w].lotag>0 && wall[w].hitag>0)
                        {
                            if (wall[w].lotag > wall[w].hitag)
                                swapshort(&wall[w].lotag, &wall[w].hitag);

                            for (ii=wall[w].lotag; ii<wall[w].hitag; ii++)
                                tempictoidx[ii] = numclipmaps;
                        }
                        else if (wall[w].lotag>0)
                        {
                            if (wall[w].lotag<MAXTILES)
                                tempictoidx[wall[w].lotag] = numclipmaps;
                        }
                        else
                        {
                            if (wall[w].hitag<MAXTILES)
                                tempictoidx[wall[w].hitag] = numclipmaps;
                        }
                    }

                    CM_WALL_X(w) = wall[w].x;
                    CM_WALL_Y(w) = wall[w].y;
                }
            }

            clipinfo[numclipmaps].maxdist = maxdist;
            numclipmaps++;
        }
    }

    // yes, too much copying, but better than ugly code
    Bmemcpy(loadsector, sector, ournumsectors*sizeof(sectortype));
    Bmemcpy(loadwall, wall, ournumwalls*sizeof(walltype));

    // loadwallinv will contain all walls with inverted orientation for x/y-flip handling
    loadwallinv = (twalltype *)Xmalloc(ournumwalls*sizeof(walltype));

    {
        int32_t j, loopstart, loopend, numloopwalls;

        // invert walls!
        loopstart = 0;
        for (j=0; j<ournumwalls; j++)
        {
            wall[j].nextsector = wall[j].nextwall = -1;

            if (wall[j].point2 < j)
            {
                loopend = j+1;
                numloopwalls = loopend-loopstart;

                if (numloopwalls<3)
                {
                    loopstart = loopend;
                    continue;
                }

                for (k=0; k<numloopwalls; k++)
                {
                    wall[loopstart+k].x = loadwall[loopstart + (numloopwalls+1-k)%numloopwalls].x;
                    wall[loopstart+k].y = loadwall[loopstart + (numloopwalls+1-k)%numloopwalls].y;

                    CM_WALL_X(loopstart+k) = wall[loopstart+k].x;
                    CM_WALL_Y(loopstart+k) = wall[loopstart+k].y;
                }

                loopstart = loopend;
            }
        }

        // reconstruct wall connections
        for (i=0; i<ournumsectors; i++)
        {
            for (j=sector[i].wallptr; j<sector[i].wallptr+sector[i].wallnum; j++)
                checksectorpointer(j, i);
        }
    }
    Bmemcpy(loadwallinv, wall, ournumwalls*sizeof(walltype));

    clipmapinfo.numsectors = numsectors;
    clipmapinfo.sector = loadsector;
    clipmapinfo.numwalls = numwalls;
    clipmapinfo.wall = loadwall;

    for (i=0; i<MAXTILES; i++)
    {
        if (pictoidx[i]==-1 && tempictoidx[i]>=0)
            pictoidx[i]=tempictoidx[i];
    }

    Bfree(loadsprite); loadsprite=NULL;
    Bfree(tempictoidx); tempictoidx=NULL;

    // don't let other code be distracted by the temporary map we constructed
    numsectors = 0;
    numwalls = 0;
    initspritelists();

    if (lwcp > 0)
        initprintf("Loaded clip map%s.\n", lwcp==1?"":"s");

    Bfree(fisec);
    Bfree(fispr);

    return 0;
}


int32_t clipshape_idx_for_sprite(spritetype *curspr, int32_t curidx)
{
    if (curidx < 0)  // per-sprite init
        curidx = pictoidx[curspr->picnum];
    else
        curidx = clipinfo[curidx].next;

    while (curidx>=0 && (curspr->cstat&32) != (sector[sectq[clipinfo[curidx].qbeg]].CM_CSTAT&32))
        curidx = clipinfo[curidx].next;

    return curidx;
}
#else
int32_t clipshape_idx_for_sprite(spritetype *curspr, int32_t curidx)
{
    UNREFERENCED_PARAMETER(curspr);
    UNREFERENCED_PARAMETER(curidx);
    return -1;
}
#endif  // HAVE_CLIPSHAPE_FEATURE
////// //////

#define WALLS_ARE_CONSISTENT(k) ((wall[k].x == x2 && wall[k].y == y2)   \
                                 && ((wall[wall[k].point2]).x == x1 && (wall[wall[k].point2]).y == y1))


static int32_t getscore(int32_t w1c, int32_t w1f, int32_t w2c, int32_t w2f)
{
    if (w1c > w1f)
        swaplong(&w1c, &w1f);
    if (w2c > w2f)
        swaplong(&w2c, &w2f);

    // now: c <= f for each "wall-vline"

    int32_t maxceil = max(w1c, w2c);
    int32_t minflor = min(w1f, w2f);

    return minflor-maxceil;
}

const int16_t *chsecptr_onextwall = NULL;

int32_t checksectorpointer(int16_t i, int16_t sectnum)
{
    int32_t startsec, endsec;
    int32_t j, k, startwall, endwall, x1, y1, x2, y2, numnewwalls=0;
    int32_t bestnextwall=-1, bestnextsec=-1, bestwallscore=INT32_MIN;
    int32_t cz[4], fz[4], tmp[2], tmpscore=0;
#ifdef YAX_ENABLE
    int16_t cb[2], fb[2];
#endif

#if 0
    if (checksectorpointer_warn && (i<0 || i>=max(numwalls,newnumwalls)))
    {
        char buf[128];
        Bsprintf(buf, "WARN: checksectorpointer called with i=%d but (new)numwalls=%d", i, max(numwalls,newnumwalls));
        OSD_Printf("%s\n", buf);
        printmessage16("%s", buf);
        return 0;
    }
#endif

    x1 = wall[i].x;
    y1 = wall[i].y;
    x2 = (wall[wall[i].point2]).x;
    y2 = (wall[wall[i].point2]).y;

    k = wall[i].nextwall;
    if (k >= 0)          //Check for early exit
    {
        if (WALLS_ARE_CONSISTENT(k))
            return 0;

        wall[k].nextwall = wall[k].nextsector = -1;
    }

    if ((unsigned)wall[i].nextsector < (unsigned)numsectors && wall[i].nextwall < 0)
    {
        // if we have a nextsector but no nextwall, take this as a hint
        // to search only the walls of that sector
        startsec = wall[i].nextsector;
        endsec = startsec+1;
    }
    else
    {
        startsec = 0;
        endsec = numsectors;
    }

    wall[i].nextsector = wall[i].nextwall = -1;

    if (chsecptr_onextwall && (k=chsecptr_onextwall[i])>=0 && wall[k].nextwall<0)
    {
        // old next wall found
        if (WALLS_ARE_CONSISTENT(k))
        {
            j = sectorofwall(k);

            wall[i].nextsector = j;
            wall[i].nextwall = k;
            wall[k].nextsector = sectnum;
            wall[k].nextwall = i;

            return 1;
        }
    }

    for (j=startsec; j<endsec; j++)
    {
        if (j == sectnum)
            continue;

        YAX_SKIPSECTOR(j);

        startwall = sector[j].wallptr;
        endwall = startwall + sector[j].wallnum;
        for (k=startwall; k<endwall; k++)
        {
            if (!WALLS_ARE_CONSISTENT(k))
                continue;

            // Don't create link if the other side is connected to another wall.
            // The nextwall relation should be definitely one-to-one at all times!
            if (wall[k].nextwall>=0 && wall[k].nextwall != i)
                continue;
#ifdef YAX_ENABLE
            yax_getbunches(sectnum, &cb[0], &fb[0]);
            yax_getbunches(j, &cb[1], &fb[1]);

            if ((cb[0]>=0 && cb[0]==cb[1]) || (fb[0]>=0 && fb[0]==fb[1]))
            {
                tmpscore = INT32_MAX;
            }
            else
#endif
            {
                getzsofslope(sectnum, x1,y1, &cz[0],&fz[0]);
                getzsofslope(sectnum, x2,y2, &cz[1],&fz[1]);
                getzsofslope(j, x1,y1, &cz[2],&fz[2]);
                getzsofslope(j, x2,y2, &cz[3],&fz[3]);

                tmp[0] = getscore(cz[0],fz[0], cz[2],fz[2]);
                tmp[1] = getscore(cz[1],fz[1], cz[3],fz[3]);

                if ((tmp[0]^tmp[1]) >= 0)
                    tmpscore = tmp[0]+tmp[1];
                else
                    tmpscore = max(tmp[0], tmp[1]);
            }

            if (bestnextwall == -1 || tmpscore > bestwallscore)
            {
                bestwallscore = tmpscore;
                bestnextwall = k;
                bestnextsec = j;
            }

            numnewwalls++;
        }
    }

    // sectnum -2 means dry run
    if (bestnextwall >= 0 && sectnum!=-2)
#ifdef YAX_ENABLE
        // for walls with TROR neighbors, be conservative in case if score <=0
        // (meaning that no wall area is mutually visible) -- it could be that
        // another sector is a better candidate later on
        if ((yax_getnextwall(i, 0)<0 && yax_getnextwall(i, 1)<0) || bestwallscore>0)
#endif
        {
//    initprintf("w%d new nw=%d (score %d)\n", i, bestnextwall, bestwallscore)
            wall[i].nextsector = bestnextsec;
            wall[i].nextwall = bestnextwall;
            wall[bestnextwall].nextsector = sectnum;
            wall[bestnextwall].nextwall = i;
        }

    return numnewwalls;
}

#undef WALLS_ARE_CONSISTENT


#if defined(_MSC_VER) && !defined(NOASM)

//
// Microsoft C Inline Assembly Routines
//

static inline int32_t nsqrtasm(int32_t a)
{
    _asm
    {
        push ebx
        mov eax, a
        test eax, 0xff000000
        mov ebx, eax
        jnz short over24
        shr ebx, 12
        mov cx, word ptr shlookup[ebx*2]
        jmp short under24
        over24:
        shr ebx, 24
        mov cx, word ptr shlookup[ebx*2+8192]
        under24:
        shr eax, cl
        mov cl, ch
        mov ax, word ptr sqrtable[eax*2]
        shr eax, cl
        pop ebx
    }
}

static inline int32_t msqrtasm(int32_t c)
{
    _asm
    {
        push ebx
        mov ecx, c
        mov eax, 0x40000000
        mov ebx, 0x20000000
        begit:
        cmp ecx, eax
        jl skip
        sub ecx, eax
        lea eax, [eax+ebx*4]
        skip:
        sub eax, ebx
        shr eax, 1
        shr ebx, 2
        jnz begit
        cmp ecx, eax
        sbb eax, -1
        shr eax, 1
        pop ebx
    }
}

static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
    _asm
    {
        push ebx
        mov eax, a
        mov ebx, b
        mov ecx, c
        mov edx, d
        sar eax, 31
        add ebx, ebx
        adc eax, eax
        add ecx, ecx
        adc eax, eax
        add edx, edx
        adc eax, eax
        mov ebx, eax
        shl ebx, 4
        or al, 0xf0
        xor eax, ebx
        pop ebx
    }
}

static inline int32_t getkensmessagecrc(void *b)
{
    _asm
    {
        push ebx
        mov ebx, b
        xor eax, eax
        mov ecx, 32
        beg:
        mov edx, dword ptr [ebx+ecx*4-4]
        ror edx, cl
        adc eax, edx
        bswap eax
        loop short beg
        pop ebx
    }
}

#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM) // _MSC_VER

//
// GCC "Inline" Assembly Routines
//

#define nsqrtasm(a) \
    ({ int32_t __r, __a=(a); \
       __asm__ __volatile__ ( \
        "testl $0xff000000, %%eax\n\t" \
        "movl %%eax, %%ebx\n\t" \
        "jnz 0f\n\t" \
        "shrl $12, %%ebx\n\t" \
        "movw " ASMSYM("shlookup") "(,%%ebx,2), %%cx\n\t" \
        "jmp 1f\n\t" \
        "0:\n\t" \
        "shrl $24, %%ebx\n\t" \
        "movw (" ASMSYM("shlookup") "+8192)(,%%ebx,2), %%cx\n\t" \
        "1:\n\t" \
        "shrl %%cl, %%eax\n\t" \
        "movb %%ch, %%cl\n\t" \
        "movw " ASMSYM("sqrtable") "(,%%eax,2), %%ax\n\t" \
        "shrl %%cl, %%eax" \
        : "=a" (__r) : "a" (__a) : "ebx", "ecx", "cc"); \
     __r; })


// edx is blown by this code somehow?!
#define msqrtasm(c) \
    ({ int32_t __r, __c=(c); \
       __asm__ __volatile__ ( \
        "movl $0x40000000, %%eax\n\t" \
        "movl $0x20000000, %%ebx\n\t" \
        "0:\n\t" \
        "cmpl %%eax, %%ecx\n\t" \
        "jl 1f\n\t" \
        "subl %%eax, %%ecx\n\t" \
        "leal (%%eax,%%ebx,4), %%eax\n\t" \
        "1:\n\t" \
        "subl %%ebx, %%eax\n\t" \
        "shrl $1, %%eax\n\t" \
        "shrl $2, %%ebx\n\t" \
        "jnz 0b\n\t" \
        "cmpl %%eax, %%ecx\n\t" \
        "sbbl $-1, %%eax\n\t" \
        "shrl $1, %%eax" \
        : "=a" (__r) : "c" (__c) : "edx","ebx", "cc"); \
     __r; })


#define getclipmask(a,b,c,d) \
    ({ int32_t __a=(a), __b=(b), __c=(c), __d=(d); \
       __asm__ __volatile__ ("sarl $31, %%eax; addl %%ebx, %%ebx; adcl %%eax, %%eax; " \
                "addl %%ecx, %%ecx; adcl %%eax, %%eax; addl %%edx, %%edx; " \
                "adcl %%eax, %%eax; movl %%eax, %%ebx; shl $4, %%ebx; " \
                "orb $0xf0, %%al; xorl %%ebx, %%eax" \
        : "=a" (__a), "=b" (__b), "=c" (__c), "=d" (__d) \
        : "a" (__a), "b" (__b), "c" (__c), "d" (__d) : "cc"); \
     __a; })



#define getkensmessagecrc(b) \
    ({ int32_t __a, __b=(b); \
       __asm__ __volatile__ ( \
        "xorl %%eax, %%eax\n\t" \
        "movl $32, %%ecx\n\t" \
        "0:\n\t" \
        "movl -4(%%ebx,%%ecx,4), %%edx\n\t" \
        "rorl %%cl, %%edx\n\t" \
        "adcl %%edx, %%eax\n\t" \
        "bswapl %%eax\n\t" \
        "loop 0b" \
        : "=a" (__a) : "b" (__b) : "ecx", "edx" \
     __a; })


#else   // __GNUC__ && __i386__

static inline int32_t nsqrtasm(uint32_t a)
{
    // JBF 20030901: This was a damn lot simpler to reverse engineer than
    // msqrtasm was. Really, it was just like simplifying an algebra equation.
    uint16_t c;

    if (a & 0xff000000)                         // test eax, 0xff000000  /  jnz short over24
    {
        c = shlookup[(a >> 24) + 4096]; // mov ebx, eax
        // over24: shr ebx, 24
        // mov cx, word ptr shlookup[ebx*2+8192]
    }
    else
    {
        c = shlookup[a >> 12];          // mov ebx, eax
        // shr ebx, 12
        // mov cx, word ptr shlookup[ebx*2]
        // jmp short under24
    }
    a >>= c&0xff;                               // under24: shr eax, cl
    a = (a&0xffff0000)|(sqrtable[a]);   // mov ax, word ptr sqrtable[eax*2]
    a >>= ((c&0xff00) >> 8);            // mov cl, ch
    // shr eax, cl
    return a;
}

static inline int32_t msqrtasm(uint32_t c)
{
    uint32_t a,b;

    a = 0x40000000l;            // mov eax, 0x40000000
    b = 0x20000000l;            // mov ebx, 0x20000000
    do                                  // begit:
    {
        if (c >= a)             // cmp ecx, eax  /  jl skip
        {
            c -= a;             // sub ecx, eax
            a += b*4;   // lea eax, [eax+ebx*4]
        }                       // skip:
        a -= b;                 // sub eax, ebx
        a >>= 1;                // shr eax, 1
        b >>= 2;                // shr ebx, 2
    }
    while (b);                  // jnz begit
    if (c >= a)                 // cmp ecx, eax
        a++;                    // sbb eax, -1
    a >>= 1;                    // shr eax, 1
    return a;
}

static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
    // Ken did this
    d = ((a<0)<<3) + ((b<0)<<2) + ((c<0)<<1) + (d<0);
    return(((d<<4)^0xf0)|d);
}

inline int32_t getkensmessagecrc(int32_t b)
{
    UNREFERENCED_PARAMETER(b);
    return 0x56c764d4l;
}

#endif


int32_t xb1[MAXWALLSB];  // Polymost uses this as a temp array
static int32_t yb1[MAXWALLSB], xb2[MAXWALLSB], yb2[MAXWALLSB];
int32_t rx1[MAXWALLSB], ry1[MAXWALLSB];
static int32_t rx2[MAXWALLSB], ry2[MAXWALLSB];
int16_t bunchp2[MAXWALLSB], thesector[MAXWALLSB];

int16_t bunchfirst[MAXWALLSB], bunchlast[MAXWALLSB];

static int32_t nodesperline, ysavecnt;
static int16_t *smost, *umost, *dmost, *bakumost, *bakdmost;
static int16_t *uplc, *dplc, *uwall, *dwall;
static int32_t *swplc, *lplc, *swall, *lwall;
#ifdef HIGH_PRECISION_SPRITE
static float *swallf;
#endif

static int32_t smostcnt;
static int32_t smoststart[MAXWALLSB];
static char smostwalltype[MAXWALLSB];
static int32_t smostwall[MAXWALLSB], smostwallcnt = -1;

static vec3_t spritesxyz[MAXSPRITESONSCREEN+1];

int32_t xdimen = -1, xdimenrecip, halfxdimen, xdimenscale, xdimscale;
float fxdimen = -1.f;
int32_t ydimen;
static int32_t wx1, wy1, wx2, wy2;
intptr_t frameoffset;

static int32_t nrx1[8], nry1[8], nrx2[8], nry2[8]; // JBF 20031206: Thanks Ken

static int32_t rxi[8], ryi[8], rzi[8], rxi2[8], ryi2[8], rzi2[8];
static int32_t xsi[8], ysi[8], horizycent;
static int32_t *horizlookup=0, *horizlookup2=0;

int32_t globalposx, globalposy, globalposz, globalhoriz;
float fglobalposx, fglobalposy, fglobalposz;
int16_t globalang, globalcursectnum;
int32_t globalpal, cosglobalang, singlobalang;
static int32_t globalblend;
int32_t cosviewingrangeglobalang, sinviewingrangeglobalang;
static char *globalpalwritten;
static int32_t globaluclip, globaldclip;
int32_t globvis, globalvisibility;
int32_t globalhisibility, globalpisibility, globalcisibility;
//char globparaceilclip, globparaflorclip;

int32_t xyaspect;
static int32_t viewingrangerecip;

static char globalxshift, globalyshift;
static int32_t globalxpanning, globalypanning;
int32_t globalshade, globalorientation;
int16_t globalpicnum;
static int16_t globalshiftval;
#ifdef HIGH_PRECISION_SPRITE
static int64_t globalzd;
#else
static int32_t globalzd;
#endif
static int32_t globalyscale;
static int32_t globalxspan, globalyspan, globalispow2=1;  // true if texture has power-of-two x and y size
static intptr_t globalbufplc;

static int32_t globaly1, globalx2;

int16_t sectorborder[256];
int32_t ydim16, qsetmode = 0;
int16_t pointhighlight=-1, linehighlight=-1, highlightcnt=0;
static int32_t *lastx;

static char paletteloaded = 0;

int32_t halfxdim16, midydim16;

#define FASTPALGRIDSIZ 8
static int32_t rdist[129], gdist[129], bdist[129];
static char colhere[((FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2))>>3];
static char colhead[(FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2)*(FASTPALGRIDSIZ+2)];
static int32_t colnext[256];
static char coldist[8] = {0,1,2,3,4,3,2,1};
static int32_t colscan[27];

static int16_t clipnum;
static const int32_t hitscangoalx = (1<<29)-1, hitscangoaly = (1<<29)-1;
#ifdef USE_OPENGL
int32_t hitallsprites = 0;
#endif

typedef struct { int32_t x1, y1, x2, y2; } linetype;
static linetype clipit[MAXCLIPNUM];
static int32_t clipsectnum, origclipsectnum, clipspritenum;
static int16_t clipsectorlist[MAXCLIPNUM], origclipsectorlist[MAXCLIPNUM];
#ifdef HAVE_CLIPSHAPE_FEATURE
static int16_t clipspritelist[MAXCLIPNUM];  // sector-like sprite clipping
#endif
static int16_t clipobjectval[MAXCLIPNUM];

typedef struct
{
    int32_t sx, sy, z;
    int16_t a, picnum;
    int8_t dashade;
    char dapalnum, dastat;
    uint8_t daalpha, dablend;
    char pagesleft;
    int32_t cx1, cy1, cx2, cy2;
    int32_t uniqid;    //JF extension
} permfifotype;
static permfifotype permfifo[MAXPERMS];
static int32_t permhead = 0, permtail = 0;

EDUKE32_STATIC_ASSERT(MAXWALLSB < INT16_MAX);
int16_t numscans, numbunches;
static int16_t numhits;
int16_t capturecount = 0;

char vgapal16[4*256] =
{
    00,00,00,00, 42,00,00,00, 00,42,00,00, 42,42,00,00, 00,00,42,00,
    42,00,42,00, 00,21,42,00, 42,42,42,00, 21,21,21,00, 63,21,21,00,
    21,63,21,00, 63,63,21,00, 21,21,63,00, 63,21,63,00, 21,63,63,00,
    63,63,63,00
};

int16_t searchit;
int32_t searchx = -1, searchy;                          //search input
int16_t searchsector, searchwall, searchstat;     //search output

// SEARCHBOTTOMWALL:
//   When aiming at a the bottom part of a 2-sided wall whose bottom part
//   is swapped (.cstat&2), searchbottomwall equals that wall's .nextwall. In all
//   other cases (when aiming at a wall), searchbottomwall equals searchwall.
//
// SEARCHISBOTTOM:
//  When aiming at a 2-sided wall, 1 if aiming at the bottom part, 0 else
int16_t searchbottomwall, searchisbottom;

static char artfilename[20];
static char mapartfilename[BMAX_PATH];  // map-specific ART file name
static int32_t mapartfnXXofs;  // byte offset to 'XX' (the number part) in the above
static int32_t artfil = -1, artfilnum, artfilplc;

char inpreparemirror = 0;
static int32_t mirrorsx1, mirrorsy1, mirrorsx2, mirrorsy2;

static int32_t setviewcnt = 0; // interface layers use this now
static int32_t bakframeplace[4], bakxsiz[4], bakysiz[4];
static int32_t bakwindowx1[4], bakwindowy1[4];
static int32_t bakwindowx2[4], bakwindowy2[4];
#ifdef USE_OPENGL
static int32_t bakrendmode;
#endif
static int32_t baktile;

char apptitle[256] = "Build Engine";

static uint8_t basepalreset=1;
uint8_t basepalcount;
uint8_t curbasepal;

static uint32_t g_lastpalettesum = 0;
palette_t curpalette[256];                      // the current palette, unadjusted for brightness or tint
palette_t curpalettefaded[256];         // the current palette, adjusted for brightness and tint (ie. what gets sent to the card)
palette_t palfadergb = { 0,0,0,0 };
char palfadedelta = 0;



//
// Internal Engine Functions
//

// blendtable[1] to blendtable[numalphatabs] are considered to be
// alpha-blending tables:
static uint8_t numalphatabs;

static char *blendtable[MAXBLENDTABS];
#define getblendtab(blend) (blendtable[blend])

static void setpalettefade_calc(uint8_t offset);

void fade_screen_black(int32_t moreopaquep)
{
#ifdef USE_OPENGL
    if (getrendermode() >= REND_POLYMOST)
        fullscreen_tint_gl(0,0,0, moreopaquep ? 168 : 84);
    else
#endif
    {
        Bassert(!offscreenrendering);

        begindrawing();
        {
            char *const p = (char *)frameplace;
            const char *const trans = getblendtab(0);
            const int32_t shiftamnt = ((!!moreopaquep)*8);
            const int32_t dimprod = xdim*ydim;
            int32_t i = 0;

#ifdef CLASSIC_SLICE_BY_4
            for (; i<dimprod-4; i+=4)
            {
                p[i] = trans[p[i]<<shiftamnt];
                p[i+1] = trans[p[i+1]<<shiftamnt];
                p[i+2] = trans[p[i+2]<<shiftamnt];
                p[i+3] = trans[p[i+3]<<shiftamnt];
            }
#endif

            for (; i<dimprod; i++)
                p[i] = trans[p[i]<<shiftamnt];
        }
        enddrawing();
    }
}


// returns: 0=continue sprite collecting;
//          1=break out of sprite collecting;
int32_t engine_addtsprite(int16_t z, int16_t sectnum)
{
    spritetype *spr = &sprite[z];
#ifdef YAX_ENABLE
    int16_t cb, fb, *sortcnt;
    int32_t spheight, spzofs;

    if (g_nodraw==0)
    {
        if (numyaxbunches==0)
        {
#endif
            if (spritesortcnt >= MAXSPRITESONSCREEN)
                return 1;

            Bmemcpy(&tsprite[spritesortcnt], spr, sizeof(spritetype));
            tsprite[spritesortcnt++].owner = z;

#ifdef YAX_ENABLE
        }
    }
    else
        if (yax_nomaskpass==0)
    {
        sortcnt = &yax_spritesortcnt[yax_globallev];
        if (*sortcnt >= MAXSPRITESONSCREEN)
            return 1;

        yax_tsprite[yax_globallev][*sortcnt] = z;
        if (yax_globalbunch >= 0)
        {
            yax_tsprite[yax_globallev][*sortcnt] |= (MAXSPRITES|(MAXSPRITES<<1));
            yax_tsprfrombunch[yax_globallev][*sortcnt] = yax_globalbunch;
        }
        (*sortcnt)++;

        // now check whether the tsprite needs duplication into another level
        if ((spr->cstat&48)==32)
            return 0;

        yax_getbunches(sectnum, &cb, &fb);
        if (cb < 0 && fb < 0)
            return 0;

        spzofs = spriteheightofs(z, &spheight, 1);

        // TODO: get*zofslope?
        if (cb>=0 && spr->z+spzofs-spheight < sector[sectnum].ceilingz)
        {
            sortcnt = &yax_spritesortcnt[yax_globallev-1];
            if (*sortcnt < MAXSPRITESONSCREEN)
            {
                yax_tsprite[yax_globallev-1][*sortcnt] = z|MAXSPRITES;
                (*sortcnt)++;
            }
        }
        if (fb>=0 && spr->z+spzofs > sector[sectnum].floorz)
        {
            sortcnt = &yax_spritesortcnt[yax_globallev+1];
            if (*sortcnt < MAXSPRITESONSCREEN)
            {
                yax_tsprite[yax_globallev+1][*sortcnt] = z|(MAXSPRITES<<1);
                (*sortcnt)++;
            }
        }
    }
#endif

    return 0;
}

static inline vec2_t get_rel_coords(int32_t const x, int32_t const y)
{
    vec2_t const p = {
        dmulscale6(y,cosglobalang, -x,singlobalang),
        dmulscale6(x,cosviewingrangeglobalang, y,sinviewingrangeglobalang)
    };

    return p;
}

// Note: the returned y coordinates are not actually screen coordinates, but
// potentially clipped player-relative y coordinates.
static int get_screen_coords(const vec2_t p1, const vec2_t p2,
                             int32_t *sx1ptr, int32_t *sy1ptr,
                             int32_t *sx2ptr, int32_t *sy2ptr)
{
    int32_t sx1, sy1, sx2, sy2;

    // First point.

    if (p1.x >= -p1.y)
    {
        if (p1.x > p1.y || p1.y == 0)
            return 0;

        sx1 = halfxdimen + scale(p1.x, halfxdimen, p1.y)
            + (p1.x >= 0);  // Fix for SIGNED divide
        if (sx1 >= xdimen)
            sx1 = xdimen-1;

        sy1 = p1.y;
    }
    else
    {
        if (p2.x < -p2.y)
            return 0;

        sx1 = 0;

        int32_t tempint = (p1.x + p1.y) - (p2.x + p2.y);
        if (tempint == 0)
            return 0;
        sy1 = p1.y + scale(p2.y-p1.y, p1.x+p1.y, tempint);
    }

    if (sy1 < 256)
        return 0;

    // Second point.

    if (p2.x <= p2.y)
    {
        if (p2.x < -p2.y || p2.y == 0)
            return 0;

        sx2 = halfxdimen + scale(p2.x,halfxdimen,p2.y) - 1
            + (p2.x >= 0);  // Fix for SIGNED divide
        if (sx2 >= xdimen)
            sx2 = xdimen-1;

        sy2 = p2.y;
    }
    else
    {
        if (p1.x > p1.y)
            return 0;

        sx2 = xdimen-1;

        int32_t tempint = (p1.y - p1.x) + (p2.x - p2.y);
        if (tempint == 0)
            return 0;
        sy2 = p1.y + scale(p2.y-p1.y, p1.y-p1.x, tempint);
    }

    if (sy2 < 256 || sx1 > sx2)
        return 0;

    *sx1ptr = sx1; *sy1ptr = sy1;
    *sx2ptr = sx2; *sy2ptr = sy2;

    return 1;
}

//
// scansector (internal)
//
static void scansector(int16_t startsectnum)
{
    int32_t sectorbordercnt;

    if (startsectnum < 0)
        return;

    sectorborder[0] = startsectnum, sectorbordercnt = 1;

    do
    {
        const int32_t sectnum = sectorborder[--sectorbordercnt];

#ifdef YAX_ENABLE
        if (scansector_collectsprites)
#endif
        for (int32_t i=headspritesect[sectnum]; i>=0; i=nextspritesect[i])
        {
            const spritetype *const spr = &sprite[i];

            if (((spr->cstat&0x8000) == 0 || showinvisibility) &&
                    spr->xrepeat > 0 && spr->yrepeat > 0)
            {
                int32_t xs = spr->x-globalposx, ys = spr->y-globalposy;

                if ((spr->cstat&48) || ((coord_t)xs*cosglobalang+(coord_t)ys*singlobalang > 0))
                    if ((spr->cstat&(64+48))!=(64+16) || dmulscale6(sintable[(spr->ang+512)&2047],-xs, sintable[spr->ang&2047],-ys) > 0)
                        if (engine_addtsprite(i, sectnum))
                            break;
            }
        }

        gotsector[sectnum>>3] |= pow2char[sectnum&7];

        const int32_t onumbunches = numbunches;
        const int32_t onumscans = numscans;

        const int32_t startwall = sector[sectnum].wallptr;
        const int32_t endwall = startwall + sector[sectnum].wallnum;
        int32_t scanfirst = numscans;

        vec2_t p1, p2 = { 0, 0 };

        for (int32_t w=startwall; w<endwall; w++)
        {
            const walltype *const wal = &wall[w];
            const int32_t nextsectnum = wal->nextsector;
            const walltype *const wal2 = &wall[wal->point2];

            const int32_t x1 = wal->x-globalposx, y1 = wal->y-globalposy;
            const int32_t x2 = wal2->x-globalposx, y2 = wal2->y-globalposy;

            // The following block checks for a potential collection of a
            // sector that is "thin" in screen space. This is necessary because
            // not all sectors that are needed to be drawn may be collected via
            // drawalls() -> scansector() (although those are the majority).
            // Example: standing at exactly the intersection of a large sector
            // into four quadrant sub-sectors.
#if 1
            if (nextsectnum >= 0 && (wal->cstat&32) == 0 && sectorbordercnt < ARRAY_SSIZE(sectorborder))
#ifdef YAX_ENABLE
                if (yax_nomaskpass==0 || !yax_isislandwall(w, !yax_globalcf) || (yax_nomaskdidit=1, 0))
#endif
                if ((gotsector[nextsectnum>>3]&pow2char[nextsectnum&7]) == 0)
                {
                    // OV: E2L10
                    coord_t temp = (coord_t)x1*y2-(coord_t)x2*y1;
                    int32_t tempint = temp;
                    if (((uint64_t)tempint+262144) < 524288)  // BXY_MAX?
                        if (mulscale5(tempint,tempint) <= (x2-x1)*(x2-x1)+(y2-y1)*(y2-y1))
                        {
                            sectorborder[sectorbordercnt++] = nextsectnum;
                            gotsector[nextsectnum>>3] |= pow2char[nextsectnum&7];
                        }
                }
#endif
            p1 = (w == startwall || wall[w - 1].point2 != w) ? get_rel_coords(x1, y1) : p2;
            p2 = get_rel_coords(x2, y2);

            if (p1.y < 256 && p2.y < 256)
                goto skipitaddwall;

            // If wall's NOT facing you
            if (dmulscale32(p1.x, p2.y, -p2.x, p1.y) >= 0)
                goto skipitaddwall;

            if (get_screen_coords(p1, p2, &xb1[numscans], &yb1[numscans], &xb2[numscans], &yb2[numscans]))
            {
                // Made it all the way!
                thesector[numscans] = sectnum; thewall[numscans] = w;
                rx1[numscans] = p1.x; ry1[numscans] = p1.y;
                rx2[numscans] = p2.x; ry2[numscans] = p2.y;
                bunchp2[numscans] = numscans+1;
                numscans++;
            }

skipitaddwall:
            if (wall[w].point2 < w && scanfirst < numscans)
                bunchp2[numscans-1] = scanfirst, scanfirst = numscans;
        }

        for (int32_t s=onumscans; s<numscans; s++)
            if (wall[thewall[s]].point2 != thewall[bunchp2[s]] || xb2[s] >= xb1[bunchp2[s]])
            {
                bunchfirst[numbunches++] = bunchp2[s], bunchp2[s] = -1;
#ifdef YAX_ENABLE
                if (scansector_retfast)
                    return;
#endif
            }

        for (int32_t bn=onumbunches; bn<numbunches; bn++)
        {
            int32_t s;
            for (s=bunchfirst[bn]; bunchp2[s]>=0; s=bunchp2[s])
                /* do nothing */;
            bunchlast[bn] = s;
        }
    }
    while (sectorbordercnt > 0);
}

#if DEBUGGINGAIDS >= 2
// Printing functions for collected scans (called "wall proxies" by
// http://fabiensanglard.net/duke3d/build_engine_internals.php) and
// bunches. For use from within the debugger.

void printscans(void)
{
    static uint8_t didscan[(MAXWALLSB+7)>>3];

    Bmemset(didscan, 0, sizeof(didscan));

    for (int s=0; s<numscans; s++)
    {
        if (bunchp2[s] >= 0 && (didscan[s>>3] & (1<<(s&7)))==0)
        {
            printf("scan ");

            int z = s;
            do
            {
                const int cond = (wall[thewall[z]].point2 != thewall[bunchp2[z]] ||
                                  xb2[z] >= xb1[bunchp2[z]]);

                printf("%s%d(%d) ", cond ? "!" : "", z, thewall[z]);

                if (didscan[z>>3] & (1<<(z&7)))
                {
                    printf("*");
                    break;
                }

                didscan[z>>3] |= (1<<(z&7));
                z = bunchp2[z];
            } while (z >= 0);

            printf("\n");
        }
    }
}

void printbunches(void)
{
    for (int bn=0; bn<numbunches; bn++)
    {
        printf("bunch %d: ", bn);
        for (int s=bunchfirst[bn]; s>=0; s=bunchp2[s])
            printf("%d(%d) ", s, thewall[s]);
        printf("\n");
    }
}
#endif

////////// *WALLSCAN HELPERS //////////

#define WSHELPER_DECL inline //ATTRIBUTE((always_inline))

static WSHELPER_DECL void tweak_tsizes(vec2_t *tsiz)
{
    if (pow2long[picsiz[globalpicnum]&15] == tsiz->x)
        tsiz->x--;
    else
        tsiz->x = -tsiz->x;

    if (pow2long[picsiz[globalpicnum]>>4] == tsiz->y)
        tsiz->y = (picsiz[globalpicnum]>>4);
    else
        tsiz->y = -tsiz->y;
}

static WSHELPER_DECL void calc_bufplc(intptr_t *bufplc, int32_t lw, vec2_t tsiz)
{
    // CAUTION: lw can be negative!
    int32_t i = lw + globalxpanning;

//    if (i >= tsizx)
    {
        if (tsiz.x < 0)
            i = (uint32_t)i % -tsiz.x;
        else
            i &= tsiz.x;
    }

    if (tsiz.y < 0)
        i *= -tsiz.y;
    else
        i <<= tsiz.y;

//    Bassert(i >= 0 && i < tilesiz[globalpicnum].x*tilesiz[globalpicnum].y);

    // Address is at the first row of tile storage (which is column-major).
    *bufplc = waloff[globalpicnum] + i;
}

static WSHELPER_DECL void calc_vplcinc_wall(uint32_t *vplc, int32_t *vinc, inthi_t sw, int32_t y1v)
{
    *vinc = sw*globalyscale;
    *vplc = globalzd + (uint32_t)(*vinc)*(y1v-globalhoriz+1);
}

#ifdef HIGH_PRECISION_SPRITE
static WSHELPER_DECL void calc_vplcinc_sprite(uint32_t *vplc, int32_t *vinc, int32_t x, int32_t y1v)
{
    inthi_t tmpvinc = Blrintf(swallf[x]);
    inthi_t tmpvplc = globalzd + tmpvinc*(y1v-globalhoriz+1);

    *vinc = tmpvinc;
    // Clamp the vertical texture coordinate!
    *vplc = min(max(0, tmpvplc), UINT32_MAX);
}
#endif

static int32_t drawing_sprite = 0;

static WSHELPER_DECL void calc_vplcinc(uint32_t *vplc, int32_t *vinc, const int32_t *swal, int32_t x, int32_t y1v)
{
#if !defined HIGH_PRECISION_SPRITE
    (void)drawing_sprite;
#else
    if (drawing_sprite)
        calc_vplcinc_sprite(vplc, vinc, x, y1v);
    else
#endif
        calc_vplcinc_wall(vplc, vinc, swal[x], y1v);
}

#undef NONPOW2_YSIZE_ASM
#if !defined ENGINE_USING_A_C
# if defined CLASSIC_NONPOW2_YSIZE_WALLS || defined CLASSIC_NONPOW2_YSIZE_SPRITES
#  define NONPOW2_YSIZE_ASM
# endif
#endif


//
// maskwallscan (internal)
//
static void maskwallscan(int32_t x1, int32_t x2, int32_t saturatevplc)
{
    int32_t x;
    intptr_t p, fpalookup;
    int32_t y1ve[4], y2ve[4];
#ifdef MULTI_COLUMN_VLINE
    char bad;
    int32_t u4, d4, dax, z;
#endif
    vec2_t tsiz;
    setgotpic(globalpicnum);
    if (globalshiftval < 0)
        return;

    tsiz = tilesiz[globalpicnum];

    if ((tsiz.x <= 0) || (tsiz.y <= 0)) return;
    if ((uwall[x1] > ydimen) && (uwall[x2] > ydimen)) return;
    if ((dwall[x1] < 0) && (dwall[x2] < 0)) return;

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    tweak_tsizes(&tsiz);

    if (EDUKE32_PREDICT_FALSE(palookup[globalpal] == NULL))
        globalpal = 0;

    fpalookup = FP_OFF(palookup[globalpal]);

    setupmvlineasm(globalshiftval, saturatevplc);


    x = x1;
    while ((x <= x2) && (startumost[x+windowx1] > startdmost[x+windowx1]))
        x++;

    p = x+frameoffset;

#ifdef NONPOW2_YSIZE_ASM
    if (globalshiftval==0)
        goto do_mvlineasm1;
#endif

#ifdef MULTI_COLUMN_VLINE
    for (; (x<=x2)&&(p&3); x++,p++)
    {
        y1ve[0] = max(uwall[x],startumost[x+windowx1]-windowy1);
        y2ve[0] = min(dwall[x],startdmost[x+windowx1]-windowy1);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup + getpalookupsh(mulscale16(swall[x],globvis));

        calc_bufplc(&bufplce[0], lwall[x], tsiz);
        calc_vplcinc(&vplce[0], &vince[0], swall, x, y1ve[0]);

        mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0],p+ylookup[y1ve[0]]);
    }
    for (; x<=x2-3; x+=4,p+=4)
    {
        intptr_t pp;

        bad = 0;
        for (z=3,dax=x+3; z>=0; z--,dax--)
        {
            y1ve[z] = max(uwall[dax],startumost[dax+windowx1]-windowy1);
            y2ve[z] = min(dwall[dax],startdmost[dax+windowx1]-windowy1)-1;
            if (y2ve[z] < y1ve[z]) { bad += pow2char[z]; continue; }

            calc_bufplc(&bufplce[z], lwall[dax], tsiz);
            calc_vplcinc(&vplce[z], &vince[z], swall, dax, y1ve[z]);
        }
        if (bad == 15) continue;

        palookupoffse[0] = fpalookup + getpalookupsh(mulscale16(swall[x],globvis));
        palookupoffse[3] = fpalookup + getpalookupsh(mulscale16(swall[x+3],globvis));

        if ((palookupoffse[0] == palookupoffse[3]) && ((bad&0x9) == 0))
        {
            palookupoffse[1] = palookupoffse[0];
            palookupoffse[2] = palookupoffse[0];
        }
        else
        {
            palookupoffse[1] = fpalookup + getpalookupsh(mulscale16(swall[x+1],globvis));
            palookupoffse[2] = fpalookup + getpalookupsh(mulscale16(swall[x+2],globvis));
        }

        u4 = max(max(y1ve[0],y1ve[1]),max(y1ve[2],y1ve[3]));
        d4 = min(min(y2ve[0],y2ve[1]),min(y2ve[2],y2ve[3]));

        if ((bad > 0) || (u4 >= d4))
        {
            if (!(bad&1)) mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0],vplce[0],bufplce[0],ylookup[y1ve[0]]+p+0);
            if (!(bad&2)) mvlineasm1(vince[1],palookupoffse[1],y2ve[1]-y1ve[1],vplce[1],bufplce[1],ylookup[y1ve[1]]+p+1);
            if (!(bad&4)) mvlineasm1(vince[2],palookupoffse[2],y2ve[2]-y1ve[2],vplce[2],bufplce[2],ylookup[y1ve[2]]+p+2);
            if (!(bad&8)) mvlineasm1(vince[3],palookupoffse[3],y2ve[3]-y1ve[3],vplce[3],bufplce[3],ylookup[y1ve[3]]+p+3);
            continue;
        }

        if (u4 > y1ve[0]) vplce[0] = mvlineasm1(vince[0],palookupoffse[0],u4-y1ve[0]-1,vplce[0],bufplce[0],ylookup[y1ve[0]]+p+0);
        if (u4 > y1ve[1]) vplce[1] = mvlineasm1(vince[1],palookupoffse[1],u4-y1ve[1]-1,vplce[1],bufplce[1],ylookup[y1ve[1]]+p+1);
        if (u4 > y1ve[2]) vplce[2] = mvlineasm1(vince[2],palookupoffse[2],u4-y1ve[2]-1,vplce[2],bufplce[2],ylookup[y1ve[2]]+p+2);
        if (u4 > y1ve[3]) vplce[3] = mvlineasm1(vince[3],palookupoffse[3],u4-y1ve[3]-1,vplce[3],bufplce[3],ylookup[y1ve[3]]+p+3);

        if (d4 >= u4) mvlineasm4(d4-u4+1, (char *)(ylookup[u4]+p));

        pp = p+ylookup[d4+1];
        if (y2ve[0] > d4) mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-d4-1,vplce[0],bufplce[0],pp+0);
        if (y2ve[1] > d4) mvlineasm1(vince[1],palookupoffse[1],y2ve[1]-d4-1,vplce[1],bufplce[1],pp+1);
        if (y2ve[2] > d4) mvlineasm1(vince[2],palookupoffse[2],y2ve[2]-d4-1,vplce[2],bufplce[2],pp+2);
        if (y2ve[3] > d4) mvlineasm1(vince[3],palookupoffse[3],y2ve[3]-d4-1,vplce[3],bufplce[3],pp+3);
    }
#endif

#ifdef NONPOW2_YSIZE_ASM
do_mvlineasm1:
#endif
    for (; x<=x2; x++,p++)
    {
        y1ve[0] = max(uwall[x],startumost[x+windowx1]-windowy1);
        y2ve[0] = min(dwall[x],startdmost[x+windowx1]-windowy1);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup + getpalookupsh(mulscale16(swall[x],globvis));

        calc_bufplc(&bufplce[0], lwall[x], tsiz);
        calc_vplcinc(&vplce[0], &vince[0], swall, x, y1ve[0]);

#ifdef NONPOW2_YSIZE_ASM
        if (globalshiftval==0)
            mvlineasm1nonpow2(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0],p+ylookup[y1ve[0]]);
        else
#endif
        mvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0],p+ylookup[y1ve[0]]);
    }

    faketimerhandler();
}


//
// wallfront (internal)
//
int32_t wallfront(int32_t l1, int32_t l2)
{
    vec2_t l1vect   = *(vec2_t *)&wall[thewall[l1]];
    vec2_t l1p2vect = *(vec2_t *)&wall[wall[thewall[l1]].point2];
    vec2_t l2vect   = *(vec2_t *)&wall[thewall[l2]];
    vec2_t l2p2vect = *(vec2_t *)&wall[wall[thewall[l2]].point2];
    int32_t dx = l1p2vect.x-l1vect.x;
    int32_t dy = l1p2vect.y-l1vect.y;
    int32_t t1 = dmulscale2(l2vect.x-l1vect.x, dy, -dx, l2vect.y-l1vect.y); //p1(l2) vs. l1
    int32_t t2 = dmulscale2(l2p2vect.x-l1vect.x, dy, -dx, l2p2vect.y-l1vect.y); //p2(l2) vs. l1

    if (t1 == 0) { if (t2 == 0) return -1; t1 = t2; }
    if (t2 == 0) t2 = t1;

    if ((t1^t2) >= 0) //pos vs. l1
        return (dmulscale2(globalposx-l1vect.x, dy, -dx, globalposy-l1vect.y) ^ t1) >= 0;

    dx = l2p2vect.x-l2vect.x;
    dy = l2p2vect.y-l2vect.y;

    t1 = dmulscale2(l1vect.x-l2vect.x, dy, -dx, l1vect.y-l2vect.y); //p1(l1) vs. l2
    t2 = dmulscale2(l1p2vect.x-l2vect.x, dy, -dx, l1p2vect.y-l2vect.y); //p2(l1) vs. l2

    if (t1 == 0) { if (t2 == 0) return -1; t1 = t2; }
    if (t2 == 0) t2 = t1;

    if ((t1^t2) >= 0) //pos vs. l2
        return (dmulscale2(globalposx-l2vect.x,dy,-dx,globalposy-l2vect.y) ^ t1) < 0;

    return -2;
}

//
// spritewallfront (internal)
//
static inline int32_t spritewallfront(const tspritetype *s, int32_t w)
{
    const walltype *const wal = &wall[w];
    const walltype *wal2 = &wall[wal->point2];
    const int32_t x1 = wal->x, y1 = wal->y;

    return (dmulscale32(wal2->x-x1, s->y-y1, -(s->x-x1), wal2->y-y1) >= 0);
}

//
//  spritebehindwall(internal)
//
#if 0
static int32_t spriteobstructswall(spritetype *s, int32_t w)
{
    walltype *wal;
    int32_t x, y;
    int32_t x1, y1;
    int32_t x2, y2;
    double a1, b1, c1;
    double a2, b2, c2;
    double d1, d2;

    // wall line equation
    wal = &wall[w]; x1 = wal->x - globalposx; y1 = wal->y - globalposy;
    wal = &wall[wal->point2]; x2 = wal->x - globalposx; y2 = wal->y - globalposy;
    if ((x2 - x1) != 0)
        a1 = (float)(y2 - y1)/(x2 - x1);
    else
        a1 = 1e+37; // not infinite, but almost ;)
    b1 = -1;
    c1 = (y1 - (a1 * x1));

    // player to sprite line equation
    if ((s->x - globalposx) != 0)
        a2 = (float)(s->y - globalposy)/(s->x - globalposx);
    else
        a2 = 1e+37;
    b2 = -1;
    c2 = 0;

    // intersection point
    d1 = (float)(1) / (a1*b2 - a2*b1);
    x = ((b1*c2 - b2*c1) * d1);
    y = ((a2*c1 - a1*c2) * d1);

    // distance between the sprite and the player
    a1 = s->x - globalposx;
    b1 = s->y - globalposy;
    d1 = (a1 * a1 + b1 * b1);

    // distance between the intersection point and the player
    d2 = (x * x + y * y);

    // check if the sprite obstructs the wall
    if ((d1 < d2) && (min(x1, x2) <= x) && (x <= max(x1, x2)) && (min(y1, y2) <= y) && (y <= max(y1, y2)))
        return (1);
    else
        return (0);
}
#endif
//
// bunchfront (internal)
//
static inline int32_t bunchfront(int32_t b1, int32_t b2)
{
    int32_t x1b1, x2b1, x1b2, x2b2, b1f, b2f, i;

    b1f = bunchfirst[b1]; x1b1 = xb1[b1f]; x2b2 = xb2[bunchlast[b2]]+1;
    if (x1b1 >= x2b2) return(-1);
    b2f = bunchfirst[b2]; x1b2 = xb1[b2f]; x2b1 = xb2[bunchlast[b1]]+1;
    if (x1b2 >= x2b1) return(-1);

    if (x1b1 >= x1b2)
    {
        for (i=b2f; xb2[i]<x1b1; i=bunchp2[i]);
        return(wallfront(b1f,i));
    }
    for (i=b1f; xb2[i]<x1b2; i=bunchp2[i]);
    return(wallfront(i,b2f));
}


//
// hline (internal)
//
static inline void hline(int32_t xr, int32_t yp)
{
    int32_t xl, r, s;

    xl = lastx[yp]; if (xl > xr) return;
    r = horizlookup2[yp-globalhoriz+horizycent];
    asm1 = (inthi_t)globalx1*r;
    asm2 = (inthi_t)globaly2*r;
    s = getpalookupsh(mulscale16(r,globvis));

    hlineasm4(xr-xl,0,s,(uint32_t)globalx2*r+globalypanning,(uint32_t)globaly1*r+globalxpanning,
              ylookup[yp]+xr+frameoffset);
}


//
// slowhline (internal)
//
static inline void slowhline(int32_t xr, int32_t yp)
{
    int32_t xl, r;

    xl = lastx[yp]; if (xl > xr) return;
    r = horizlookup2[yp-globalhoriz+horizycent];
    asm1 = (inthi_t)globalx1*r;
    asm2 = (inthi_t)globaly2*r;

    asm3 = (intptr_t)globalpalwritten + getpalookupsh(mulscale16(r,globvis));
    if (!(globalorientation&256))
    {
        mhline(globalbufplc,(uint32_t)globaly1*r+globalxpanning-asm1*(xr-xl),(xr-xl)<<16,0L,
               (uint32_t)globalx2*r+globalypanning-asm2*(xr-xl),ylookup[yp]+xl+frameoffset);
        return;
    }
    thline(globalbufplc,(uint32_t)globaly1*r+globalxpanning-asm1*(xr-xl),(xr-xl)<<16,0L,
           (uint32_t)globalx2*r+globalypanning-asm2*(xr-xl),ylookup[yp]+xl+frameoffset);
}


//
// prepwall (internal)
//
static void prepwall(int32_t z, const walltype *wal)
{
    int32_t l=0, ol=0, x;

    int32_t walxrepeat = (wal->xrepeat<<3);

    //lwall calculation
    int32_t tmpx = xb1[z]-halfxdimen;

    const int32_t topinc = -(ry1[z]>>2);
    const int32_t botinc = (ry2[z]-ry1[z])>>8;
    int32_t top = mulscale5(rx1[z],xdimen) + mulscale2(topinc,tmpx);
    int32_t bot = mulscale11(rx1[z]-rx2[z],xdimen) + mulscale2(botinc,tmpx);

    const int32_t splc = mulscale19(ry1[z],xdimscale);
    const int32_t sinc = mulscale16(ry2[z]-ry1[z],xdimscale);

    x = xb1[z];
    if (bot != 0)
    {
        l = divscale12(top,bot);
        swall[x] = mulscale21(l,sinc)+splc;
        l *= walxrepeat;
        lwall[x] = (l>>18);
    }
    while (x+4 <= xb2[z])
    {
        int32_t i;

        top += topinc; bot += botinc;
        if (bot != 0)
        {
            ol = l; l = divscale12(top,bot);
            swall[x+4] = mulscale21(l,sinc)+splc;
            l *= walxrepeat;
            lwall[x+4] = (l>>18);
        }

        i = (ol+l)>>1;

        lwall[x+2] = i>>18;
        lwall[x+1] = (ol+i)>>19;
        lwall[x+3] = (l+i)>>19;

        swall[x+2] = (swall[x]+swall[x+4])>>1;
        swall[x+1] = (swall[x]+swall[x+2])>>1;
        swall[x+3] = (swall[x+4]+swall[x+2])>>1;

        x += 4;
    }
    if (x+2 <= xb2[z])
    {
        top += (topinc>>1); bot += (botinc>>1);
        if (bot != 0)
        {
            ol = l; l = divscale12(top,bot);
            swall[x+2] = mulscale21(l,sinc)+splc;
            l *= walxrepeat;
            lwall[x+2] = (l>>18);
        }
        lwall[x+1] = (l+ol)>>19;
        swall[x+1] = (swall[x]+swall[x+2])>>1;
        x += 2;
    }
    if (x+1 <= xb2[z])
    {
        bot += (botinc>>2);
        if (bot != 0)
        {
            l = divscale12(top+(topinc>>2),bot);
            swall[x+1] = mulscale21(l,sinc)+splc;
            lwall[x+1] = mulscale18(l,walxrepeat);
        }
    }

    if (lwall[xb1[z]] < 0)
        lwall[xb1[z]] = 0;
    if (lwall[xb2[z]] >= walxrepeat && walxrepeat)
        lwall[xb2[z]] = walxrepeat-1;

    if (wal->cstat&8)
    {
        walxrepeat--;
        for (x=xb1[z]; x<=xb2[z]; x++)
            lwall[x] = walxrepeat-lwall[x];
    }
}


//
// animateoffs (internal)
//
#ifdef DEBUGGINGAIDS
int32_t animateoffs(int const tilenum, int fakevar)
#else
int32_t animateoffs(int const tilenum)
#endif
{
#ifdef DEBUGGINGAIDS
    UNREFERENCED_PARAMETER(fakevar);
#endif

    int const animnum = picanm[tilenum].num;

    if (animnum <= 0)
        return 0;

    int const i = totalclocklock >> (picanm[tilenum].sf & PICANM_ANIMSPEED_MASK);
    int offs = 0;

    switch (picanm[tilenum].sf & PICANM_ANIMTYPE_MASK)
    {
        case PICANM_ANIMTYPE_OSC:
        {
            int k = (i % (animnum << 1));
            offs = (k < animnum) ? k : (animnum << 1) - k;
        }
        break;
        case PICANM_ANIMTYPE_FWD: offs = i % (animnum + 1); break;
        case PICANM_ANIMTYPE_BACK: offs = -(i % (animnum + 1)); break;
    }

    return offs;
}


static inline void wallmosts_finish(int16_t *mostbuf, int32_t z1, int32_t z2,
                                    int32_t ix1, int32_t iy1, int32_t ix2, int32_t iy2)
{
    const int32_t y = scale(z1, xdimenscale, iy1)<<4;

#if 0
    // enable for paranoia:
    ix1 = clamp(ix1, 0, xres-1);
    ix2 = clamp(ix2, 0, xres-1);
    if (ix2-ix1 < 0)
        swaplong(&ix1, &ix2);
#endif
    // PK 20110423: a bit consistency checking is a good thing:
    int32_t tmp = (ix2 - ix1 >= 0) ? (ix2 - ix1 + 1) : 1;
    int32_t yinc = tabledivide32((scale(z2, xdimenscale, iy2) << 4) - y, tmp);

    qinterpolatedown16short((intptr_t)&mostbuf[ix1], tmp, y + (globalhoriz << 16), yinc);

    if (mostbuf[ix1] < 0)
        mostbuf[ix1] = 0;
    else if (mostbuf[ix1] > ydimen)
        mostbuf[ix1] = ydimen;

    if (mostbuf[ix2] < 0)
        mostbuf[ix2] = 0;
    else if (mostbuf[ix2] > ydimen)
        mostbuf[ix2] = ydimen;
}

#ifdef CLASSIC_Z_DIFF_64
typedef int64_t zint_t;

// For drawvox()
static inline zint_t mulscale16z(int32_t a, int32_t d)
{
    return ((zint_t)a * d)>>16;
}

static inline zint_t mulscale20z(int32_t a, int32_t d)
{
    return ((zint_t)a * d)>>20;
}

static inline zint_t dmulscale24z(int32_t a, int32_t d, int32_t S, int32_t D)
{
    return (((zint_t)a * d) + ((zint_t)S * D)) >> 24;
}
#else
typedef int32_t zint_t;
# define mulscale16z mulscale16
# define mulscale20z mulscale20
# define dmulscale24z dmulscale24
#endif

//
// owallmost (internal)
//
static int32_t owallmost(int16_t *mostbuf, int32_t w, zint_t z)
{
    z <<= 7;
    const zint_t s1 = mulscale20z(globaluclip,yb1[w]), s2 = mulscale20z(globaluclip,yb2[w]);
    const zint_t s3 = mulscale20z(globaldclip,yb1[w]), s4 = mulscale20z(globaldclip,yb2[w]);
    const int32_t bad = (z<s1)+((z<s2)<<1)+((z>s3)<<2)+((z>s4)<<3);

    int32_t ix1 = xb1[w], iy1 = yb1[w];
    int32_t ix2 = xb2[w], iy2 = yb2[w];

    if ((bad&3) == 3)
    {
        for (int i=ix1; i<=ix2; i++)
            mostbuf[i] = 0;
        return bad;
    }

    if ((bad&12) == 12)
    {
        for (int i=ix1; i<=ix2; i++)
            mostbuf[i] = ydimen;
        return bad;
    }

    if (bad&3)
    {
        int32_t t = divscale30(z-s1,s2-s1);
        int32_t inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        int32_t xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        if ((bad&3) == 2)
        {
            if (xb1[w] <= xcross) { iy2 = inty; ix2 = xcross; }
            for (int i=xcross+1; i<=xb2[w]; i++)
                mostbuf[i] = 0;
        }
        else
        {
            if (xcross <= xb2[w]) { iy1 = inty; ix1 = xcross; }
            for (int i=xb1[w]; i<=xcross; i++)
                mostbuf[i] = 0;
        }
    }

    if (bad&12)
    {
        int32_t t = divscale30(z-s3,s4-s3);
        int32_t inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        int32_t xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        if ((bad&12) == 8)
        {
            if (xb1[w] <= xcross) { iy2 = inty; ix2 = xcross; }
            for (int i=xcross+1; i<=xb2[w]; i++)
                mostbuf[i] = ydimen;
        }
        else
        {
            if (xcross <= xb2[w]) { iy1 = inty; ix1 = xcross; }
            for (int i=xb1[w]; i<=xcross; i++)
                mostbuf[i] = ydimen;
        }
    }

    wallmosts_finish(mostbuf, z, z, ix1, iy1, ix2, iy2);

    return bad;
}

static inline zint_t wallmost_getz(int32_t fw, int32_t t, zint_t z,
                                   int32_t x1, int32_t y1, int32_t x2, int32_t y2,
                                   int32_t xv, int32_t yv, int32_t dx, int32_t dy)
{
    // XXX: OVERFLOW with huge sectors and sloped ceilngs/floors!
    int32_t i = xv*(y1-globalposy) - yv*(x1-globalposx);
    const int32_t j = yv*x2 - xv*y2;

    if (klabs(j) > klabs(i>>3))
        i = divscale28(i,j);

    return dmulscale24z(dx*t, mulscale20z(y2,i)+((y1-wall[fw].y)<<8),
                        -dy*t, mulscale20z(x2,i)+((x1-wall[fw].x)<<8)) + ((z-globalposz)<<7);
}

//
// wallmost (internal)
//
static int32_t wallmost(int16_t *mostbuf, int32_t w, int32_t sectnum, char dastat)
{
    int32_t t, z;
    int32_t xv, yv;

    if (dastat == 0)
    {
        z = sector[sectnum].ceilingz-globalposz;
        if ((sector[sectnum].ceilingstat&2) == 0)
            return owallmost(mostbuf,w,z);
    }
    else
    {
        z = sector[sectnum].floorz-globalposz;
        if ((sector[sectnum].floorstat&2) == 0)
            return owallmost(mostbuf,w,z);
    }

    const int wi = thewall[w];
    if (wi == sector[sectnum].wallptr)
        return owallmost(mostbuf,w,z);

    const walltype *const wal = &wall[wi];
    const int32_t x1 = wal->x, x2 = wall[wal->point2].x-x1;
    const int32_t y1 = wal->y, y2 = wall[wal->point2].y-y1;

    const int w1 = sector[sectnum].wallptr, w2 = wall[w1].point2;
    const int32_t dx = wall[w2].x-wall[w1].x, dy = wall[w2].y-wall[w1].y;
    const int32_t dasqr = krecipasm(nsqrtasm(uhypsq(dx,dy)));

    if (dastat == 0)
    {
        t = mulscale15(sector[sectnum].ceilingheinum, dasqr);
        z = sector[sectnum].ceilingz;
    }
    else
    {
        t = mulscale15(sector[sectnum].floorheinum,dasqr);
        z = sector[sectnum].floorz;
    }


    if (xb1[w] == 0)
        { xv = cosglobalang+sinviewingrangeglobalang; yv = singlobalang-cosviewingrangeglobalang; }
    else
        { xv = x1-globalposx; yv = y1-globalposy; }
    zint_t z1 = wallmost_getz(w1, t, z, x1, y1, x2, y2, xv, yv, dx, dy);


    if (xb2[w] == xdimen-1)
        { xv = cosglobalang-sinviewingrangeglobalang; yv = singlobalang+cosviewingrangeglobalang; }
    else
        { xv = (x2+x1)-globalposx; yv = (y2+y1)-globalposy; }
    zint_t z2 = wallmost_getz(w1, t, z, x1, y1, x2, y2, xv, yv, dx, dy);


    const zint_t s1 = mulscale20(globaluclip,yb1[w]), s2 = mulscale20(globaluclip,yb2[w]);
    const zint_t s3 = mulscale20(globaldclip,yb1[w]), s4 = mulscale20(globaldclip,yb2[w]);
    const int32_t bad = (z1<s1)+((z2<s2)<<1)+((z1>s3)<<2)+((z2>s4)<<3);

    int32_t ix1 = xb1[w], ix2 = xb2[w];
    int32_t iy1 = yb1[w], iy2 = yb2[w];

    if ((bad&3) == 3)
    {
        for (int i=ix1; i<=ix2; i++)
            mostbuf[i] = 0;
        return bad;
    }

    if ((bad&12) == 12)
    {
        for (int i=ix1; i<=ix2; i++)
            mostbuf[i] = ydimen;
        return bad;
    }

    const int32_t oz1 = z1, oz2 = z2;

    if (bad&3)
    {
        //inty = intz / (globaluclip>>16)
        t = divscale30(oz1-s1,s2-s1+oz1-oz2);
        int32_t inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        int32_t intz = oz1 + mulscale30(oz2-oz1,t);
        int32_t xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        //t = divscale30((x1<<4)-xcross*yb1[w],xcross*(yb2[w]-yb1[w])-((x2-x1)<<4));
        //inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        //intz = z1 + mulscale30(z2-z1,t);

        if ((bad&3) == 2)
        {
            if (xb1[w] <= xcross) { z2 = intz; iy2 = inty; ix2 = xcross; }
            for (int i=xcross+1; i<=xb2[w]; i++)
                mostbuf[i] = 0;
        }
        else
        {
            if (xcross <= xb2[w]) { z1 = intz; iy1 = inty; ix1 = xcross; }
            for (int i=xb1[w]; i<=xcross; i++)
                mostbuf[i] = 0;
        }
    }

    if (bad&12)
    {
        //inty = intz / (globaldclip>>16)
        t = divscale30(oz1-s3,s4-s3+oz1-oz2);
        int32_t inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        int32_t intz = oz1 + mulscale30(oz2-oz1,t);
        int32_t xcross = xb1[w] + scale(mulscale30(yb2[w],t),xb2[w]-xb1[w],inty);

        //t = divscale30((x1<<4)-xcross*yb1[w],xcross*(yb2[w]-yb1[w])-((x2-x1)<<4));
        //inty = yb1[w] + mulscale30(yb2[w]-yb1[w],t);
        //intz = z1 + mulscale30(z2-z1,t);

        if ((bad&12) == 8)
        {
            if (xb1[w] <= xcross) { z2 = intz; iy2 = inty; ix2 = xcross; }
            for (int i=xcross+1; i<=xb2[w]; i++)
                mostbuf[i] = ydimen;
        }
        else
        {
            if (xcross <= xb2[w]) { z1 = intz; iy1 = inty; ix1 = xcross; }
            for (int i=xb1[w]; i<=xcross; i++)
                mostbuf[i] = ydimen;
        }
    }

    wallmosts_finish(mostbuf, z1, z2, ix1, iy1, ix2, iy2);

    return bad;
}


// globalpicnum --> globalxshift, globalyshift
static void calc_globalshifts(void)
{
    globalxshift = (8-(picsiz[globalpicnum]&15));
    globalyshift = (8-(picsiz[globalpicnum]>>4));
    if (globalorientation&8) { globalxshift++; globalyshift++; }
    // PK: the following can happen for large (>= 512) tile sizes.
    // NOTE that global[xy]shift are unsigned chars.
    if (globalxshift > 31) globalxshift=0;
    if (globalyshift > 31) globalyshift=0;
}

static int32_t setup_globals_cf1(const sectortype *sec, int32_t pal, int32_t zd,
                                 int32_t picnum, int32_t shade, int32_t stat,
                                 int32_t xpanning, int32_t ypanning, int32_t x1)
{
    int32_t i, j, ox, oy;

    if (palookup[pal] != globalpalwritten)
    {
        globalpalwritten = palookup[pal];
        if (!globalpalwritten) globalpalwritten = palookup[globalpal];  // JBF: fixes null-pointer crash
        setpalookupaddress(globalpalwritten);
    }

    globalzd = zd;
    if (globalzd > 0) return 1;

    globalpicnum = picnum;
    if ((unsigned)globalpicnum >= MAXTILES) globalpicnum = 0;
    setgotpic(globalpicnum);
    if ((tilesiz[globalpicnum].x <= 0) || (tilesiz[globalpicnum].y <= 0)) return 1;
    DO_TILE_ANIM(globalpicnum, 0);

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);
    globalbufplc = waloff[globalpicnum];

    globalshade = shade;
    globvis = globalcisibility;
    if (sec->visibility != 0) globvis = mulscale4(globvis, (uint8_t)(sec->visibility+16));
    globalorientation = stat;

    if ((globalorientation&64) == 0)
    {
        globalx1 = singlobalang; globalx2 = singlobalang;
        globaly1 = cosglobalang; globaly2 = cosglobalang;
        globalxpanning = ((inthi_t)globalposx<<20);
        globalypanning = -((inthi_t)globalposy<<20);
    }
    else
    {
        j = sec->wallptr;
        ox = wall[wall[j].point2].x - wall[j].x;
        oy = wall[wall[j].point2].y - wall[j].y;
        i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) i = 1024; else i = tabledivide32(1048576, i);
        globalx1 = mulscale10(dmulscale10(ox,singlobalang,-oy,cosglobalang),i);
        globaly1 = mulscale10(dmulscale10(ox,cosglobalang,oy,singlobalang),i);
        globalx2 = -globalx1;
        globaly2 = -globaly1;

        ox = ((wall[j].x-globalposx)<<6); oy = ((wall[j].y-globalposy)<<6);
        i = dmulscale14(oy,cosglobalang,-ox,singlobalang);
        j = dmulscale14(ox,cosglobalang,oy,singlobalang);
        ox = i; oy = j;
        globalxpanning = (coord_t)globalx1*ox - (coord_t)globaly1*oy;
        globalypanning = (coord_t)globaly2*ox + (coord_t)globalx2*oy;
    }
    globalx2 = mulscale16(globalx2,viewingrangerecip);
    globaly1 = mulscale16(globaly1,viewingrangerecip);

    calc_globalshifts();

    if ((globalorientation&0x4) > 0)
    {
        i = globalxpanning; globalxpanning = globalypanning; globalypanning = i;
        i = globalx2; globalx2 = -globaly1; globaly1 = -i;
        i = globalx1; globalx1 = globaly2; globaly2 = i;
    }
    if ((globalorientation&0x10) > 0) globalx1 = -globalx1, globaly1 = -globaly1, globalxpanning = -(inthi_t)globalxpanning;
    if ((globalorientation&0x20) > 0) globalx2 = -globalx2, globaly2 = -globaly2, globalypanning = -(inthi_t)globalypanning;
    globalx1 <<= globalxshift; globaly1 <<= globalxshift;
    globalx2 <<= globalyshift;  globaly2 <<= globalyshift;
    globalxpanning <<= globalxshift; globalypanning <<= globalyshift;
    globalxpanning = (uint32_t)globalxpanning + (xpanning<<24);
    globalypanning = (uint32_t)globalypanning + (ypanning<<24);
    globaly1 = (-globalx1-globaly1)*halfxdimen;
    globalx2 = (globalx2-globaly2)*halfxdimen;

    sethlinesizes(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4,globalbufplc);

    globalx2 += globaly2*(x1-1);
    globaly1 += globalx1*(x1-1);
    globalx1 = mulscale16(globalx1,globalzd);
    globalx2 = mulscale16(globalx2,globalzd);
    globaly1 = mulscale16(globaly1,globalzd);
    globaly2 = mulscale16(globaly2,globalzd);
    globvis = klabs(mulscale10(globvis,globalzd));

    return 0;
}

static void setup_blend(int32_t blend, int32_t doreverse)
{
    if (blendtable[blend] == NULL)
        blend = 0;

    if (globalblend != blend)
    {
        globalblend = blend;
        fixtransluscence(FP_OFF(getblendtab(blend)));
    }

    if (doreverse)
        settransreverse();
    else
        settransnormal();
}

//
// ceilscan (internal)
//
static void ceilscan(int32_t x1, int32_t x2, int32_t sectnum)
{
    int32_t x, y1, y2;
    const sectortype *const sec = &sector[sectnum];

    if (setup_globals_cf1(sec, sec->ceilingpal, sec->ceilingz-globalposz,
                          sec->ceilingpicnum, sec->ceilingshade, sec->ceilingstat,
                          sec->ceilingxpanning, sec->ceilingypanning, x1))
        return;

    if (!(globalorientation&0x180))
    {
        y1 = umost[x1]; y2 = y1;
        for (x=x1; x<=x2; x++)
        {
            const int32_t twall = umost[x]-1;
            const int32_t bwall = min(uplc[x],dmost[x]);

            if (twall < bwall-1)
            {
                if (twall >= y2)
                {
                    while (y1 < y2-1) hline(x-1,++y1);
                    y1 = twall;
                }
                else
                {
                    while (y1 < twall) hline(x-1,++y1);
                    while (y1 > twall) lastx[y1--] = x;
                }
                while (y2 > bwall) hline(x-1,--y2);
                while (y2 < bwall) lastx[y2++] = x;
            }
            else
            {
                while (y1 < y2-1) hline(x-1,++y1);
                if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
                y1 = umost[x+1]; y2 = y1;
            }
            globalx2 += globaly2; globaly1 += globalx1;
        }
        while (y1 < y2-1) hline(x2,++y1);
        faketimerhandler();
        return;
    }

    switch (globalorientation&0x180)
    {
    case 128:
        msethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 256:
        setup_blend(0, 0);
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 384:
        setup_blend(0, 0);
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    }

    y1 = umost[x1]; y2 = y1;
    for (x=x1; x<=x2; x++)
    {
        const int32_t twall = umost[x]-1;
        const int32_t bwall = min(uplc[x],dmost[x]);

        if (twall < bwall-1)
        {
            if (twall >= y2)
            {
                while (y1 < y2-1) slowhline(x-1,++y1);
                y1 = twall;
            }
            else
            {
                while (y1 < twall) slowhline(x-1,++y1);
                while (y1 > twall) lastx[y1--] = x;
            }
            while (y2 > bwall) slowhline(x-1,--y2);
            while (y2 < bwall) lastx[y2++] = x;
        }
        else
        {
            while (y1 < y2-1) slowhline(x-1,++y1);
            if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
            y1 = umost[x+1]; y2 = y1;
        }
        globalx2 += globaly2; globaly1 += globalx1;
    }
    while (y1 < y2-1) slowhline(x2,++y1);
    faketimerhandler();
}


//
// florscan (internal)
//
static void florscan(int32_t x1, int32_t x2, int32_t sectnum)
{
     int32_t x, y1, y2;
     const sectortype *const sec = &sector[sectnum];

     if (setup_globals_cf1(sec, sec->floorpal, globalposz-sec->floorz,
                           sec->floorpicnum, sec->floorshade, sec->floorstat,
                           sec->floorxpanning, sec->floorypanning, x1))
         return;

    if (!(globalorientation&0x180))
    {
        y1 = max(dplc[x1],umost[x1]); y2 = y1;
        for (x=x1; x<=x2; x++)
        {
            const int32_t twall = max(dplc[x],umost[x])-1;
            const int32_t bwall = dmost[x];

            if (twall < bwall-1)
            {
                if (twall >= y2)
                {
                    while (y1 < y2-1) hline(x-1,++y1);
                    y1 = twall;
                }
                else
                {
                    while (y1 < twall) hline(x-1,++y1);
                    while (y1 > twall) lastx[y1--] = x;
                }
                while (y2 > bwall) hline(x-1,--y2);
                while (y2 < bwall) lastx[y2++] = x;
            }
            else
            {
                while (y1 < y2-1) hline(x-1,++y1);
                if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
                y1 = max(dplc[x+1],umost[x+1]); y2 = y1;
            }
            globalx2 += globaly2; globaly1 += globalx1;
        }
        while (y1 < y2-1) hline(x2,++y1);
        faketimerhandler();
        return;
    }

    switch (globalorientation&0x180)
    {
    case 128:
        msethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 256:
        setup_blend(0, 0);
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    case 384:
        setup_blend(0, 1);
        tsethlineshift(picsiz[globalpicnum]&15,picsiz[globalpicnum]>>4);
        break;
    }

    y1 = max(dplc[x1],umost[x1]); y2 = y1;
    for (x=x1; x<=x2; x++)
    {
        const int32_t twall = max(dplc[x],umost[x])-1;
        const int32_t bwall = dmost[x];

        if (twall < bwall-1)
        {
            if (twall >= y2)
            {
                while (y1 < y2-1) slowhline(x-1,++y1);
                y1 = twall;
            }
            else
            {
                while (y1 < twall) slowhline(x-1,++y1);
                while (y1 > twall) lastx[y1--] = x;
            }
            while (y2 > bwall) slowhline(x-1,--y2);
            while (y2 < bwall) lastx[y2++] = x;
        }
        else
        {
            while (y1 < y2-1) slowhline(x-1,++y1);
            if (x == x2) { globalx2 += globaly2; globaly1 += globalx1; break; }
            y1 = max(dplc[x+1],umost[x+1]); y2 = y1;
        }
        globalx2 += globaly2; globaly1 += globalx1;
    }
    while (y1 < y2-1) slowhline(x2,++y1);
    faketimerhandler();
}


//
// wallscan (internal)
//
static void wallscan(int32_t x1, int32_t x2,
                     const int16_t *uwal, const int16_t *dwal,
                     const int32_t *swal, const int32_t *lwal)
{
    int32_t x;
    intptr_t fpalookup;
    int32_t y1ve[4], y2ve[4];
    vec2_t tsiz;
#ifdef MULTI_COLUMN_VLINE
    char bad;
    int32_t u4, d4, z;
    uintptr_t p;
#endif

#ifdef YAX_ENABLE
    if (g_nodraw)
        return;
#endif
    setgotpic(globalpicnum);
    if (globalshiftval < 0)
        return;

    if (x2 >= xdim)
        x2 = xdim-1;
    assert((unsigned)x1 < (unsigned)xdim);

    tsiz = tilesiz[globalpicnum];

    if ((tsiz.x <= 0) || (tsiz.y <= 0)) return;
    if ((uwal[x1] > ydimen) && (uwal[x2] > ydimen)) return;
    if ((dwal[x1] < 0) && (dwal[x2] < 0)) return;

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    tweak_tsizes(&tsiz);

    fpalookup = FP_OFF(palookup[globalpal]);

    setupvlineasm(globalshiftval);


    x = x1;
    while ((x <= x2) && (umost[x] > dmost[x]))
        x++;

#ifdef NONPOW2_YSIZE_ASM
    if (globalshiftval==0)
        goto do_vlineasm1;
#endif

#ifdef MULTI_COLUMN_VLINE
    for (; (x<=x2)&&((x+frameoffset)&3); x++)
    {
        y1ve[0] = max(uwal[x],umost[x]);
        y2ve[0] = min(dwal[x],dmost[x]);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup + getpalookupsh(mulscale16(swal[x],globvis));

        calc_bufplc(&bufplce[0], lwal[x], tsiz);
        calc_vplcinc(&vplce[0], &vince[0], swal, x, y1ve[0]);

        vlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0],x+frameoffset+ylookup[y1ve[0]]);
    }
    for (; x<=x2-3; x+=4)
    {
        bad = 0;
        for (z=3; z>=0; z--)
        {
            y1ve[z] = max(uwal[x+z],umost[x+z]);
            y2ve[z] = min(dwal[x+z],dmost[x+z])-1;
            if (y2ve[z] < y1ve[z]) { bad += pow2char[z]; continue; }

            calc_bufplc(&bufplce[z], lwal[x+z], tsiz);
            calc_vplcinc(&vplce[z], &vince[z], swal, x+z, y1ve[z]);
        }
        if (bad == 15) continue;

        palookupoffse[0] = fpalookup + getpalookupsh(mulscale16(swal[x],globvis));
        palookupoffse[3] = fpalookup + getpalookupsh(mulscale16(swal[x+3],globvis));

        if ((palookupoffse[0] == palookupoffse[3]) && ((bad&0x9) == 0))
        {
            palookupoffse[1] = palookupoffse[0];
            palookupoffse[2] = palookupoffse[0];
        }
        else
        {
            palookupoffse[1] = fpalookup + getpalookupsh(mulscale16(swal[x+1],globvis));
            palookupoffse[2] = fpalookup + getpalookupsh(mulscale16(swal[x+2],globvis));
        }

        u4 = max(max(y1ve[0],y1ve[1]),max(y1ve[2],y1ve[3]));
        d4 = min(min(y2ve[0],y2ve[1]),min(y2ve[2],y2ve[3]));

        if ((bad != 0) || (u4 >= d4))
        {
            if (!(bad&1)) prevlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0],vplce[0],bufplce[0],ylookup[y1ve[0]]+x+frameoffset+0);
            if (!(bad&2)) prevlineasm1(vince[1],palookupoffse[1],y2ve[1]-y1ve[1],vplce[1],bufplce[1],ylookup[y1ve[1]]+x+frameoffset+1);
            if (!(bad&4)) prevlineasm1(vince[2],palookupoffse[2],y2ve[2]-y1ve[2],vplce[2],bufplce[2],ylookup[y1ve[2]]+x+frameoffset+2);
            if (!(bad&8)) prevlineasm1(vince[3],palookupoffse[3],y2ve[3]-y1ve[3],vplce[3],bufplce[3],ylookup[y1ve[3]]+x+frameoffset+3);
            continue;
        }

        if (u4 > y1ve[0]) vplce[0] = prevlineasm1(vince[0],palookupoffse[0],u4-y1ve[0]-1,vplce[0],bufplce[0],ylookup[y1ve[0]]+x+frameoffset+0);
        if (u4 > y1ve[1]) vplce[1] = prevlineasm1(vince[1],palookupoffse[1],u4-y1ve[1]-1,vplce[1],bufplce[1],ylookup[y1ve[1]]+x+frameoffset+1);
        if (u4 > y1ve[2]) vplce[2] = prevlineasm1(vince[2],palookupoffse[2],u4-y1ve[2]-1,vplce[2],bufplce[2],ylookup[y1ve[2]]+x+frameoffset+2);
        if (u4 > y1ve[3]) vplce[3] = prevlineasm1(vince[3],palookupoffse[3],u4-y1ve[3]-1,vplce[3],bufplce[3],ylookup[y1ve[3]]+x+frameoffset+3);

        if (d4 >= u4) vlineasm4(d4-u4+1, (char *)(ylookup[u4]+x+frameoffset));

        p = x+frameoffset+ylookup[d4+1];
        if (y2ve[0] > d4) prevlineasm1(vince[0],palookupoffse[0],y2ve[0]-d4-1,vplce[0],bufplce[0],p+0);
        if (y2ve[1] > d4) prevlineasm1(vince[1],palookupoffse[1],y2ve[1]-d4-1,vplce[1],bufplce[1],p+1);
        if (y2ve[2] > d4) prevlineasm1(vince[2],palookupoffse[2],y2ve[2]-d4-1,vplce[2],bufplce[2],p+2);
        if (y2ve[3] > d4) prevlineasm1(vince[3],palookupoffse[3],y2ve[3]-d4-1,vplce[3],bufplce[3],p+3);
    }
#endif

#ifdef NONPOW2_YSIZE_ASM
do_vlineasm1:
#endif
    for (; x<=x2; x++)
    {
        y1ve[0] = max(uwal[x],umost[x]);
        y2ve[0] = min(dwal[x],dmost[x]);
        if (y2ve[0] <= y1ve[0]) continue;

        palookupoffse[0] = fpalookup + getpalookupsh(mulscale16(swal[x],globvis));

        calc_bufplc(&bufplce[0], lwal[x], tsiz);
        calc_vplcinc(&vplce[0], &vince[0], swal, x, y1ve[0]);

#ifdef NONPOW2_YSIZE_ASM
        if (globalshiftval==0)
            vlineasm1nonpow2(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0],x+frameoffset+ylookup[y1ve[0]]);
        else
#endif
        vlineasm1(vince[0],palookupoffse[0],y2ve[0]-y1ve[0]-1,vplce[0],bufplce[0],x+frameoffset+ylookup[y1ve[0]]);
    }

    faketimerhandler();
}

//
// transmaskvline (internal)
//
static void transmaskvline(int32_t x)
{
    uint32_t vplc;
    int32_t vinc;
    intptr_t palookupoffs;
    intptr_t bufplc,p;
    int32_t y1v, y2v;

    vec2_t ntsiz;

    if ((x < 0) || (x >= xdimen)) return;

    y1v = max(uwall[x],startumost[x+windowx1]-windowy1);
    y2v = min(dwall[x],startdmost[x+windowx1]-windowy1);
    y2v--;
    if (y2v < y1v) return;

    palookupoffs = FP_OFF(palookup[globalpal]) + getpalookupsh(mulscale16(swall[x],globvis));

    ntsiz.x = -tilesiz[globalpicnum].x;
    ntsiz.y = -tilesiz[globalpicnum].y;
    calc_bufplc(&bufplc, lwall[x], ntsiz);
    calc_vplcinc(&vplc, &vinc, swall, x, y1v);

    p = ylookup[y1v]+x+frameoffset;

#ifdef NONPOW2_YSIZE_ASM
    if (globalshiftval==0)
        tvlineasm1nonpow2(vinc,palookupoffs,y2v-y1v,vplc,bufplc,p);
    else
#endif
    tvlineasm1(vinc,palookupoffs,y2v-y1v,vplc,bufplc,p);
}


//
// transmaskvline2 (internal)
//
#ifdef MULTI_COLUMN_VLINE
static void transmaskvline2(int32_t x)
{
    int32_t y1, y2, x2;
    int32_t y1ve[2], y2ve[2];

    uintptr_t p;

    vec2_t ntsiz;

    if ((x < 0) || (x >= xdimen)) return;
    if (x == xdimen-1) { transmaskvline(x); return; }

    x2 = x+1;

    y1ve[0] = max(uwall[x],startumost[x+windowx1]-windowy1);
    y2ve[0] = min(dwall[x],startdmost[x+windowx1]-windowy1)-1;
    if (y2ve[0] < y1ve[0]) { transmaskvline(x2); return; }
    y1ve[1] = max(uwall[x2],startumost[x2+windowx1]-windowy1);
    y2ve[1] = min(dwall[x2],startdmost[x2+windowx1]-windowy1)-1;
    if (y2ve[1] < y1ve[1]) { transmaskvline(x); return; }

    palookupoffse[0] = FP_OFF(palookup[globalpal]) + getpalookupsh(mulscale16(swall[x],globvis));
    palookupoffse[1] = FP_OFF(palookup[globalpal]) + getpalookupsh(mulscale16(swall[x2],globvis));

    setuptvlineasm2(globalshiftval,palookupoffse[0],palookupoffse[1]);

    ntsiz.x = -tilesiz[globalpicnum].x;
    ntsiz.y = -tilesiz[globalpicnum].y;

    calc_bufplc(&bufplce[0], lwall[x], ntsiz);
    calc_bufplc(&bufplce[1], lwall[x2], ntsiz);
    calc_vplcinc(&vplce[0], &vince[0], swall, x, y1ve[0]);
    calc_vplcinc(&vplce[1], &vince[1], swall, x2, y1ve[1]);

    y1 = max(y1ve[0],y1ve[1]);
    y2 = min(y2ve[0],y2ve[1]);

    p = x+frameoffset;

    if (y1ve[0] != y1ve[1])
    {
        if (y1ve[0] < y1)
            vplce[0] = tvlineasm1(vince[0],palookupoffse[0],y1-y1ve[0]-1,vplce[0],bufplce[0],ylookup[y1ve[0]]+p);
        else
            vplce[1] = tvlineasm1(vince[1],palookupoffse[1],y1-y1ve[1]-1,vplce[1],bufplce[1],ylookup[y1ve[1]]+p+1);
    }

    if (y2 > y1)
    {
        asm1 = vince[1];
        asm2 = ylookup[y2]+p+1;
        tvlineasm2(vplce[1],vince[0],bufplce[0],bufplce[1],vplce[0],ylookup[y1]+p);
    }
    else
    {
        asm1 = vplce[0];
        asm2 = vplce[1];
    }

    if (y2ve[0] > y2ve[1])
        tvlineasm1(vince[0],palookupoffse[0],y2ve[0]-y2-1,asm1,bufplce[0],ylookup[y2+1]+p);
    else if (y2ve[0] < y2ve[1])
        tvlineasm1(vince[1],palookupoffse[1],y2ve[1]-y2-1,asm2,bufplce[1],ylookup[y2+1]+p+1);

    faketimerhandler();
}
#endif

//
// transmaskwallscan (internal)
//
static void transmaskwallscan(int32_t x1, int32_t x2, int32_t saturatevplc)
{
    int32_t x;

    setgotpic(globalpicnum);

    Bassert(globalshiftval>=0 || ((tilesiz[globalpicnum].x <= 0) || (tilesiz[globalpicnum].y <= 0)));
    // globalshiftval<0 implies following condition
    if ((tilesiz[globalpicnum].x <= 0) || (tilesiz[globalpicnum].y <= 0))
        return;

    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    setuptvlineasm(globalshiftval, saturatevplc);

    x = x1;
    while ((x <= x2) && (startumost[x+windowx1] > startdmost[x+windowx1]))
        x++;

#ifndef ENGINE_USING_A_C
    if (globalshiftval==0)
    {
        while (x <= x2) transmaskvline(x), x++;
    }
    else
#endif
    {
#ifdef MULTI_COLUMN_VLINE
        if ((x <= x2) && (x&1)) transmaskvline(x), x++;
        while (x < x2) transmaskvline2(x), x += 2;
#endif
        while (x <= x2) transmaskvline(x), x++;
    }

    faketimerhandler();
}


////////// NON-power-of-two replacements for mhline/thline, adapted from a.c //////////
#if defined(__GNUC__) && defined(__i386__) && !defined(NOASM)
// from pragmas.h
# define ourdivscale32(d,b) \
        ({ int32_t __d=(d), __b=(b), __r; \
           __asm__ __volatile__ ("xorl %%eax, %%eax; divl %%ebx" \
                : "=a" (__r), "=d" (__d) : "d" (__d), "b" (__b) : "cc"); \
         __r; })

#else
# define ourdivscale32(d,b) divscale32(d,b)
#endif

// cntup16>>16 iterations

static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, char *p)
{
    char ch;

    const char *const A_C_RESTRICT buf = (char *)bufplc;
    const char *const A_C_RESTRICT pal = (char *)asm3;

    const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
    const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
    const uint32_t yspan = globalyspan;
    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    for (cntup16>>=16; cntup16>0; cntup16--)
    {
        ch = buf[(divideu32(bx, xdiv))*yspan + divideu32(by, ydiv)];

        if (ch != 255) *p = pal[ch];
        bx += xinc;
        by += yinc;
        p++;
    }
}

// cntup16>>16 iterations
static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, char *p)
{
    char ch;

    const char *const A_C_RESTRICT buf = (char *)bufplc;
    const char *const A_C_RESTRICT pal = (char *)asm3;
    const char *const A_C_RESTRICT trans = getblendtab(globalblend);

    const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
    const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
    const uint32_t yspan = globalyspan;
    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    if (globalorientation&512)
    {
        for (cntup16>>=16; cntup16>0; cntup16--)
        {
            ch = buf[divideu32(bx, xdiv)*yspan + divideu32(by, ydiv)];
            if (ch != 255) *p = trans[(*p)|(pal[ch]<<8)];
            bx += xinc;
            by += yinc;
            p++;
        }
    }
    else
    {
        for (cntup16>>=16; cntup16>0; cntup16--)
        {
            ch = buf[divideu32(bx, xdiv)*yspan + divideu32(by, ydiv)];
            if (ch != 255) *p = trans[((*p)<<8)|pal[ch]];
            bx += xinc;
            by += yinc;
            p++;
        }
    }
}
////////// END non-power-of-two replacements //////////

//
// ceilspritehline (internal)
//
static void ceilspritehline(int32_t x2, int32_t y)
{
    int32_t x1, v, bx, by;

    //x = x1 + (x2-x1)t + (y1-y2)u  ~  x = 160v
    //y = y1 + (y2-y1)t + (x2-x1)u  ~  y = (scrx-160)v
    //z = z1 = z2                   ~  z = posz + (scry-horiz)v

    x1 = lastx[y]; if (x2 < x1) return;

    v = mulscale20(globalzd,horizlookup[y-globalhoriz+horizycent]);
    bx = (uint32_t)mulscale14(globalx2*x1+globalx1,v) + globalxpanning;
    by = (uint32_t)mulscale14(globaly2*x1+globaly1,v) + globalypanning;
    asm1 = mulscale14(globalx2,v);
    asm2 = mulscale14(globaly2,v);

    asm3 = FP_OFF(palookup[globalpal]) + getpalookupsh(mulscale28(klabs(v),globvis));

    if (globalispow2)
    {
        if ((globalorientation&2) == 0)
            mhline(globalbufplc,bx,(x2-x1)<<16,0L,by,ylookup[y]+x1+frameoffset);
        else
            thline(globalbufplc,bx,(x2-x1)<<16,0L,by,ylookup[y]+x1+frameoffset);
    }
    else
    {
        if ((globalorientation&2) == 0)
            nonpow2_mhline(globalbufplc,bx,(x2-x1)<<16,0L,by,(char *)(ylookup[y]+x1+frameoffset));
        else
            nonpow2_thline(globalbufplc,bx,(x2-x1)<<16,0L,by,(char *)(ylookup[y]+x1+frameoffset));
    }
}


//
// ceilspritescan (internal)
//
static void ceilspritescan(int32_t x1, int32_t x2)
{
    int32_t x;

    int32_t y1 = uwall[x1];
    int32_t y2 = y1;

    for (x=x1; x<=x2; x++)
    {
        const int32_t twall = uwall[x]-1;
        const int32_t bwall = dwall[x];

        if (twall < bwall-1)
        {
            if (twall >= y2)
            {
                while (y1 < y2-1) ceilspritehline(x-1,++y1);
                y1 = twall;
            }
            else
            {
                while (y1 < twall) ceilspritehline(x-1,++y1);
                while (y1 > twall) lastx[y1--] = x;
            }
            while (y2 > bwall) ceilspritehline(x-1,--y2);
            while (y2 < bwall) lastx[y2++] = x;
        }
        else
        {
            while (y1 < y2-1) ceilspritehline(x-1,++y1);
            if (x == x2) break;
            y1 = uwall[x+1]; y2 = y1;
        }
    }
    while (y1 < y2-1) ceilspritehline(x2,++y1);
    faketimerhandler();
}

////////// translucent slope vline, based on a-c.c's slopevlin //////////
static int32_t gglogx, gglogy, ggpinc;
static char *ggbuf, *ggpal;

#ifdef ENGINE_USING_A_C
extern int32_t gpinc;
#endif

static inline void setupslopevlin_alsotrans(int32_t logylogx, intptr_t bufplc, int32_t pinc)
{
#ifdef ENGINE_USING_A_C
    sethlinesizes(logylogx&255, logylogx>>8, bufplc);
    gpinc = pinc;
#else
    setupslopevlin(logylogx, bufplc, pinc);
#endif
    gglogx = (logylogx&255); gglogy = (logylogx>>8);
    ggbuf = (char *)bufplc; ggpinc = pinc;
    ggpal = palookup[globalpal] + getpalookupsh(0);
}

// cnt iterations
static void tslopevlin(uint8_t *p, int32_t i, const intptr_t *slopalptr, int32_t cnt, int32_t bx, int32_t by)
{
    const char *const A_C_RESTRICT buf = ggbuf;
    const char *const A_C_RESTRICT pal = ggpal;
    const char *const A_C_RESTRICT trans = getblendtab(0);
    const int32_t bzinc = (asm1>>3), pinc = ggpinc;

    const int32_t transmode = (globalorientation&128);
    const uint32_t xtou = globalx3, ytov = globaly3;
    const int32_t logx = gglogx, logy = gglogy;

    int32_t bz = asm3;

    do
    {
        uint8_t ch;
        uint32_t u, v;

        i = (sloptable[(bz>>6)+8192]); bz += bzinc;
        u = bx + xtou*i;
        v = by + ytov*i;
        ch = *(uint8_t *)(slopalptr[0] + buf[((u>>(32-logx))<<logy)+(v>>(32-logy))]);

        if (ch != 255)
            *p = trans[transmode ? *p|(pal[ch]<<8) : (*p<<8)|pal[ch]];

        slopalptr--;
        p += pinc;
    }
    while (--cnt);
}

//
// grouscan (internal)
//
#define BITSOFPRECISION 3  //Don't forget to change this in A.ASM also!
static void grouscan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat)
{
    int32_t i, l, x, y, dx, dy, wx, wy, y1, y2, daz;
    int32_t daslope, dasqr;
    int32_t shoffs, shinc, m1, m2;
    intptr_t *mptr1, *mptr2, j;

    // Er, yes, they're not global anymore:
    int32_t globalx, globaly, globalz, globalzx;

    const sectortype *const sec = &sector[sectnum];
    const walltype *wal;

    if (dastat == 0)
    {
        if (globalposz <= getceilzofslope(sectnum,globalposx,globalposy))
            return;  //Back-face culling
        globalorientation = sec->ceilingstat;
        globalpicnum = sec->ceilingpicnum;
        globalshade = sec->ceilingshade;
        globalpal = sec->ceilingpal;
        daslope = sec->ceilingheinum;
        daz = sec->ceilingz;
    }
    else
    {
        if (globalposz >= getflorzofslope(sectnum,globalposx,globalposy))
            return;  //Back-face culling
        globalorientation = sec->floorstat;
        globalpicnum = sec->floorpicnum;
        globalshade = sec->floorshade;
        globalpal = sec->floorpal;
        daslope = sec->floorheinum;
        daz = sec->floorz;
    }

    DO_TILE_ANIM(globalpicnum, sectnum);
    setgotpic(globalpicnum);
    if ((tilesiz[globalpicnum].x <= 0) || (tilesiz[globalpicnum].y <= 0)) return;
    if (waloff[globalpicnum] == 0) loadtile(globalpicnum);

    wal = &wall[sec->wallptr];
    wx = wall[wal->point2].x - wal->x;
    wy = wall[wal->point2].y - wal->y;
    dasqr = krecipasm(nsqrtasm(uhypsq(wx,wy)));
    i = mulscale21(daslope,dasqr);
    wx *= i; wy *= i;

    globalx = -mulscale19(singlobalang,xdimenrecip);
    globaly = mulscale19(cosglobalang,xdimenrecip);
    globalx1 = (globalposx<<8);
    globaly1 = -(globalposy<<8);
    i = (dax1-halfxdimen)*xdimenrecip;
    globalx2 = mulscale16(cosglobalang<<4,viewingrangerecip) - mulscale27(singlobalang,i);
    globaly2 = mulscale16(singlobalang<<4,viewingrangerecip) + mulscale27(cosglobalang,i);
    globalzd = (xdimscale<<9);
    globalzx = -dmulscale17(wx,globaly2,-wy,globalx2) + mulscale10(1-globalhoriz,globalzd);
    globalz = -dmulscale25(wx,globaly,-wy,globalx);

    if (globalorientation&64)  //Relative alignment
    {
        dx = mulscale14(wall[wal->point2].x-wal->x,dasqr);
        dy = mulscale14(wall[wal->point2].y-wal->y,dasqr);

        i = nsqrtasm(daslope*daslope+16777216);

        x = globalx; y = globaly;
        globalx = dmulscale16(x,dx,y,dy);
        globaly = mulscale12(dmulscale16(-y,dx,x,dy),i);

        x = ((wal->x-globalposx)<<8); y = ((wal->y-globalposy)<<8);
        globalx1 = dmulscale16(-x,dx,-y,dy);
        globaly1 = mulscale12(dmulscale16(-y,dx,x,dy),i);

        x = globalx2; y = globaly2;
        globalx2 = dmulscale16(x,dx,y,dy);
        globaly2 = mulscale12(dmulscale16(-y,dx,x,dy),i);
    }
    if (globalorientation&0x4)
    {
        i = globalx; globalx = -globaly; globaly = -i;
        i = globalx1; globalx1 = globaly1; globaly1 = i;
        i = globalx2; globalx2 = -globaly2; globaly2 = -i;
    }
    if (globalorientation&0x10) { globalx1 = -globalx1, globalx2 = -globalx2, globalx = -globalx; }
    if (globalorientation&0x20) { globaly1 = -globaly1, globaly2 = -globaly2, globaly = -globaly; }

    daz = dmulscale9(wx,globalposy-wal->y,-wy,globalposx-wal->x) + ((daz-globalposz)<<8);
    globalx2 = mulscale20(globalx2,daz); globalx = mulscale28(globalx,daz);
    globaly2 = mulscale20(globaly2,-daz); globaly = mulscale28(globaly,-daz);

    i = 8-(picsiz[globalpicnum]&15); j = 8-(picsiz[globalpicnum]>>4);
    if (globalorientation&8) { i++; j++; }
    globalx1 <<= (i+12); globalx2 <<= i; globalx <<= i;
    globaly1 <<= (j+12); globaly2 <<= j; globaly <<= j;

    if (dastat == 0)
    {
        globalx1 += (uint32_t)sec->ceilingxpanning<<24;
        globaly1 += (uint32_t)sec->ceilingypanning<<24;
    }
    else
    {
        globalx1 += (uint32_t)sec->floorxpanning<<24;
        globaly1 += (uint32_t)sec->floorypanning<<24;
    }

    asm1 = -(globalzd>>(16-BITSOFPRECISION));

    {
        int32_t vis = globalvisibility;
        int64_t lvis;

        if (sec->visibility != 0) vis = mulscale4(vis, (uint8_t)(sec->visibility+16));
        lvis = ((uint64_t)vis*daz) >> 13;  // NOTE: lvis can be negative now!
        lvis = (lvis * xdimscale) >> 16;
        globvis = lvis;
    }

    j = FP_OFF(palookup[globalpal]);

    setupslopevlin_alsotrans((picsiz[globalpicnum]&15) + ((picsiz[globalpicnum]>>4)<<8),
                             waloff[globalpicnum],-ylookup[1]);

    l = (globalzd>>16);

    shinc = mulscale16(globalz,xdimenscale);
    if (shinc > 0) shoffs = (4<<15); else shoffs = ((16380-ydimen)<<15);    // JBF: was 2044
    if (dastat == 0) y1 = umost[dax1]; else y1 = max(umost[dax1],dplc[dax1]);
    m1 = mulscale16(y1,globalzd) + (globalzx>>6);
    //Avoid visibility overflow by crossing horizon
    if (globalzd > 0) m1 += (globalzd>>16); else m1 -= (globalzd>>16);
    m2 = m1+l;
    mptr1 = (intptr_t *)&slopalookup[y1+(shoffs>>15)]; mptr2 = mptr1+1;

    for (x=dax1; x<=dax2; x++)
    {
        if (dastat == 0) { y1 = umost[x]; y2 = min(dmost[x],uplc[x])-1; }
        else { y1 = max(umost[x],dplc[x]); y2 = dmost[x]-1; }
        if (y1 <= y2)
        {
            intptr_t *nptr1 = &slopalookup[y1+(shoffs>>15)];
            intptr_t *nptr2 = &slopalookup[y2+(shoffs>>15)];

            while (nptr1 <= mptr1)
            {
                *mptr1-- = j + getpalookupsh(mulscale24(krecipasm(m1),globvis));
                m1 -= l;
            }
            while (nptr2 >= mptr2)
            {
                *mptr2++ = j + getpalookupsh(mulscale24(krecipasm(m2),globvis));
                m2 += l;
            }

            globalx3 = (globalx2>>10);
            globaly3 = (globaly2>>10);
            asm3 = mulscale16(y2,globalzd) + (globalzx>>6);
            if ((globalorientation&256)==0)
                slopevlin(ylookup[y2]+x+frameoffset,krecipasm(asm3>>3),(intptr_t)nptr2,y2-y1+1,globalx1,globaly1);
            else
                tslopevlin((uint8_t *)(ylookup[y2]+x+frameoffset),0,nptr2,y2-y1+1,globalx1,globaly1);

            if ((x&15) == 0) faketimerhandler();
        }
        globalx2 += globalx;
        globaly2 += globaly;
        globalzx += globalz;
        shoffs += shinc;
    }
}


//
// parascan (internal)
//
static void parascan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat, int32_t bunch)
{
    sectortype *sec;
    int32_t j, k, l, m, n, x, z, wallnum, nextsectnum, globalhorizbak;
    int16_t *topptr, *botptr;

    int32_t dapyscale, dapskybits;
    const int8_t *dapskyoff;

    int32_t logtilesizy, tsizy;

    UNREFERENCED_PARAMETER(dax1);
    UNREFERENCED_PARAMETER(dax2);

    sectnum = thesector[bunchfirst[bunch]]; sec = &sector[sectnum];

    globalhorizbak = globalhoriz;
    globvis = globalpisibility;
    //globalorientation = 0L;
    if (sec->visibility != 0)
        globvis = mulscale4(globvis, (uint8_t)(sec->visibility+16));

    if (dastat == 0)
    {
        globalpal = sec->ceilingpal;
        globalpicnum = sec->ceilingpicnum;
        globalshade = (int32_t)sec->ceilingshade;
        globalxpanning = (int32_t)sec->ceilingxpanning;
        globalypanning = (int32_t)sec->ceilingypanning;
        topptr = umost;
        botptr = uplc;
    }
    else
    {
        globalpal = sec->floorpal;
        globalpicnum = sec->floorpicnum;
        globalshade = (int32_t)sec->floorshade;
        globalxpanning = (int32_t)sec->floorxpanning;
        globalypanning = (int32_t)sec->floorypanning;
        topptr = dplc;
        botptr = dmost;
    }

    if ((unsigned)globalpicnum >= MAXTILES) globalpicnum = 0;
    DO_TILE_ANIM(globalpicnum, sectnum);

    logtilesizy = (picsiz[globalpicnum]>>4);
    tsizy = tilesiz[globalpicnum].y;

    if (tsizy==0)
        return;

    dapskyoff = getpsky(globalpicnum, &dapyscale, &dapskybits);

    globalshiftval = logtilesizy;

    // before proper non-power-of-two tilesizy drawing
    if (oldnonpow2() && pow2long[logtilesizy] != tsizy)
        globalshiftval++;
#ifdef CLASSIC_NONPOW2_YSIZE_WALLS
    // non power-of-two y size textures!
    if ((!oldnonpow2() && pow2long[logtilesizy] != tsizy) || tsizy >= 512)
    {
        globaltilesizy = tsizy;
        globalyscale = 65536 / tsizy;
        globalshiftval = 0;
        globalzd