/*
   Copyright (C) 2004-2005 Paul Mackerras <paulus@samba.org>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "core.h"
#include <string.h>
#include <stddef.h>

#ifndef USE_MACHINE_RESERVATION
/*
 * Software stand-in for the hardware reservation, only defined when the
 * file is built without USE_MACHINE_RESERVATION.  The code generated for
 * a LOCKed LOAD stores the load address into this word; the code
 * generated for a LOCKed STORE loads it back, compares it with zero and
 * writes zero to it after the store instruction.
 */
Addr VG_(reservation);
#endif

#define dis	VG_(print_codegen)

#define TSTOFF(reg)	(offsetof(ThreadState, arch.reg))

/* Interesting registers */
/* These values must match the usage in vg_ppc_dispatch.S */
#define DISPATCH_CTR	17	/* r17 contains VG_(dispatch_ctr) */
#define CR		18	/* r18 contains synthetic CR */
#define XER		19	/* r19 contains synthetic XER */
#define NIP		30	/* r30 contains synthetic NIP (PC) */
#define TST		31	/* r31 points to the thread state */

/* Temporary registers are from 20 - 29 */
#define IS_TEMP(r)	(20 <= (r) && (r) < 30)

/* Shorthand for some instructions */
#define MOVEREG(dst, src)	XFORM31(src, dst, src, MO_OR, 0)

/* Load/store opcodes and their mnemonics, indexed by (access size - 1).
   The size-3 slot is a placeholder: 3-byte accesses are emitted as a
   byte access plus a halfword access instead of through these tables. */
static unsigned int ldop[4] = { LBZ, LHZ, 0, LWZ };
static unsigned int stop[4] = { STB, STH, 0, STW };
static const char *ldopname[4] = { "lbz", "lhz", "unimp", "lwz" };
static const char *stopname[4] = { "stb", "sth", "unimp", "stw" };
/* NOTE(review): presumably indexed the same way for vector element loads;
   its use is not visible in this part of the file */
static unsigned int lveop[4] = { MO_LVEBX, MO_LVEHX, 0, MO_LVEWX };

/* Output buffer of 32-bit instruction words: base pointer, number of
   words written so far, and current capacity in words */
static unsigned int *emitted_code;
static int emitted_used;
static int emitted_size;

/* xer_valid: the hardware XER has been loaded from the synthetic XER
   register (r19).  xer_dirty: the hardware XER has been modified since
   then and must be copied back to r19 by save_xer(). */
static int xer_valid, xer_dirty;
/* FPSCR has been loaded for an FP instruction and must be stored back to
   the ThreadState by store_fpscr() */
static int fpscr_dirty;
/* LR still holds its original value; cleared once it has been saved to
   20(r1), set again by reload_lr() */
static int lr_live;

/* realreg_dirty[r] is non-zero when host register r (used for CR, XER
   and NIP) holds a value not yet written to the ThreadState */
static int realreg_dirty[32];

/* Byte offsets, within the emitted code, of the chainable jump sites
   recorded so far, and how many there are */
static UShort jumps[VG_MAX_JUMPS];
static int njumps;

/* Set once load_vec() has emitted its vector-state load sequence */
static int vec_loaded;
/* Set by a LOCK uinstr; consumed (and cleared) by the next LOAD or STORE */
static int islocked;
/* Reset to 3 by reset_state(); NOTE(review): its users are not visible in
   this part of the file */
static int argreg;

/* regloc[r] > 0 means the value notionally in register r currently lives
   in host register regloc[r]; 0 means it lives in r itself */
static int regloc[32];
/* Last register handed out by VG_(real_dest_reg) */
static int instr_dest_reg;

/* Register liveness masks tested with IS_RREG_LIVE: live_before for
   source operands, live_after for values that must survive the current
   instruction */
static UInt live_before, live_after;

/* Incremented once per INCEIP uinstr emitted */
static UInt real_count;
/* Number of calls emitted by VG_(emit_call), and how many of those were
   flagged as conditional */
static UInt n_ccalls;
static UInt n_cond_ccalls;

/*
 * Put the emitter's file-scope state back to its starting values and
 * allocate a fresh 100-word output buffer.
 *
 * NOTE(review): the previous emitted_code pointer is overwritten without
 * being freed here; presumably ownership has already passed to the
 * caller -- verify.
 */
static void reset_state(void)
{
    /* output buffer */
    emitted_size = 100;
    emitted_used = 0;
    emitted_code = VG_(arena_malloc)
	(VG_AR_JITTER, emitted_size * sizeof(unsigned int));

    /* XER / LR tracking: hardware XER not loaded, LR untouched */
    xer_dirty = 0;
    xer_valid = 0;
    lr_live = 1;

    /* the synthetic registers start out in sync with the ThreadState */
    realreg_dirty[NIP] = 0;
    realreg_dirty[XER] = 0;
    realreg_dirty[CR] = 0;

    /* per-translation bookkeeping */
    njumps = 0;
    vec_loaded = 0;
    islocked = 0;
    real_count = 0;
    argreg = 3;

    /* no register is aliased to another one yet */
    memset(regloc, 0, sizeof(regloc));
}

/*
 * Double the capacity of the output buffer: allocate a buffer twice as
 * large, copy the words emitted so far into it and free the old one.
 */
static void expand_emitted(void)
{
    unsigned int *bigger;

    emitted_size *= 2;
    bigger = VG_(arena_malloc)(VG_AR_JITTER,
			       emitted_size * sizeof(unsigned int));
    memcpy(bigger, emitted_code, emitted_used * sizeof(unsigned int));
    VG_(arena_free)(VG_AR_JITTER, emitted_code);
    emitted_code = bigger;
}

/*
 * Append one 32-bit instruction word to the output buffer, growing the
 * buffer first when it is full.
 */
static __inline__ void emit(unsigned int x)
{
    const int slot = emitted_used;

    if (slot >= emitted_size)
	expand_emitted();	/* replaces emitted_code, keeps emitted_used */
    emitted_code[slot] = x;
    emitted_used = slot + 1;
}

/*
 * Append instruction word x to the output buffer.
 * Returns the index (in words) of the slot the instruction was written to.
 */
int VG_(emit_instr)(unsigned int x)
{
    const int where = emitted_used;

    emit(x);
    return where;
}

/*
 * Fix up the branch previously emitted at word index b so that it targets
 * the current end of the emitted code: the byte distance from slot b to
 * the next free slot is added into the instruction word.
 */
void VG_(patch_branch)(int b)
{
    const int words_ahead = emitted_used - b;

    emitted_code[b] += words_ahead << 2;
}

void VG_(print_ccall_stats)(void)
{
    VG_(message)(Vg_DebugMsg, "%u calls generated, %u of them conditional",
		 n_ccalls, n_cond_ccalls);
}

/* Does nothing: this file keeps no UInstr histogram, so the body is empty. */
void VG_(print_UInstr_histogram)(void)
{
}

/*
 * Emit "mfxer r19" to copy the hardware XER into the synthetic XER
 * register.  Afterwards r19 is marked as needing a write-back to the
 * ThreadState and the hardware XER is no longer considered modified.
 * The hardware XER must currently be valid.
 */
static void save_xer(void)
{
    vg_assert(xer_valid);

    xer_dirty = 0;
    realreg_dirty[XER] = 1;

    emit(XFORM31(XER, SPRN_XER, 0, MO_MFSPR, 0));	/* save XER to r19 */
    if (dis)
	VG_(printf)("\tmfxer r%d\n", XER);
}

/*
 * Flush a modified hardware XER to the synthetic XER register, then mark
 * the hardware XER as no longer valid so it is reloaded before its next
 * use.
 */
void VG_(will_trash_xer)(void)
{
    if (xer_dirty != 0) {
	save_xer();
    }
    xer_valid = 0;
}

/*
 * Emit "mffs f0; stfd f0,<off>(r31)" to write the hardware FPSCR back to
 * the ThreadState (256 bytes past the start of m_fpr), and clear
 * fpscr_dirty.
 */
static void store_fpscr(void)
{
    const size_t fpscr_off = TSTOFF(m_fpr) + 256;

    emit(XFORM63(0, 0, 0, MO_MFFS, 0));
    emit(DFORM(STFD, 0, TST, fpscr_off));
    fpscr_dirty = 0;

    if (!dis)
	return;
    VG_(printf)("\tmffs f0\n\tstfd f0,%d(r%d)\n", fpscr_off, TST);
}

/*
 * Emit code that calls VG_(load_vec_state), with the thread-state pointer
 * in r3, when the ThreadState's vr_live word is zero; then record in
 * vec_loaded that this sequence has been emitted.
 *
 * The call is a conditional branch-and-link through CTR, so LR is saved
 * first if it is still live.  The disassembly trace lists the
 * instructions in exactly the order in which they are emitted.
 */
static void load_vec(void)
{
    Addr address = (Addr) &VG_(load_vec_state);

    VG_(save_lr_if_live)();
    emit(DFORM(LWZ, 3, TST, TSTOFF(vr_live)));
    emit(DFORM(ADDIS, 0, 0, (address >> 16) & 0xffff));
    emit(DFORM(ORI, 0, 0, address & 0xffff));
    emit(DFORM(CMPI, 0, 3, 0));			/* cmpwi 3,0 */
    emit(XFORM31(0, SPRN_CTR, 0, MO_MTSPR, 0));	/* mtctr 0 */
    emit(MOVEREG(3, TST));			/* mr 3,31 */
    emit(XLFORM(12, 2, BCCTR, 1));		/* beqctrl */
    if (dis) {
	VG_(printf)("\tlwz r3,%d(r%d)\n", TSTOFF(vr_live), TST);
	VG_(printf)("\tlis r0,0x%x\n\tori r0,r0,0x%x\n",
		    (address >> 16) & 0xffff, address & 0xffff);
	/* keep this in the same order as the emit() calls above */
	VG_(printf)("\tcmpwi r3,0\n\tmtctr r0\n\tmr r3,r%d\n", TST);
	VG_(printf)("\tbeqctrl\n");
    }
    vec_loaded = 1;
}

/*
 * Call this before generating a call out to C code.
 */
void VG_(save_live_regs)(void)
{
    if (xer_dirty)
	save_xer();
    xer_valid = 0;
    VG_(save_lr_if_live)();
    if (realreg_dirty[CR]) {
	emit(DFORM(STW, CR, TST, TSTOFF(m_cr)));
	if (dis)
	    VG_(printf)("\tstw r%d,%d(r%d)\n", CR, TSTOFF(m_cr), TST);
	realreg_dirty[CR] = 0;
    }
    if (realreg_dirty[XER]) {
	emit(DFORM(STW, XER, TST, TSTOFF(m_xer)));
	if (dis)
	    VG_(printf)("\tstw r%d,%d(r%d)\n", XER, TSTOFF(m_xer), TST);
	realreg_dirty[XER] = 0;
    }
    /* Save NIP to the ThreadState; if a C routine detects an error
       we will want this for doing a backtrace. */
    if (realreg_dirty[NIP]) {
	emit(DFORM(STW, NIP, TST, TSTOFF(m_eip)));
	if (dis)
	    VG_(printf)("\tstw r%d,%d(r%d)\n", NIP, TSTOFF(m_eip), TST);
	realreg_dirty[NIP] = 0;
    }
    if (fpscr_dirty)
	store_fpscr();
}

/*
 * If LR still holds its original value, emit "mflr r0; stw r0,20(r1)" to
 * save it in the stack frame and note that LR is no longer live.
 * Does nothing when LR has already been saved.
 */
void VG_(save_lr_if_live)(void)
{
    if (!lr_live)
	return;

    emit(XFORM31(0, SPRN_LR, 0, MO_MFSPR, 0));	/* mflr 0 */
    emit(DFORM(STW, 0, 1, 20));			/* stw 0,20(1) */
    lr_live = 0;
    if (dis)
	VG_(printf)("\tmflr r0\n\tstw r0,20(r1)\n");
}

/*
 * Undo VG_(save_lr_if_live): emit "lwz r0,20(r1); mtlr r0" to restore LR
 * from the stack frame and mark it live again.  May only be called while
 * LR is marked as not live.
 */
static void reload_lr(void)
{
    vg_assert(!lr_live);

    lr_live = 1;
    emit(DFORM(LWZ, 0, 1, 20));				/* lwz 0,20(1) */
    emit(XFORM31(0, SPRN_LR, 0, MO_MTSPR, 0));		/* mtlr 0 */
    if (dis) {
	VG_(printf)("\tlwz r0,20(r1)\n\tmtlr r0\n");
    }
}

/*
 * Say where a spill slot or architected register lives.
 *
 * tag/val name either spill slot number val (SpillNo) or architected
 * register val (ArchReg).  A return value >= 0 is a byte offset into the
 * ThreadState.  A negative return value -r means the value is kept in
 * host register r (used for CR and XER).  Any other tag/val combination
 * panics.
 */
static int spill_or_arch_offset(Tag tag, unsigned int val)
{
    if (tag == ArchReg) {
	/* val is unsigned, so only the upper bound needs checking */
	if (val < 32)
	    return TSTOFF(m_gpr[val]);
	if (val == R_CR)
	    return -CR;			/* in a register */
	if (val == R_LR)
	    return TSTOFF(m_lr);
	if (val == R_CTR)
	    return TSTOFF(m_ctr);
	if (val == R_XER)
	    return -XER;		/* in a register */
    } else if (tag == SpillNo) {
	vg_assert(val >= 0 && val < VG_MAX_SPILLSLOTS);
	return TSTOFF(spillslots[val]);
    }
    VG_(core_panic)("spill_or_arch_offset");
}

/*
 * Emit "li r3,<code>" where <code> depends on the kind of jump:
 * VG_TRC_EBP_JMP_SYSCALL for a syscall jump, VG_TRC_EBP_JMP_CLIENTREQ
 * for a client request, otherwise -1 when the jump is chained and 0 when
 * it is not.
 */
static void set_r3_from_jmpkind(JmpKind kind, int chain)
{
    int code;

    if (kind == JmpSyscall) {
	code = VG_TRC_EBP_JMP_SYSCALL;
    } else if (kind == JmpClientReq) {
	code = VG_TRC_EBP_JMP_CLIENTREQ;
    } else {
	code = chain ? -1 : 0;
    }

    /* only the low 16 bits go into the immediate field */
    emit(DFORM(ADDI, 3, 0, code & 0xffff));
    if (dis)
	VG_(printf)("\tli r3,%d\n", code);
}

/*
 * Emit code computing dreg = sreg + val for an arbitrary 32-bit val,
 * using addis for the upper halfword and addi for the lower one.
 * sreg == 0 is printed as a literal load (lis / li).
 *
 * The upper part is rounded ((val + 0x8000) >> 16) because addi
 * sign-extends its 16-bit immediate.  The addi is omitted when the lower
 * halfword is zero and an addis was emitted; when val is zero a single
 * addi is still emitted.
 */
static void emit_addi(int dreg, int sreg, unsigned int val)
{
    const unsigned int upper = (val + 0x8000) >> 16;
    const unsigned int lower = val & 0xffff;
    unsigned int shown;

    if (upper != 0) {
	emit(DFORM(ADDIS, dreg, sreg, upper));
	if (dis) {
	    if (sreg)
		VG_(printf)("\taddis r%d,r%d,0x%x\n", dreg, sreg, upper);
	    else
		VG_(printf)("\tlis r%d,0x%x\n", dreg, upper);
	}
	/* the low half is now added onto the partial result */
	sreg = dreg;
    }

    if (lower == 0 && upper != 0)
	return;

    emit(DFORM(ADDI, dreg, sreg, lower));
    if (!dis)
	return;

    /* show the immediate the way the CPU will sign-extend it */
    shown = (lower & 0x8000)? lower - 0x10000: lower;
    if (sreg)
	VG_(printf)("\taddi r%d,r%d,0x%x\n", dreg, sreg, shown);
    else
	VG_(printf)("\tli r%d,%d\n", dreg, shown);
}

/*
 * Emit code that loads the 32-bit literal val into register reg.
 * Implemented as emit_addi() with a source register of 0.
 */
void VG_(emit_loadlit)(int reg, unsigned int val)
{
    emit_addi(reg, 0, val);
}

/*
 * Emit an indirect call to address: "lis r0,hi; ori r0,r0,lo; mtctr r0;
 * bctrl".  Also updates the call statistics; iscond only affects the
 * count of conditional calls, not the code that is emitted.
 */
void VG_(emit_call)(Addr address, Bool iscond)
{
    const Addr hi = (address >> 16) & 0xffff;
    const Addr lo = address & 0xffff;

    emit(DFORM(ADDIS, 0, 0, hi));
    emit(DFORM(ORI, 0, 0, lo));
    emit(XFORM31(0, SPRN_CTR, 0, MO_MTSPR, 0));
    emit(XLFORM(20, 0, BCCTR, 1));
    if (dis) {
	VG_(printf)("\tlis r0,0x%x\n\tori r0,r0,0x%x\n\tmtctr r0\n\tbctrl\n",
		    hi, lo);
    }

    n_ccalls++;
    n_cond_ccalls += iscond;
}

/*
 * 32-bit mask with bits mb..me set, using big-endian bit numbering
 * (bit 0 is the most significant bit).  When mb > me the mask wraps
 * around: bits mb..31 and 0..me are set.
 *
 * The arithmetic is done in unsigned long, so the result is explicitly
 * reduced to 32 bits; without that a wrap-around mask comes out with
 * bits above bit 31 set wherever unsigned long is wider than 32 bits.
 */
#define MASKG(mb, me)	(((0xffffffffUL >> (mb)) - (0x7fffffffUL >> (me)) \
			  - ((mb) > (me))) & 0xffffffffUL)

/*
 * Test whether val is a single contiguous run of 1 bits, allowing the
 * run to wrap around from bit 31 to bit 0.
 *
 * Returns 1 and stores the first and last bit of the run (big-endian
 * numbering, as for MASKG) in *mbp and *mep if so.
 * Returns 0, leaving *mbp and *mep untouched, if val is zero or is not
 * such a run.
 */
static int is_mask(unsigned int val, int *mbp, int *mep)
{
    int mb = 0, me = 31, i;

    if (val == 0)
	return 0;
    if ((val & 0x80000001U) != 0x80000001U) {
	/* mb <= me: the run starts at the first 1 bit from the top
	   and ends just before the next 0 bit */
	for (i = 0; i < 32; ++i)
	    if (val & (0x80000000U >> i))
		break;
	mb = i;
	while (++i < 32)
	    if ((val & (0x80000000U >> i)) == 0)
		break;
	me = i - 1;
    } else if (val != 0xffffffffU) {
	/* mb > me: both end bits are set, so the run wraps; it ends
	   before the first 0 bit and starts again at the next 1 bit */
	for (i = 0; i < 32; ++i)
	    if ((val & (0x80000000U >> i)) == 0)
		break;
	me = i - 1;
	while (++i < 32)
	    if (val & (0x80000000U >> i))
		break;
	mb = i;
    }
    /* all-ones falls through with mb = 0, me = 31 */

    /* reject values with further 1 bits outside the candidate run */
    if (val != MASKG(mb, me))
	return 0;
    *mbp = mb;
    *mep = me;
    return 1;
}

/*
 * Return the host register to read in order to get the value notionally
 * held in sreg: the register sreg is currently aliased to, or sreg itself
 * when it has no alias.  A temporary register used as a source must be
 * live before the current instruction.
 */
int VG_(real_source_reg)(int sreg)
{
    int holder;

    if (IS_TEMP(sreg))
	vg_assert(IS_RREG_LIVE(VG_(realreg_to_rank)(sreg), live_before));

    holder = regloc[sreg];
    if (holder <= 0)
	return sreg;

    if (dis)
	VG_(printf)("using r%d instead of r%d\n", holder, sreg);
    return holder;
}

/* If we are currently using dreg to store the value supposedly in
   some other register(s), move it into the first one that is still
   live, and set any other live ones to point to that one. */
int VG_(real_dest_reg)(int dreg)
{
    int r;
    int areg = 0;

    for (r = 1; r < 31; ++r) {
	if (regloc[r] == dreg) {
	    if (IS_TEMP(r)
		&& IS_RREG_LIVE(VG_(realreg_to_rank)(r), live_after)) {
		/* ok we need to preserve the value */
		regloc[r] = areg;
		if (areg == 0) {
		    emit(MOVEREG(r, dreg));
		    if (dis)
			VG_(printf)("\tmr r%d,r%d\n", r, dreg);
		    areg = r;
		}
	    } else {
		/* value is dead, forget where it was */
		regloc[r] = 0;
	    }
	}
    }
    instr_dest_reg = dreg;
    return dreg;
}

/*
 * Prepare reg for an instruction that both reads and writes it.
 * After the usual destination handling, if reg is itself aliased to
 * another register, its value is copied into reg ("mr reg,<alias>") and
 * the alias is dropped.  Always returns reg.
 */
int VG_(real_src_and_dest)(int reg)
{
    int holder;

    VG_(real_dest_reg)(reg);

    holder = regloc[reg];
    if (holder == 0)
	return reg;

    emit(MOVEREG(reg, holder));
    if (dis)
	VG_(printf)("\tmr r%d,r%d\n", reg, holder);
    regloc[reg] = 0;
    return reg;
}

/*
 * Record that the value notionally in dreg actually lives in sreg, so
 * later reads of dreg use sreg instead.  No code is emitted.
 */
void VG_(alias_reg)(int dreg, int sreg)
{
    regloc[dreg] = sreg;
    if (dis) {
	VG_(printf)("regloc[%d] = %d\n", dreg, sreg);
    }
}

/*
 * Stuff for munging FP instructions so we don't have to
 * load up all the FP registers and save them afterwards.
 *
 * fpu_src_map is indexed by the low 5 bits of the instruction's extended
 * opcode ((instr >> 1) & 0x1f) and says which of the FRA, FRB and FRC
 * register fields that instruction reads.  emit_fpuop() walks the fields
 * in the order FRA, FRB, FRC by testing the USE_FRA bit and shifting the
 * map value left, which is why the three flags are 4, 2 and 1.
 */
#define USE_FRA		4
#define USE_FRB		2
#define USE_FRC		1

unsigned char fpu_src_map[32] = {
    0,					/* mcrfs, fcmpu, fcmpo */
    0,					/* unimplemented */
    0,					/* unimplemented */
    0,					/* unimplemented */
    0,					/* unimplemented */
    0,					/* unimplemented */
    0,					/* mtfsfi, mtfsb0/1 */
    0,					/* mffs, mtfsf */
    USE_FRB,				/* fmr, fabs, fneg, fnabs */
    USE_FRB,				/* unimplemented */
    USE_FRB,				/* unimplemented */
    USE_FRB,				/* unimplemented */
    USE_FRB,				/* frsp */
    USE_FRB,				/* unimplemented */
    USE_FRB,				/* fctiw, fctid, fcfid */
    USE_FRB,				/* fctiwz, fctidz */
    USE_FRA | USE_FRB | USE_FRC,	/* unimplemented */
    USE_FRA | USE_FRB | USE_FRC,	/* unimplemented */
    USE_FRA | USE_FRB,			/* fdiv */
    USE_FRA | USE_FRB | USE_FRC,	/* unimplemented */
    USE_FRA | USE_FRB,			/* fsub */
    USE_FRA | USE_FRB,			/* fadd */
    USE_FRB,				/* fsqrt */
    USE_FRA | USE_FRB | USE_FRC,	/* fsel */
    USE_FRB,				/* fres */
    USE_FRA | USE_FRC,			/* fmul */
    USE_FRB,				/* frsqrte */
    USE_FRA | USE_FRB | USE_FRC,	/* unimplemented */
    USE_FRA | USE_FRB | USE_FRC,	/* fmsub */
    USE_FRA | USE_FRB | USE_FRC,	/* fmadd */
    USE_FRA | USE_FRB | USE_FRC,	/* fnmsub */
    USE_FRA | USE_FRB | USE_FRC		/* fnmadd */
};

/*
 * Emit one floating-point instruction, rewritten to work on a handful of
 * host FP registers instead of the guest's whole FP register file.
 *
 * The emitted sequence is:
 *   - a load of FPSCR from the ThreadState (via f0), unless FPSCR is
 *     already loaded or the minor opcode is 8 or 23;
 *   - an lfd from the ThreadState for every distinct source register the
 *     instruction reads (guest f0 goes to f0, the others to f2, f3, ...);
 *   - the instruction itself, with its register fields renumbered and
 *     its result, if any, directed to f1;
 *   - an stfd of f1 back to the guest destination register's slot.
 */
static void emit_fpuop(UInt instr)
{
    unsigned char assigned[32];	/* guest FPR -> host FPR, 0xff = unset */
    const int minor5 = (instr >> 1) & 0x1f;	/* A-form minor opcode */
    const int minor10 = (instr >> 1) & 0x3ff;	/* X-form minor opcode */
    int dest = -1;		/* guest destination FPR, -1 if none */
    int next_host = 2;		/* next free host FPR for a source */
    int srcmask;
    int shift;
    int guest, host;

    /* load up FPSCR if the instruction uses or sets it */
    if (!fpscr_dirty && minor5 != 8 && minor5 != 23) {
	emit(DFORM(LFD, 0, TST, TSTOFF(m_fpr) + 256));
	emit(XFORM63(0x0f, 0x1e, 0, MO_MTFSF, 0));
	if (dis)
	    VG_(printf)("\tlfd f0,%d(r%d)\n\tmtfsf 0xff,f0\n",
			TSTOFF(m_fpr) + 256, TST);
	fpscr_dirty = 1;
    }

    if (minor5 >= 8 || minor10 == MO_MFFS) {
	/* instruction writes to an FP reg, put result in f1 for now */
	dest = (instr >> 21) & 0x1f;
	instr = (instr & ~(0x1f << 21)) | (1 << 21);
    }

    /* Get map of FP reg sources used by instruction; the X-form
       compares and mtfsf override the table entry. */
    if (minor10 == MO_FCMPU || minor10 == MO_FCMPO)
	srcmask = USE_FRA | USE_FRB;
    else if (minor10 == MO_MTFSF)
	srcmask = USE_FRB;
    else
	srcmask = fpu_src_map[minor5];

    /* Reassign any FP reg sources used by the instruction. */
    /* We always reassign f0 to f0, and the others starting at f2. */
    memset(assigned, 0xff, sizeof(assigned));
    for (shift = 16; shift >= 6; shift -= 5) {
	/* the field at this position is used iff the top flag is set */
	const int used = srcmask & USE_FRA;

	srcmask <<= 1;
	if (!used)
	    continue;

	guest = (instr >> shift) & 0x1f;
	host = assigned[guest];
	if (host == 0xff) {
	    /* first use of this guest register: load it */
	    host = (guest == 0)? 0: next_host++;
	    assigned[guest] = host;
	    emit(DFORM(LFD, host, TST, TSTOFF(m_fpr) + 8 * guest));
	    if (dis)
		VG_(printf)("\tlfd f%d,%d(r%d)\n",
			    host, TSTOFF(m_fpr) + 8 * guest, TST);
	}
	instr = (instr & ~(0x1f << shift)) | (host << shift);
    }

    emit(instr);
    if (dis)
	VG_(printf)("\t.long 0x%x\t/* some fpu op */\n", instr);

    if (dest < 0)
	return;

    /* write the result back to the guest register's slot */
    emit(DFORM(STFD, 1, TST, TSTOFF(m_fpr) + 8 * dest));
    if (dis)
	VG_(printf)("\tstfd f1,%d(r%d)\n", TSTOFF(m_fpr) + 8 * dest, TST);
}

static void emit_uinstr(UCodeBlock *cb, int i)
{
    UInstr *u = &cb->instrs[i];
    int op, reg, b;
    int mb, me, off;
    int sreg, treg, dreg;
    int chain;
    int ov = 0;

    if (dis)
	VG_(pp_UInstr_regs)(i, u);

    if ((u->flags_r | u->flags_w) != FlagsEmpty) {
	if (!xer_valid) {
	    /* set the real XER */
	    emit(XFORM31(XER, SPRN_XER, 0, MO_MTSPR, 0));
	    if (dis)
		VG_(printf)("\tmtxer r%d\n", XER);
	    xer_valid = 1;
	}
	if (u->flags_w != FlagsEmpty)
	    xer_dirty = 1;
	if (u->flags_w & FlagOV)
	    ov = 0x200;
    }

    switch (u->opcode) {
    case NOP:
	break;

    case GET:
	vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo);
	vg_assert(u->tag2 == RealReg);
	if (u->tag1 == ArchReg && u->val1 == R_XER && xer_valid) {
	    emit(XFORM31(u->val2, SPRN_XER, 0, MO_MFSPR, 0));
	    if (dis)
		VG_(printf)("\tmfxer r%d\n", u->val2);
	    break;
	}
	if (u->tag1 == ArchReg && u->val1 == R_CR) {
	    VG_(alias_reg)(u->val2, CR);
	    break;
	}
	dreg = VG_(real_dest_reg)(u->val2);
	off = spill_or_arch_offset(u->tag1, u->val1);
	if (off >= 0) {
	    emit(DFORM(LWZ, dreg, TST, off));
	    if (dis)
		VG_(printf)("\tlwz r%d,%d(r%d)\n", dreg, off, TST);
	} else {
	    reg = -off;
	    emit(MOVEREG(dreg, reg));
	    if (dis)
		VG_(printf)("\tmr r%d,r%d\n", dreg, reg);
	}
	break;

    case PUT:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == ArchReg || u->tag2 == SpillNo);
	sreg = VG_(real_source_reg)(u->val1);
	if (u->tag2 == ArchReg && u->val2 == R_XER && xer_valid) {
	    emit(XFORM31(sreg, SPRN_XER, 0, MO_MTSPR, 0));
	    if (dis)
		VG_(printf)("\tmtxer r%d\n", sreg);
	    xer_dirty = 1;
	    break;
	}
	off = spill_or_arch_offset(u->tag2, u->val2);
	if (off >= 0) {
	    emit(DFORM(STW, sreg, TST, off));
	    if (dis)
		VG_(printf)("\tstw r%d,%d(r%d)\n", sreg, off, TST);
	} else {
	    reg = -off;
	    emit(MOVEREG(reg, sreg));
	    if (dis)
		VG_(printf)("\tmr r%d,r%d\n", reg, sreg);
	    realreg_dirty[reg] = 1;
	}
	break;

    case MOV:
	vg_assert(u->tag2 == RealReg);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    if (sreg == u->val2)
		break;
	    VG_(alias_reg)(u->val2, sreg);
	    break;
	case Literal:
	    dreg = VG_(real_dest_reg)(u->val2);
	    emit_addi(dreg, 0, u->lit32);
	    break;
	default:
	    VG_(core_panic)("emit_uinstr: unimplemented mov");
	}
	break;

    case INCEIP:
	vg_assert(u->tag1 == Lit16);
	emit(DFORM(ADDI, NIP, NIP, u->val1));
	if (dis)
	    VG_(printf)("\taddi r%d,r%d,%d\n", NIP, NIP, u->val1);
	realreg_dirty[NIP] = 1;
	++real_count;
	break;

    case LOAD:
	vg_assert(u->tag1 == RealReg && u->tag2 == RealReg);
	vg_assert(1 <= u->size && u->size <= 4);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val2);
	if (islocked) {
#ifdef USE_MACHINE_RESERVATION
	    vg_assert(u->size == 4);
	    emit(XFORM31(dreg, 0, sreg, MO_LWARX, 0));
	    if (dis)
		VG_(printf)("\tlwarx r%d,0,r%d\n", dreg, sreg);
#else
	    UInt addr = (UInt) &VG_(reservation);

	    vg_assert(u->size == 4);
	    emit(DFORM(ADDIS, 9, 0, (addr + 0x8000) >> 16));
	    emit(DFORM(STW, sreg, 9, addr & 0xffff));
	    emit(DFORM(LWZ, dreg, sreg, 0));
	    if (dis)
		VG_(printf)("\tlis r9,0x%x\n\tstw r%d,0x%x(r9)\n"
			    "\tlwz r%d,0(r%d)\n",
			    (addr + 0x8000) >> 16, sreg,
			    addr - ((addr + 0x8000) & ~0xffff),
			    dreg, sreg);
#endif
	    islocked = 0;
	} else if (u->size == 3) {
	    /* the nasty one, we only get this with lswi */
	    emit(DFORM(LBZ, 0, sreg, 0));
	    emit(DFORM(LHZ, dreg, sreg, 1));
	    emit(MFORM(RLWIMI, 0, dreg, 16, 8, 15, 0));
	    if (dis)
		VG_(printf)("\tlbz r0,0(r%d)\n"
			    "\tlhz r%d,1(r%d)\n"
			    "\trlwimi r%d,r0,16,8,15\n",
			    sreg, dreg, sreg, dreg);
	} else {
	    emit(DFORM(ldop[u->size - 1], dreg, sreg, 0));
	    if (dis)
		VG_(printf)("\t%s r%d,0(r%d)\n", ldopname[u->size-1],
			    dreg, sreg);
	}
	break;

    case STORE:
	vg_assert(u->tag1 == RealReg && u->tag2 == RealReg);
	vg_assert(1 <= u->size && u->size <= 4);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	if (islocked) {
#ifdef USE_MACHINE_RESERVATION
	    vg_assert(u->size == 4);
	    emit(XFORM31(sreg, 0, treg, MO_STWCX, 1));
	    if (dis)
		VG_(printf)("\tstwcx. r%d,0,r%d\n", sreg, treg);
#else
	    UInt addr = (UInt) &VG_(reservation);

	    vg_assert(u->size == 4);
	    emit(DFORM(ADDIS, 9, 0, (addr + 0x8000) >> 16));
	    emit(DFORM(LWZ, 0, 9, addr & 0xffff));
	    emit(DFORM(CMPI, 0, 0, 0));
	    emit(XFORM19(2, 2, 2, CRNOR));
	    emit(BFORM(BC, 4, 2, 4, 0));
	    emit(DFORM(ADDI, 0, 0, 0));
	    emit(DFORM(STW, sreg, treg, 0));
	    emit(DFORM(STW, 0, 9, addr & 0xffff));
	    if (dis) {
		VG_(printf)("\tlis r9,0x%x\n\tlwz r0,0x%x(r9)\n",
			    (addr + 0x8000) >> 16,
			    addr - ((addr + 0x8000) & ~0xffff));
		VG_(printf)("\tcmpwi r0,0\n\tcrnot eq,eq\n\tbne 1f\n"
			    "\tli r0,0\n\tstw r%d,0(r%d)\n", sreg, treg);
		VG_(printf)("\tstw r0,0x%x(r9)\n",
			    addr - ((addr + 0x8000) & ~0xffff));
	    }
#endif
	    islocked = 0;
	} else if (u->size == 3) {
	    /* the nasty one, we only get this with stswi */
	    emit(MFORM(RLWINM, sreg, 0, 16, 16, 31, 0));
	    emit(DFORM(STB, 0, treg, 0));
	    emit(DFORM(STH, sreg, treg, 1));
	    if (dis)
		VG_(printf)("\tsrwi r0,r%d,16\n"
			    "\tstb r0,0(r%d)\n"
			    "\tsth r%d,1(r%d)\n",
			    sreg, treg, sreg, treg);
	} else {
	    emit(DFORM(stop[u->size - 1], sreg, treg, 0));
	    if (dis)
		VG_(printf)("\t%s r%d,0(r%d)\n", stopname[u->size-1],
			    sreg, treg);
	}
	break;

    case LOCK:
	islocked = 1;
	break;

#if 0
    case LOAD_R:
	vg_assert(u->tag1 == RealReg && u->tag2 == RealReg);
	vg_assert(u->size == 4);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val2);
	emit(XFORM31(dreg, 0, sreg, MO_LWARX, 0));
	if (dis)
	    VG_(printf)("\tlwarx r%d,0,r%d\n", dreg, sreg);
	break;

    case STORE_C:
	vg_assert(u->tag1 == RealReg && u->tag2 == RealReg);
	vg_assert(u->tag3 == RealReg);
	vg_assert(u->size == 4);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val3);
	emit(XFORM31(sreg, 0, treg, MO_STWCX, 1));
	emit(XFORM31(u->val3, 0, 0, MO_MFCR, 0));
	if (dis)
	    VG_(printf)("\tstwcx. r%d,0,r%d\n\tmfcr r%d\n",
			sreg, treg, u->val3);
	break;
#endif

    case ADD:
	vg_assert(u->tag2 == RealReg);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    emit(XFORM31(dreg, treg, sreg, ov + MO_ADD, 0));
	    if (dis)
		VG_(printf)("\tadd%s r%d,r%d,r%d\n", (ov? "o": ""),
			    dreg, treg, sreg);
	    break;
	case Literal:
	    emit_addi(dreg, treg, u->lit32);
	    break;
	default:
	    VG_(core_panic)("emit_uinstr: add unimp");
	}
	break;

    case ADC:
	vg_assert(u->tag2 == RealReg);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    op = (u->flags_r == FlagsEmpty)? MO_ADDC: MO_ADDE;
	    emit(XFORM31(dreg, treg, sreg, ov + op, 0));
	    if (dis)
		VG_(printf)("\tadd%c%s r%d,r%d,r%d\n",
			    (u->flags_r == FlagsEmpty)? 'c': 'e',
			    (ov? "o": ""), dreg, treg, sreg);
	    break;
	case Literal:
	    if (u->flags_r == FlagsEmpty) {
		vg_assert(u->lit32 + 0x8000 <= 0xffffU);
		emit(DFORM(ADDIC, dreg, treg, u->lit32 & 0xffff));
		if (dis)
		    VG_(printf)("\taddic r%d,r%d,%d\n", dreg, treg,
				(int)u->lit32);
	    } else {
		switch (u->lit32) {
		case 0:
		    emit(XFORM31(dreg, treg, 0, ov + MO_ADDZE, 0));
		    if (dis)
			VG_(printf)("\taddze%s r%d,r%d\n", (ov? "o": ""),
				    dreg, treg);
		    break;
		case -1:
		    emit(XFORM31(dreg, treg, 0, ov + MO_ADDME, 0));
		    if (dis)
			VG_(printf)("\taddme%s r%d,r%d\n", (ov? "o": ""),
				    dreg, treg);
		    break;
		default:
		    emit_addi(3, 0, u->lit32);
		    emit(XFORM31(dreg, treg, 3, ov + MO_ADDE, 0));
		    if (dis)
			VG_(printf)("\tadde%s r%d,r%d,r3\n", (ov? "o": ""),
				    dreg, treg);
		    break;
		}
	    }
	    break;
	default:
	    VG_(core_panic)("emit_uinstr: adc unimp");
	}
	break;

    case SUB:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	emit(XFORM31(dreg, sreg, treg, ov + MO_SUBF, 0));
	if (dis)
	    VG_(printf)("\tsubf%s r%d,r%d,r%d\n", (ov? "o": ""),
			dreg, sreg, treg);
	break;

    case SBB:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	op = (u->flags_r == FlagsEmpty)? MO_SUBFC: MO_SUBFE;
	emit(XFORM31(dreg, sreg, treg, ov + op, 0));
	if (dis)
	    VG_(printf)("\tsubf%c%s r%d,r%d,r%d\n",
			(u->flags_r == FlagsEmpty)? 'c': 'e',
			(ov? "o": ""), dreg, sreg, treg);
	break;

    case NEG:
	vg_assert(u->tag1 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val1);
	emit(XFORM31(dreg, sreg, 0, ov + MO_NEG, 0));
	if (dis)
	    VG_(printf)("\tneg%s r%d,r%d\n", (ov? "o": ""), dreg, sreg);
	break;

    case CNTLZ:
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val1);
	vg_assert(u->tag1 == RealReg);
	emit(XFORM31(sreg, dreg, 0, MO_CNTLZW, 0));
	if (dis)
	    VG_(printf)("\tcntlzw r%d,r%d\n", dreg, sreg);
	break;

    case RDTB:
	vg_assert(u->tag1 == Lit16);
	vg_assert(u->tag2 == RealReg);
	sreg = u->val1;
	dreg = VG_(real_dest_reg)(u->val2);
	emit(XFORM31(dreg, sreg & 0x1f, (sreg >> 5) & 0x1f, MO_MFTB, 0));
	if (dis)
	    VG_(printf)("\tmftb%s r%d\n", (sreg & 1)? "u": "", dreg);
	break;

    case AND:
	vg_assert(u->tag2 == RealReg);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    emit(XFORM31(treg, dreg, sreg, MO_AND, 0));
	    if (dis)
		VG_(printf)("\tand r%d,r%d,r%d\n", dreg, treg, sreg);
	    break;
	case Literal:
	    if ((u->lit32 & 0xffff0000) == 0) {
		emit(DFORM(ANDIR, treg, dreg, u->lit32));
		if (dis)
		    VG_(printf)("\tandi. r%d,r%d,0x%x\n",
				dreg, treg, u->lit32);
	    } else if (is_mask(u->lit32, &mb, &me)) {
		emit(MFORM(RLWINM, treg, dreg, 0, mb, me, 0));
		if (dis)
		    VG_(printf)("\trlwinm r%d,r%d,0,%d,%d\n",
				dreg, treg, mb, me);
	    } else {
		emit_addi(4, 0, u->lit32);
		emit(XFORM31(treg, dreg, 4, MO_AND, 0));
		if (dis)
		    VG_(printf)("\tand r%d,r%d,r4\n", dreg, treg);
	    }
	    break;
	default:
	    VG_(core_panic)("emit_uinstr and unimp");
	}
	break;

    case OR:
    case XOR:
	vg_assert(u->tag2 == RealReg);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    op = (u->opcode == OR)? MO_OR: MO_XOR;
	    emit(XFORM31(treg, dreg, sreg, op, 0));
	    if (dis)
		VG_(printf)("\t%sor r%d,r%d,r%d\n",
			    (u->opcode == OR)? "": "x", dreg, treg, sreg);
	    break;
	case Literal:
	    if ((u->lit32 & 0xffff0000U) != 0) {
		op = (u->opcode == OR)? ORIS: XORIS;
		emit(DFORM(op, treg, dreg, u->lit32 >> 16));
		if (dis)
		    VG_(printf)("\t%soris r%d,r%d,0x%x\n",
				(u->opcode == OR)? "": "x",
				dreg, treg, u->lit32 >> 16);
		treg = dreg;
	    }
	    if ((u->lit32 & 0xffff) != 0) {
		op = (u->opcode == OR)? ORI: XORI;
		emit(DFORM(op, treg, dreg, u->lit32 & 0xffff));
		if (dis)
		    VG_(printf)("\t%sori r%d,r%d,0x%x\n",
				(u->opcode == OR)? "": "x",
				dreg, treg, u->lit32 & 0xffff);
	    }
	    break;
	default:
	    VG_(core_panic)("emit_uinstr [x]or unimp");
	}
	break;

    case ROL:
	vg_assert(u->tag2 == RealReg);
	vg_assert(u->size == 4);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    emit(MFORM(RLWNM, treg, dreg, sreg, 0, 31, 0));
	    if (dis)
		VG_(printf)("\trotlw r%d,r%d,r%d\n", dreg, treg, sreg);
	    break;
	case Lit16:
	    emit(MFORM(RLWINM, treg, dreg, u->val1 & 31, 0, 31, 0));
	    if (dis)
		VG_(printf)("\trotlwi r%d,r%d,%d\n", dreg, treg, u->val1 & 31);
	    break;
	case Literal:
	    emit(MFORM(RLWINM, treg, dreg, u->lit32 & 31, 0, 31, 0));
	    if (dis)
		VG_(printf)("\trotlwi r%d,r%d,%d\n", dreg, treg, u->lit32&31);
	    break;
	default:
	    VG_(core_panic)("emit uinstr: rol unimp");
	}
	break;

    case NOT:
	vg_assert(u->tag1 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val1);
	emit(XFORM31(sreg, dreg, sreg, MO_NOR, 0));
	if (dis)
	    VG_(printf)("\tnot r%d,r%d\n", dreg, sreg);
	break;

    case CMP:
    case CMPU:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == RealReg);
	vg_assert(u->tag3 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val3);
	op = (u->opcode == CMP)? MO_CMP: MO_CMPL;
	emit(XFORM31(0, sreg, treg, op, 0));
	emit(XFORM31(dreg, 0, 0, MO_MFCR, 0));
	if (dis)
	    VG_(printf)("\tcmp%sw r%d,r%d\n\tmfcr r%d\n",
			(u->opcode == CMP)? "": "l",
			sreg, treg, dreg);
	break;

    case MUL:
	vg_assert(u->tag2 == RealReg);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    emit(XFORM31(dreg, treg, sreg, MO_MULLW, 0));
	    if (dis)
		VG_(printf)("\tmullw r%d,r%d,r%d\n", dreg, treg, sreg);
	    break;
	case Literal:
	    emit(DFORM(MULLI, dreg, treg, u->lit32 & 0xffff));
	    if (dis)
		VG_(printf)("\tmulli r%d,r%d,%d\n", dreg, treg, (int)u->lit32);
	    break;
	default:
	    VG_(core_panic)("emit_uinstr mul unimp");
	}
	break;

    case MULH:
    case UMULH:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	op = (u->opcode == MULH)? MO_MULHW: MO_MULHWU;
	emit(XFORM31(dreg, treg, sreg, op, 0));
	if (dis)
	    VG_(printf)("\tmulhw%s r%d,r%d,r%d\n",
			(u->opcode == MULH)? "": "u",
			dreg, treg, sreg);
	break;

    case DIV:
    case UDIV:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	op = (u->opcode == DIV)? MO_DIVW: MO_DIVWU;
	emit(XFORM31(dreg, treg, sreg, ov + op, 0));
	if (dis)
	    VG_(printf)("\tdivw%s r%d,r%d,r%d\n",
			(u->opcode == DIV)? "": "u",
			dreg, treg, sreg);
	break;

    case SHL:
    case SHR:
	vg_assert(u->tag2 == RealReg);
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    op = (u->opcode == SHL)? MO_SLW: MO_SRW;
	    emit(XFORM31(treg, dreg, sreg, op, 0));
	    if (dis)
		VG_(printf)("\ts%cw r%d,r%d,r%d\n",
			    (u->opcode == SHL)? 'l': 'r',
			    dreg, treg, sreg);
	    break;
	case Lit16:
	    vg_assert(u->val1 < 32);
	    if (u->opcode == SHL) {
		emit(MFORM(RLWINM, treg, dreg,
			   u->val1, 0, 31 - u->val1, 0));
		if (dis)
		    VG_(printf)("\tslwi r%d,r%d,%d\n", dreg, treg, u->val1);
	    } else {
		emit(MFORM(RLWINM, treg, dreg,
			   32 - u->val1, u->val1, 31, 0));
		if (dis)
		    VG_(printf)("\tsrwi r%d,r%d,%d\n", dreg, treg, u->val1);
	    }
	    break;
	default:
	    VG_(core_panic)("emit_uinstr shl/r unimp");
	}
	break;

    case SAR:
	vg_assert(u->tag2 == RealReg);
	if ((u->flags_w & FlagCA) == 0)
	    VG_(will_trash_xer)();
	treg = VG_(real_source_reg)(u->val2);
	dreg = VG_(real_dest_reg)(u->val2);
	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    emit(XFORM31(treg, dreg, sreg, MO_SRAW, 0));
	    if (dis)
		VG_(printf)("\tsraw r%d,r%d,%d\n", dreg, treg, sreg);
	    break;
	case Literal:
	    emit(XFORM31(treg, dreg, u->lit32 & 0x1f, MO_SRAWI, 0));
	    if (dis)
		VG_(printf)("\tsrawi r%d,r%d,%d\n", dreg, treg, u->lit32&0x1f);
	    break;
	case Lit16:
	    emit(XFORM31(treg, dreg, u->val1 & 0x1f, MO_SRAWI, 0));
	    if (dis)
		VG_(printf)("\tsrawi r%d,r%d,%d\n", dreg, treg, u->val1&0x1f);
	    break;
	default:
	    VG_(core_panic)("emit_uinstr: sar unimp");
	}
	break;

    case CMP0:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val2);
	emit(DFORM(CMPI, 0, sreg, 0));
	emit(XFORM31(dreg, 0, 0, MO_MFCR, 0));
	if (dis)
	    VG_(printf)("\tcmpwi r%d,0\n\tmfcr r%d\n", sreg, dreg);
	break;

    case XBIT:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == Literal);
	vg_assert(u->tag3 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val3);
	emit(MFORM(RLWINM, sreg, dreg, (u->lit32 + 1) & 31, 31, 31, 0));
	if (dis)
	    VG_(printf)("\trlwinm r%d,r%d,%d,31,31\n",
			dreg, sreg, (u->lit32 + 1) & 31);
	break;

    case ICRF:
	vg_assert(u->tag1 == RealReg
		  || (u->tag1 == ArchReg && u->val1 == R_CR));
	vg_assert(u->tag2 == Lit16);
	vg_assert(u->tag3 == RealReg
		  || (u->tag3 == ArchReg && u->val3 == R_CR));
	b = u->val2 * 4;
	if (u->tag1 == ArchReg)
	    reg = CR;
	else if (u->val1 == R_CR) {
	    emit(XFORM31(0, 0, 0, MO_MFCR, 0));
	    if (dis)
		VG_(printf)("\tmfcr r0\n");
	    reg = 0;
	} else
	    reg = VG_(real_source_reg)(u->val1);
	if (u->tag3 == ArchReg)
	    dreg = VG_(real_dest_reg)(CR);
	else
	    dreg = VG_(real_src_and_dest)(u->val3);
	emit(MFORM(RLWIMI, reg, dreg, (u->extra4b * 4 - b) & 31, b, b + 3, 0));
	if (dis)
	    VG_(printf)("\trlwimi r%d,r%d,%d,%d,%d\n",
			dreg, reg, (u->extra4b * 4 - b) & 31, b, b + 3);
	break;

    case IBIT:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == Literal);
	vg_assert(u->tag3 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_src_and_dest)(u->val3);
	b = u->lit32;
	emit(MFORM(RLWIMI, sreg, dreg, 31 - b, b, b, 0));
	if (dis)
	    VG_(printf)("\trlwimi r%d,r%d,%d,%d,%d\n",
			dreg, sreg, 31 - b, b, b);
	break;

    case SETZ:
	vg_assert(u->tag1 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val1);
	emit(XFORM31(sreg, dreg, 0, MO_CNTLZW, 0));
	emit(MFORM(RLWINM, dreg, dreg, 27, 31, 31, 0));
	if (dis)
	    VG_(printf)("\tcntlzw r%d,r%d\n\trlwinm r%d,r%d,27,31,31\n",
			dreg, sreg, dreg, dreg);
	break;

    case JMP:
	vg_assert(u->tag1 == RealReg || u->tag1 == Literal);
	if (fpscr_dirty)
	    store_fpscr();
	if (xer_dirty)
	    save_xer();
	if (!lr_live)
	    reload_lr();
	b = -1;
	chain = (u->jmpkind == JmpBoring || u->jmpkind == JmpCall)
	    && u->tag1 == Literal
	    && VG_(clo_chain_bb)
	    && njumps < VG_MAX_JUMPS;
	set_r3_from_jmpkind(u->jmpkind, chain);

	if (u->cond != CondAlways) {
	    op = (u->cond & CondIfSet)? 12: 4;
	    emit(MFORM(RLWINM, CR, 0, u->cond & 0x1f, 0, 0, 1));
	    b = emitted_used;
	    emit(BFORM(BC, op, 2, 0, 0));
	    if (dis)
		VG_(printf)("\trlwinm. r0,r%d,%d,0,0\n"
			    "\tb%s 1f\n",
			    CR, u->cond & 0x1f,
			    (u->cond & CondIfSet)? "eq": "ne");
	}

	switch (u->tag1) {
	case RealReg:
	    sreg = VG_(real_source_reg)(u->val1);
	    emit(MOVEREG(NIP, sreg));
	    if (dis)
		VG_(printf)("\tmr r%d,r%d\n", NIP, sreg);
	    break;
	case Literal:
	    emit_addi(NIP, 0, u->lit32);
	    break;
	default:
	    VG_(core_panic)("emit_uinstr(JMP, default)");
	}

	if (!chain) {
	    emit(XLFORM(20, 0, BCLR, 0));
	    if (dis)
		VG_(printf)("\tblr\n");
	} else {
	    emit(XLFORM(20, 0, BCLR, 1));
	    emit(0);
	    if (dis)
		VG_(printf)("\tblrl /* jumpsite */\n\t.long 0\n");
	    jumps[njumps++] = (emitted_used - 2) * sizeof(UInt);
	}
	if (b >= 0) {
	    VG_(patch_branch)(b);	/* patch up the branch */
	    if (dis)
		VG_(printf)("1:\n");
	}
	break;

    case JIFZ:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == Literal);
	sreg = VG_(real_source_reg)(u->val1);
	if (fpscr_dirty)
	    store_fpscr();
	if (xer_dirty)
	    save_xer();
	if (!lr_live)
	    reload_lr();
	chain = VG_(clo_chain_bb) && njumps < VG_MAX_JUMPS;
	emit(DFORM(CMPI, 0, sreg, 0));
	b = emitted_used;
	emit(BFORM(BC, 5, 2, 0, 0));
	if (dis)
	    VG_(printf)("\tcmpwi r%d,0\n\tbne+ 1f\n", sreg);
	emit_addi(NIP, 0, u->lit32);
	emit(DFORM(ADDI, 3, 0, (chain? 0xffff: 0)));
	if (chain)
	    jumps[njumps++] = emitted_used * sizeof(UInt);
	emit(XLFORM(20, 0, BCLR, chain));
	if (chain)
	    emit(0);
	/* patch up the branch */
	VG_(patch_branch)(b);
	if (dis) {
	    if (!chain)
		VG_(printf)("\tli r3,0\n\tblr\n");
	    else
		VG_(printf)("\tli r3,-1\n\tblrl	/* jumpsite */\n"
			    "\t.long 0\n\t1:\n");
	}
	break;

    case BSWAP:
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->size == 2 || u->size == 4);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val1);
	treg = sreg;
	if (dreg == sreg) {
	    emit(MOVEREG(0, sreg));
	    if (dis)
		VG_(printf)("mr r0,r%d\n", sreg);
	    sreg = 0;
	}
	if (u->size == 2) {
	    emit(MFORM(RLWINM, treg, dreg, 8, 16, 23, 0));
	    emit(MFORM(RLWIMI, sreg, dreg, 24, 24, 31, 0));
	    if (dis)
		VG_(printf)("\trlwinm r%d,r%d,8,16,23\n"
			    "\trlwimi r%d,r%d,24,24,31\n",
			    dreg, treg, dreg, sreg);
	} else {	/* u->size == 4 */
	    emit(MFORM(RLWINM, treg, dreg, 8, 0, 31, 0));
	    emit(MFORM(RLWIMI, sreg, dreg, 24, 0, 7, 0));
	    emit(MFORM(RLWIMI, sreg, dreg, 24, 16, 23, 0));
	    if (dis)
		VG_(printf)("\trotlwi r%d,r%d,8\n"
			    "\trlwimi r%d,r%d,24,0,7\n"
			    "\trlwimi r%d,r%d,24,16,23\n",
			    dreg, treg, dreg, sreg, dreg, sreg);
	}
	break;

    case WIDEN:
	vg_assert(u->tag1 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	dreg = VG_(real_dest_reg)(u->val1);
	if (u->signed_widen) {
	    op = (u->extra4b > 1)? MO_EXTSH: MO_EXTSB;
	    emit(XFORM31(sreg, dreg, 0, op, 0));
	    if (dis)
		VG_(printf)("\texts%c r%d,r%d\n",
			    (u->extra4b > 1)? 'h': 'b',
			    dreg, sreg);
	} else {
	    emit(MFORM(RLWINM, sreg, dreg, 0,
		       (u->extra4b > 1)? 16: 24, 31, 0));
	    if (dis)
		VG_(printf)("\trlwinm r%d,r%d,0,%d,31\n", dreg, sreg,
			    (u->extra4b > 1)? 16: 24);
	}
	break;

    case FPU_R:
    case FPU_W:
	/* Note that FP loads and stores don't use or alter FPSCR */
	vg_assert(u->tag1 == Lit16);
	vg_assert(u->tag2 == RealReg);
	vg_assert(u->size == 4 || u->size == 8);
	treg = VG_(real_source_reg)(u->val2);
	op = (u->opcode == FPU_R)? LFS: STFS;
	if (u->size == 8)
	    op += (LFD - LFS);
	off = TSTOFF(m_fpr) + 8 * u->val1;
	if (u->opcode == FPU_W) {
	    emit(DFORM(LFD, 0, TST, off));
	    if (dis)
		VG_(printf)("\tlfd f0,%d(r%d)\n", off, TST);
	}
	emit(DFORM(op, 0, treg, 0));
	if (dis)
	    VG_(printf)("\t%sf%c f0,0(r%d)\n",
			(u->opcode == FPU_R)? "l": "st",
			(u->size == 8)? 'd': 's', treg);
	if (u->opcode == FPU_R) {
	    emit(DFORM(STFD, 0, TST, off));
	    if (dis)
		VG_(printf)("\tstfd f0,%d(r%d)\n", off, TST);
	}
	break;

    case FPU:
	vg_assert(u->tag1 == Literal);
	emit_fpuop(u->lit32);
	break;

    case VEC_R:
    case VEC_W:
	vg_assert(u->tag1 == Lit16);
	vg_assert(u->tag2 == RealReg);
	vg_assert(u->size == 1 || u->size == 2
		  || u->size == 4 || u->size == 16);
	if (!vec_loaded)
	    load_vec();
	treg = VG_(real_source_reg)(u->val2);
	op = (u->opcode == VEC_W)? (MO_STVX - MO_LVX): 0;
	if (u->size == 16) {
	    op += (u->extra4b << 8) + MO_LVX;
	    emit(XFORM31(u->val1, 0, treg, op, 0));
	    if (dis)
		VG_(printf)("\t%svx%s vr%d,0,r%d\n",
			    (u->opcode == VEC_R)? "l": "st",
			    (u->extra4b? "l": ""), u->val1, treg);
	} else {
	    op += lveop[u->size - 1];
	    emit(XFORM31(u->val1, 0, treg, op, 0));
	    if (dis)
		VG_(printf)("\t%sve%cx vr%d,0,r%d\n",
			    (u->opcode == VEC_R)? "l": "st",
			    "bh.w"[u->size-1], u->val1, treg);
	}
	break;

    case VEC:		/* misc vector operations */
	vg_assert(u->tag1 == Literal);
	if (!vec_loaded)
	    load_vec();
	emit(u->lit32);
	if (dis)
	    VG_(printf)("\t.long 0x%x\t/* some altivec op */\n", u->lit32);
	break;

    case VEC_FROMREG:	/* set a vector reg based on GPR(s) */
	vg_assert(u->tag1 == RealReg);
	vg_assert(u->tag2 == Lit16);
	if (!vec_loaded)
	    load_vec();
	sreg = VG_(real_source_reg)(u->val1);
	switch (u->val2 >> 8) {
	case 0:		/* lvsl */
	case 1:		/* lvsr */
	    op = (u->val2 & 0x100)? MO_LVSR: MO_LVSL;
	    emit(XFORM31(u->val2 & 0x1f, 0, sreg, op, 0));
	    if (dis)
		VG_(printf)("\tlvs%c vr%d,0,r%d\n",
			    ((u->val2 & 0x100)? 'r': 'l'),
			    u->val2 & 0x1f, sreg);
	    break;
	case 2:		/* mtvrsave */
	    emit(XFORM31(sreg, 0, 8, MO_MTSPR, 0));
	    if (dis)
		VG_(printf)("\tmtvrsave r%d\n", sreg);
	    break;
	default:
	    VG_(core_panic)("VEC_FROMREG unimp");
	}
	break;

    case VEC_TOREG:	/* set GPR from vec reg */
	vg_assert(u->tag1 == Lit16);
	vg_assert(u->tag2 == RealReg);
	/* mfvrsave is the only operation implemented here */
	vg_assert((u->val1 >> 8) == 2);
	if (!vec_loaded)
	    load_vec();
	dreg = VG_(real_dest_reg)(u->val2);
	emit(XFORM31(dreg, 0, 8, MO_MFSPR, 0));
	if (dis)
	    VG_(printf)("\tmfvrsave r%d\n", dreg);
	break;

    case CALLM_S:
	argreg = 3;
	break;

    case PUSH:
	/* We expect the arguments in forward order, not reverse order
	   as on x86. */
	vg_assert(u->tag1 == RealReg);
	sreg = VG_(real_source_reg)(u->val1);
	emit(MOVEREG(argreg, sreg));
	if (dis)
	    VG_(printf)("\tmr r%d,r%d\n", argreg, sreg);
	++argreg;
	break;

    case CALLM:
	vg_assert(u->tag1 == Literal);
	VG_(save_live_regs)();
	VG_(emit_call)(u->lit32, False);
	argreg = 3;
	break;

    case CALLM_E:
	break;

    case CCALL:
	VG_(save_live_regs)();
	if (u->argc >= 1) {
	    vg_assert(u->tag1 == RealReg);
	    reg = VG_(real_source_reg)(u->val1);
	    emit(MOVEREG(3, reg));
	    if (dis)
		VG_(printf)("\tmr r3,r%d\n", reg);
	} else
	    vg_assert(u->tag1 == NoValue);
	if (u->argc >= 2) {
	    vg_assert(u->tag2 == RealReg);
	    reg = VG_(real_source_reg)(u->val2);
	    emit(MOVEREG(4, reg));
	    if (dis)
		VG_(printf)("\tmr r4,r%d\n", reg);
	} else
	    vg_assert(u->tag2 == NoValue);
	if (u->argc >= 3) {
	    vg_assert(u->tag3 == RealReg);
	    reg = VG_(real_source_reg)(u->val3);
	    emit(MOVEREG(5, reg));
	    if (dis)
		VG_(printf)("\tmr r5,r%d\n", reg);
	} else
	    vg_assert(u->tag3 == (u->has_ret_val? RealReg: NoValue));
	dreg = u->has_ret_val? VG_(real_dest_reg)(u->val3): -1;
	VG_(emit_call)(u->lit32, False);
	if (u->has_ret_val) {
	    emit(MOVEREG(dreg, 3));
	    if (dis)
		VG_(printf)("\tmr r%d,r3\n", dreg);
	}
	break;

    default:
	if (VG_(needs).extended_UCode) {
	    SK_(emit_XUInstr)(u, 0);
	} else {
	    VG_(pp_UInstr)(0,u);
	    VG_(core_panic)("emit_uinstr: unimplemented opcode");
	}
    }
}

/*
 * Generate host code for one translated basic block.
 *
 * cb     - the UCode block to translate; cb->orig_eip, cb->instrs[] and
 *          cb->used are read, and cb->jumps[], cb->relocs and cb->nrelocs
 *          are filled in before returning.
 * nbytes - out parameter: receives the size of the generated code in bytes.
 *
 * Returns the file-static emitted_code buffer, cast to UChar *.
 *
 * The emitted code consists of a prologue that tests the dispatch counter,
 * optional calls to the function-wrapping hooks, and then whatever
 * emit_uinstr() produces for each UInstr in the block.
 */
UChar *VG_(emit_code)(UCodeBlock *cb, Int *nbytes )
{
    int i, k;
#if 0
    int synthp, realp;
#endif
   const FuncWrapper *wrapper;

    reset_state();
    /* NOTE(review): `dis' is #defined as VG_(print_codegen) near the top of
       this file, so this statement appears to expand to a self-assignment --
       confirm whether it is still needed. */
    dis = VG_(print_codegen);

    /* generate code to decrement the dispatch counter and return if 0 */
    /* (the emitted sequence returns first if the counter is already 0,
       and otherwise adds 0xffff, i.e. -1, to it) */
    emit(DFORM(CMPI, 0, DISPATCH_CTR, 0));	/* cmpwi 17,0 */
    if (dis)
	VG_(printf)("\tcmpwi r%d,0\n", DISPATCH_CTR);
    emit(XLFORM(12, 2, BCLR, 0));		/* beqlr */
    emit(DFORM(ADDI, DISPATCH_CTR, DISPATCH_CTR, 0xffff));
    if (dis)
	VG_(printf)("\tbeqlr\n\taddi r%d,r%d,-1\n",
		    DISPATCH_CTR, DISPATCH_CTR);

#if 0
    /* generate code to increment the count of synthetic and real
       instructions executed */
    emit(DFORM(LWZ, 3, TST, TSTOFF(total_synth_instrs)));
    synthp = emitted_used;
    emit(DFORM(ADDI, 3, 3, 0));
    emit(DFORM(STW, 3, TST, TSTOFF(total_synth_instrs)));
    if (dis) {
	VG_(printf)("\tlwz r3,%d(%d)\n",
		    TSTOFF(total_synth_instrs), TST);
	VG_(printf)("\taddi r3,r3,n_synth\n\tstw r3,%d(%d)\n",
		    TSTOFF(total_synth_instrs), TST);
    }

    emit(DFORM(LWZ, 4, TST, TSTOFF(total_real_instrs)));
    realp = emitted_used;
    emit(DFORM(ADDI, 4, 4, 0));
    emit(DFORM(STW, 4, TST, TSTOFF(total_real_instrs)));
    if (dis) {
	VG_(printf)("\tlwz r4,%d(%d)\n",
		    TSTOFF(total_real_instrs), TST);
	VG_(printf)("\taddi r4,r4,n_real\n\tstw r4,%d(%d)\n",
		    TSTOFF(total_real_instrs), TST);
    }
#endif

    /* presumably loads the block's original address into the synthetic
       NIP register; emit_addi is defined outside this view -- confirm */
    emit_addi(NIP, 0, cb->orig_eip);

    /* see if this is the return from a wrapped function */
    if (VG_(is_wrapper_return)(cb->orig_eip)) {
	/* r3 = thread state pointer, then call VG_(wrap_after) */
	emit(MOVEREG(3, TST));
	VG_(emit_call)((Addr)VG_(wrap_after), False);
    }

    /* see if this is the start of a wrapped function */
    wrapper = VG_(is_wrapped)(cb->orig_eip);
    if (wrapper != NULL) {
	/* r3 = thread state pointer; r4 presumably receives the wrapper
	   descriptor address; then call VG_(wrap_before) */
	emit(MOVEREG(3, TST));
	emit_addi(4, 0, (UInt) wrapper);
	VG_(emit_call)((Addr)VG_(wrap_before), False);
    }

    /* Translate each UInstr in order.  live_before/live_after and
       instr_dest_reg are file-level state shared with emit_uinstr(). */
    live_before = 0;
    for (i = 0; i < cb->used; ++i) {
	instr_dest_reg = -1;
	live_after = cb->instrs[i].regs_live_after;
	emit_uinstr(cb, i);
	/* If a destination register was recorded for this instruction,
	   clear its regloc[] entry (the meaning of regloc[] is defined
	   outside this view). */
	if (instr_dest_reg >= 0) {
	    if (dis && regloc[instr_dest_reg] != 0)
		VG_(printf)("dest is %d, setting regloc[] = 0\n",
			    instr_dest_reg);
	    regloc[instr_dest_reg] = 0;
	}
	live_before = live_after;
    }

#if 0
    emitted_code[realp] += emitted_used;
    emitted_code[synthp] += real_count + 1;	/* +1 for final branch */
#endif

#if 0	/* for debugging... */
    if (dis) {
	VG_(printf)("\n");
	for (i = 0; i < emitted_used; ++i) {
	    VG_(printf)("%x: %08x  ", i * 4, emitted_code[i]);
	    print_insn_big_powerpc(emitted_code[i], i * 4);
	    for (k = 0; k < njumps; ++k) {
		if (i * sizeof(UInt) == jumps[k])
		    VG_(printf)("    ***jumpsite***");
	    }
	    VG_(printf)("\n");
	}
	VG_(printf)("\n");
    }
#endif

    /* Hand the jump-site byte offsets recorded while emitting JMP/JIFZ
       over to the code block; no relocations are ever produced. */
    vg_assert(njumps <= VG_MAX_JUMPS);
    for (k = 0; k < njumps; ++k)
	cb->jumps[k] = jumps[k];
    cb->relocs = NULL;
    cb->nrelocs = 0;

    /* emitted_used counts 32-bit instruction words */
    *nbytes = emitted_used * sizeof(unsigned int);
    return (UChar *) emitted_code;
}

/*
 * Decode the instruction word at address `a' and, if it is a plain
 * relative branch (top six bits 0x48000000, low two bits clear),
 * return the address it branches to.  Any other instruction yields 0.
 */
Addr VG_(get_jmp_dest)(Addr a)
{
    const UInt insn = *(const UInt *)a;
    UInt disp;

    if ((insn & 0xfc000003) == 0x48000000) {
	/* isolate the displacement field ... */
	disp = insn & 0x03fffffc;
	/* ... and sign extend it from bit 25 */
	if ((disp & 0x02000000) != 0)
	    disp -= 0x04000000;
	return a + disp;
    }

    /* not a relative branch */
    return 0;
}

/*
 * Put the jump site at address `a' back into its unchained form.
 *
 * An unchained jump site holds a blrl instruction; a chained one is
 * expected to hold a relative branch.  If the site is still chained,
 * the branch is overwritten with blrl, the instruction cache is
 * brought up to date for that word, and VG_(bb_dechain_count) is
 * incremented.  A site that is already unchained is left untouched.
 */
void VG_(unchain_jumpsite)(Addr a)
{
    UInt *ip = (UInt *)a;
    const UInt blrl = XLFORM(20, 0, BCLR, 1);

    if (*ip != blrl) {
	/* whatever is there now has to be a branch; report the
	   offending word before the assertion fires */
	if ((*ip & 0xfc000003) != 0x48000000) {
	    VG_(printf)("instr at %p is %x, dechain_count=%u\n", ip, *ip,
			VG_(bb_dechain_count));
	    vg_assert((*ip & 0xfc000003) == 0x48000000);
	}

	/* rewrite the site as blrl */
	*ip = blrl;

	/* push the store out and invalidate the stale icache line */
	asm volatile("dcbst 0,%0; sync; icbi 0,%0; sync; isync" : : "r" (ip));

	VG_(bb_dechain_count)++;
    }
}

/*
 * Fetch the value of architectural register `areg' from the saved
 * state of thread `tid'.
 *
 * Values below 32 select the corresponding general-purpose register;
 * R_CTR, R_LR, R_XER and R_CR select the matching special register.
 * Any other value is a fatal error.
 */
UInt VG_(get_thread_archreg)(ThreadId tid, UInt areg)
{
    ThreadState *thr;

    vg_assert(VG_(is_valid_tid)(tid));
    thr = VG_(threads) + tid;

    if (areg < 32)
	return thr->arch.m_gpr[areg];

    if (areg == R_CTR)
	return thr->arch.m_ctr;
    if (areg == R_LR)
	return thr->arch.m_lr;
    if (areg == R_XER)
	return thr->arch.m_xer;
    if (areg == R_CR)
	return thr->arch.m_cr;

    /* unknown register number */
    VG_(core_panic)("get_thread_archreg");
}

/*
 * Locate the shadow copy of architectural register `areg' inside the
 * thread state `tst' and return a pointer to it.
 *
 * Values 0..31 select a shadow general-purpose register; R_CR, R_LR,
 * R_CTR and R_XER select the matching shadow special register.  Any
 * other value is reported and treated as a fatal error.
 */
static UInt *thread_shadow_ptr(ThreadState *tst, UInt areg)
{
    if (areg < 32)
	return tst->arch.sh_gpr + areg;

    if (areg == R_CR)
	return &tst->arch.sh_cr;
    if (areg == R_LR)
	return &tst->arch.sh_lr;
    if (areg == R_CTR)
	return &tst->arch.sh_ctr;
    if (areg == R_XER)
	return &tst->arch.sh_xer;

    /* no shadow storage exists for this register number */
    VG_(printf)("trying to get shadow of reg 0x%x\n", areg);
    VG_(core_panic)("get_thread_shadow_ptr");
}

/*
 * Return the byte offset, from the start of a ThreadState, of the
 * shadow copy of architectural register `areg'.
 *
 * The offset is measured against a real ThreadState object (the first
 * element of VG_(threads)) rather than by forming member addresses
 * through a null pointer, which ISO C leaves undefined.  The result is
 * the same value the null-based computation produced.
 *
 * An `areg' with no shadow storage is a fatal error (raised by
 * thread_shadow_ptr).
 */
Int VG_(shadow_reg_offset)(Int areg)
{
    ThreadState *base = &VG_(threads)[0];

    return (Int) ((char *) thread_shadow_ptr(base, areg) - (char *) base);
}

/*
 * Read the shadow value of architectural register `areg' from the
 * thread state `tst'.
 */
UInt VG_(get_shadow_archreg)(ThreadState *tst, UInt areg)
{
    const UInt *slot = thread_shadow_ptr(tst, areg);

    return *slot;
}

/*
 * Store `val' as the shadow value of architectural register `areg' in
 * the thread state `tst'.
 */
void VG_(set_shadow_archreg)(ThreadState *tst, UInt areg, UInt val)
{
    UInt *slot = thread_shadow_ptr(tst, areg);

    *slot = val;
}

/*
 * Intentionally empty: both arguments are ignored and nothing is
 * stored, because this target has no eflags register to shadow.
 */
void VG_(set_thread_shadow_eflags) ( ThreadId tid, UInt val )
{
   /* Hmmm we don't have eflags... */
}

/*
 * Read the shadow value of architectural register `areg' for thread
 * `tid'.  The thread id must be valid.
 */
UInt VG_(get_thread_shadow_archreg)(ThreadId tid, UInt areg)
{
    const UInt *slot;

    vg_assert(VG_(is_valid_tid)(tid));
    slot = thread_shadow_ptr(VG_(threads) + tid, areg);
    return *slot;
}

/*
 * Store `val' as the shadow value of architectural register `areg'
 * for thread `tid'.  The thread id must be valid.
 */
void VG_(set_thread_shadow_archreg)(ThreadId tid, UInt areg, UInt val)
{
    UInt *slot;

    vg_assert(VG_(is_valid_tid)(tid));
    slot = thread_shadow_ptr(VG_(threads) + tid, areg);
    *slot = val;
}

/*
 * Return, as an Addr, the location of the shadow copy of architectural
 * register `archreg' in the state that VG_(get_ThreadState) yields for
 * thread `tid'.
 */
Addr VG_(shadow_archreg_address)(ThreadId tid, UInt archreg)
{
    ThreadState *tst = VG_(get_ThreadState)(tid);
    UInt *slot = thread_shadow_ptr(tst, archreg);

    return (Addr) slot;
}

/*
 * Intentionally empty: `val' is ignored and no state is modified.
 * NOTE(review): presumably kept only so that callers of the x86-named
 * interface still link on this target -- confirm.
 */
void VG_(set_shadow_eflags)(UInt val)
{
}

/*
 * Relocation hook for absolute jumps.  VG_(emit_code) always sets
 * cb->nrelocs to 0 and cb->relocs to NULL, so a call here indicates
 * an internal error; `instr' is ignored and the function panics.
 */
void VG_(reloc_abs_jump)(UChar *instr)
{
    /* we don't generate any relocations, so we should never get here */
    VG_(core_panic)("reloc_abs_jump");
}
