Index: valgrind12305/configure.in
===================================================================
--- valgrind12305/configure.in	(revision 12305)
+++ valgrind12305/configure.in	(working copy)
@@ -143,6 +143,8 @@
 # configure-time, and distinguishes them from the VGA_*/VGO_*/VGP_*
 # variables used when compiling C files.
 
+VGCONF_PLATFORM_ARM_ARCH=
+
 AC_CANONICAL_HOST
 
 AC_MSG_CHECKING([for a supported CPU])
@@ -179,15 +181,34 @@
         ;;
 
      armv7*)
-	AC_MSG_RESULT([ok (${host_cpu})])
-	ARCH_MAX="arm"
-	;;
+       # This means we use an armv7 toolchain - at least Cortex-A8
+       AC_MSG_RESULT([ok (${host_cpu})])
+       ARCH_MAX="arm"
+       VGCONF_PLATFORM_ARM_ARCH="-march=armv7 -mcpu=cortex-a8"
+       AC_DEFINE(ARM_ARCH_V7,1,"Defined for v7 architectures")
+       ;;
 
+     armv6*)
+       AC_MSG_RESULT([ok (${host_cpu})])
+       ARCH_MAX="arm"
+       VGCONF_PLATFORM_ARM_ARCH="-march=armv6"
+       AC_DEFINE(ARM_ARCH_V6,1,"Defined for v6 architectures")
+       ;;
+
+     arm*)
+       # Generic arm toolchain - we will target armv5te
+       AC_MSG_RESULT([ok (${host_cpu}) - will enforce armv5te when compiling])
+       ARCH_MAX="arm"
+       VGCONF_PLATFORM_ARM_ARCH="-march=armv5te"
+       AC_DEFINE(ARM_ARCH_V5TE,1,"Defined for v5te architectures")
+       ;;
+
      *) 
 	AC_MSG_RESULT([no (${host_cpu})])
 	AC_MSG_ERROR([Unsupported host architecture. Sorry])
 	;;
 esac
+AC_SUBST(VGCONF_PLATFORM_ARM_ARCH)
 
 #----------------------------------------------------------------------------
 
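The ARM_ARCH_* defines added above give C code a compile-time switch on the
detected architecture.  As a hypothetical illustration (not part of this
patch; arch_barrier and the barrier choices are invented for the example), a
consumer of the new config.h entries could look like:

    /* Hypothetical consumer of the new ARM_ARCH_* defines: choose a
       memory barrier matching the configured architecture. */
    #include "config.h"

    static inline void arch_barrier(void)
    {
    #if defined(ARM_ARCH_V7)
       __asm__ __volatile__("dmb" : : : "memory");      /* ARMv7 DMB insn */
    #elif defined(ARM_ARCH_V6)
       /* ARMv6 data memory barrier via CP15 */
       __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5"
                            : : "r"(0) : "memory");
    #else
       __asm__ __volatile__("" : : : "memory");         /* v5te: compiler only */
    #endif
    }
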
Index: valgrind12305/Makefile.am
===================================================================
--- valgrind12305/Makefile.am	(revision 12305)
+++ valgrind12305/Makefile.am	(working copy)
@@ -3,18 +3,19 @@
 
 include $(top_srcdir)/Makefile.all.am 
 
-TOOLS =		memcheck \
-		cachegrind \
-		callgrind \
-		massif \
-		lackey \
-		none \
-		helgrind \
-		drd
+TOOLS =		memcheck
+# jfr		cachegrind \
+# jfr		callgrind \
+# jfr		massif \
+# jfr		lackey \
+# jfr		none \
+# jfr		helgrind \
+# jfr		drd
 
-EXP_TOOLS = 	exp-sgcheck \
-		exp-bbv \
-		exp-dhat
+EXP_TOOLS =
+# jfr	 	exp-sgcheck \
+# jfr		exp-bbv \
+# jfr		exp-dhat
 
 # Put docs last because building the HTML is slow and we want to get
 # everything else working before we try it.
@@ -28,9 +29,9 @@
 	tests \
 	perf \
 	gdbserver_tests \
-	auxprogs \
-	mpi \
-	docs
+	auxprogs
+# jfr	mpi \
+# jfr	docs
 DIST_SUBDIRS  = $(SUBDIRS)
 
 SUPP_FILES = \
Index: valgrind12305/coregrind/pub_core_transtab_asm.h
===================================================================
--- valgrind12305/coregrind/pub_core_transtab_asm.h	(revision 12305)
+++ valgrind12305/coregrind/pub_core_transtab_asm.h	(working copy)
@@ -58,12 +58,16 @@
 #if defined(VGA_x86) || defined(VGA_amd64)
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr))     ) & VG_TT_FAST_MASK)
 
-#elif defined(VGA_s390x) || defined(VGA_arm)
+#elif defined(VGA_s390x)
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 1) & VG_TT_FAST_MASK)
 
 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
 
+#elif defined(VGA_arm)
+   /* shift by 2 in ARM mode, by 1 in Thumb mode (bit 0 is the Thumb bit) */
+#  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> (2 - (1 & (UWord)(_addr)))) & VG_TT_FAST_MASK)
+
 #else
 #  error "VG_TT_FAST_HASH: unknown platform"
 #endif
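Why the variable shift: ARM insns are 4-byte aligned and Thumb insns 2-byte
aligned, so shifting by the alignment keeps consecutive translations in
consecutive fast-cache entries in both modes.  A standalone sketch (the
VG_TT_FAST_BITS value here is illustrative, not taken from the tree):

    #include <stdio.h>

    typedef unsigned long UWord;
    #define VG_TT_FAST_BITS 15
    #define VG_TT_FAST_MASK ((1UL << VG_TT_FAST_BITS) - 1)
    #define VG_TT_FAST_HASH(_addr) \
       ((((UWord)(_addr)) >> (2 - (1 & (UWord)(_addr)))) & VG_TT_FAST_MASK)

    int main(void)
    {
       /* ARM mode (bit 0 clear): shift by 2; adjacent insns, adjacent entries */
       printf("%#lx %#lx\n", VG_TT_FAST_HASH(0x8000), VG_TT_FAST_HASH(0x8004));
       /* Thumb mode (bit 0 set): shift by 1; adjacent insns, adjacent entries */
       printf("%#lx %#lx\n", VG_TT_FAST_HASH(0x8001), VG_TT_FAST_HASH(0x8003));
       return 0;
    }
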
Index: valgrind12305/coregrind/m_syswrap/syswrap-generic.c
===================================================================
--- valgrind12305/coregrind/m_syswrap/syswrap-generic.c	(revision 12305)
+++ valgrind12305/coregrind/m_syswrap/syswrap-generic.c	(working copy)
@@ -2025,8 +2025,19 @@
       mreq.rkind = MAny;
    }
 
+   /* ARM: MAP_SHARED mappings need VKI_SHMLBA (4-page) alignment, not just
+      page alignment; over-ask here, round up after the advisory below. */
+   vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
+   if ( (arg4 & VKI_MAP_SHARED) && (arg1 == 0) && (VKI_SHMLBA > VKI_PAGE_SIZE) ) {
+      mreq.len += VKI_SHMLBA - VKI_PAGE_SIZE;
+   }
    /* Enquire ... */
    advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
+   if (mreq_ok && (arg4 & VKI_MAP_SHARED) && (arg1 == 0) && (VKI_SHMLBA > VKI_PAGE_SIZE) ) {
+       Addr newaddr = VG_ROUNDUP(advised, VKI_SHMLBA);
+       mreq.len -= (newaddr - advised);
+       advised   = newaddr;
+   }
+
    if (!mreq_ok) {
       /* Our request was bounced, so we'd better fail. */
       return VG_(mk_SysRes_Error)( VKI_EINVAL );
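The pad-then-round trick above is the usual way to extract
stronger-than-page alignment from a page-granular advisory: over-ask by
SHMLBA - PAGE_SIZE, then round the advised address up, and the requested
length is guaranteed to still fit.  A standalone sketch with illustrative
constants (ARM's SHMLBA is 4 pages):

    #include <assert.h>
    #include <stdio.h>

    #define PAGE_SIZE 0x1000UL
    #define SHMLBA    (4 * PAGE_SIZE)            /* ARM: 4 pages */
    #define ROUNDUP(a, n) (((a) + (n) - 1) & ~((n) - 1))

    int main(void)
    {
       unsigned long len     = 0x5000;                   /* caller's length   */
       unsigned long padded  = len + SHMLBA - PAGE_SIZE; /* what we ask for   */
       unsigned long advised = 0x40001000;               /* page-aligned only */
       unsigned long newaddr = ROUNDUP(advised, SHMLBA);
       /* [newaddr, newaddr+len) always fits in [advised, advised+padded) */
       assert(newaddr + len <= advised + padded);
       printf("advised %#lx -> aligned %#lx\n", advised, newaddr);
       return 0;
    }
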
Index: valgrind12305/coregrind/m_dispatch/dispatch-arm-linux.S
===================================================================
--- valgrind12305/coregrind/m_dispatch/dispatch-arm-linux.S	(revision 12305)
+++ valgrind12305/coregrind/m_dispatch/dispatch-arm-linux.S	(working copy)
@@ -9,6 +9,9 @@
 
   Copyright (C) 2008-2011 Evan Geller
      gaze@bea.ms
+  Copyright (C) 2011 John Reiser
+     jreiser@BitWagon.com
+     Sept+Oct 2011:  Inner loops recoded for speed.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
@@ -58,139 +61,134 @@
 
         /* set FPSCR to vex-required default value */
         mov  r4, #0
-        fmxr fpscr, r4
+        /* fmxr fpscr, r4 */
 
         /* r0 (hence also [sp,#0]) holds guest_state */
         /* r1 holds do_profiling */
 	mov r8, r0
 	ldr r0, [r8, #OFFSET_arm_R15T]
-        
+
        	/* fall into main loop (the right one) */
 	cmp r1, #0      /* do_profiling */
-	beq VG_(run_innerloop__dispatch_unprofiled)
-	b   VG_(run_innerloop__dispatch_profiled)
+	bne VG_(run_innerloop__dispatch_profiled)
+	// FALLTHROUGH  b VG_(run_innerloop__dispatch_unprofiled)
 
 
 /*----------------------------------------------------*/
 /*--- NO-PROFILING (standard) dispatcher           ---*/
 /*----------------------------------------------------*/
 
-/* Pairing of insns below is my guesstimate of how dual dispatch would
-   work on an A8.  JRS, 2011-May-28 */
+// Insns below are paired for dual dispatch on a Cortex-A8.
  
+CLR_HI= 32 - VG_TT_FAST_BITS    // "LSL #CLR_HI" clears the bits above the index
+CLR_LO= 32 - VG_TT_FAST_BITS    // "LSR #CLR_LO" brings the index back down
+
 .global	VG_(run_innerloop__dispatch_unprofiled)
 VG_(run_innerloop__dispatch_unprofiled):
-
 	/* AT ENTRY: r0 is next guest addr, r8 is possibly
         modified guest state ptr */
 
-        /* Has the guest state pointer been messed with?  If yes, exit. */
-        movw r3, #:lower16:VG_(dispatch_ctr)
         tst  r8, #1
+	ldr  r2,=VG_(dispatch_ctr)
 
-        movt r3, #:upper16:VG_(dispatch_ctr)
+	bne  gsp_changed                // guest state pointer was modified
+	movs r3, r0, LSR #1             // shift off Thumb mode bit; set Carry
 
-	bne  gsp_changed
+	ldr  r5,=VG_(tt_fast)
+	movcc  r3, r3, LSR #1           // if ARM mode then shift off another bit
 
-	/* save the jump address in the guest state */
-        str  r0, [r8, #OFFSET_arm_R15T]
+        ldr  r1, [r2]                   // dispatch_ctr
+	mov  r3, r3, LSL #CLR_HI        // shift off hi bits
 
-        /* Are we out of timeslice?  If yes, defer to scheduler. */
-        ldr  r2, [r3]
+        str  r0, [r8, #OFFSET_arm_R15T]  // save jump address into guest state
+	add  r5, r5, r3, LSR #(CLR_LO - 3) // r5 = &tt_fast[entry#]
 
-        subs r2, r2, #1
+	ldr  r4, [r5, #0]               // r4= .guest
+        subs r1, r1, #1                 // decrement timeslice
 
-        str  r2, [r3]
+	ldr  r5, [r5, #4]               // r5= .host
+        beq  counter_is_zero            // out of timeslice ==> defer to scheduler
 
-        beq  counter_is_zero
+	adr  lr, VG_(run_innerloop__dispatch_unprofiled)  // &continuation
+	cmp  r4, r0                     // check cache tag
 
-        /* try a fast lookup in the translation cache */
-        // r0 = next guest, r1,r2,r3,r4 scratch
-        movw r1, #VG_TT_FAST_MASK       // r1 = VG_TT_FAST_MASK
-        movw r4, #:lower16:VG_(tt_fast)
-
-	and  r2, r1, r0, LSR #1         // r2 = entry #
-        movt r4, #:upper16:VG_(tt_fast) // r4 = &VG_(tt_fast)
-
-	add  r1, r4, r2, LSL #3         // r1 = &tt_fast[entry#]
-
-        ldrd r4, r5, [r1, #0]           // r4 = .guest, r5 = .host
-
-	cmp  r4, r0
-
-	bne  fast_lookup_failed
+        streq  r1, [r2]                 // match: update dispatch_ctr
+	bxeq r5                         // match: jump to .host, continue at *lr
         // r5: next-host    r8: live, gsp
         // r4: next-guest
-        // r2: entry #
+        // r2: &VG_(dispatch_ctr)
+        // r1: VG_(dispatch_ctr)
         // LIVE: r5, r8; all others dead
-        
-        /* Found a match.  Jump to .host. */
-	blx  r5
-	b    VG_(run_innerloop__dispatch_unprofiled)
-.ltorg
+fast_lookup_failed:
+	movne  r0, #VG_TRC_INNER_FASTMISS
+counter_is_zero:
+        moveq  r0, #VG_TRC_INNER_COUNTERZERO
+
+/* All exits from the dispatcher go through here.  %r0 holds
+   the return value. 
+*/
+run_innerloop_exit:
+        /* We're leaving.  The FPSCR check is disabled along with the fmxr
+           at entry; force r4 so the check below cannot read a stale value. */
+        mov  r4, #0          /* was: fmrx r4, fpscr */
+        bic  r4, #0xF8000000 /* mask out NZCV and QC */
+        bic  r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
+        cmp  r4, #0
+invariant_violation:
+        movne  r0, #VG_TRC_INVARIANT_FAILED
+
+run_innerloop_exit_REALLY:
+	add sp, sp, #8
+	pop {r4, r5, r6, r7, r8, r9, fp, pc}
 	/*NOTREACHED*/
 
+.ltorg
+
 /*----------------------------------------------------*/
 /*--- PROFILING dispatcher (can be much slower)    ---*/
 /*----------------------------------------------------*/
 
 .global	VG_(run_innerloop__dispatch_profiled)
 VG_(run_innerloop__dispatch_profiled):
-
 	/* AT ENTRY: r0 is next guest addr, r8 is possibly
         modified guest state ptr */
 
-        /* Has the guest state pointer been messed with?  If yes, exit. */
-        movw r3, #:lower16:VG_(dispatch_ctr)
-	tst  r8, #1
+        tst  r8, #1
+	ldr  r2,=VG_(dispatch_ctr)
 
-        movt r3, #:upper16:VG_(dispatch_ctr)
+	bne  gsp_changed                // guest state pointer was modified
+	movs r3, r0, LSR #1             // shift off Thumb mode bit; set Carry
 
-	bne  gsp_changed
+	ldr  r5,=VG_(tt_fast)
+	movcc  r3, r3, LSR #1           // if ARM mode then shift off another bit
 
-	/* save the jump address in the guest state */
-        str  r0, [r8, #OFFSET_arm_R15T]
+        ldr  r1, [r2]                   // dispatch_ctr
+	mov  r3, r3, LSL #CLR_HI        // shift off hi bits
 
-        /* Are we out of timeslice?  If yes, defer to scheduler. */
-        ldr  r2, [r3]
+        str  r0, [r8, #OFFSET_arm_R15T]  // save jump address into guest state
+	add  r5, r5, r3, LSR #(CLR_LO - 3) // r5 = &tt_fast[entry#]
 
-        subs r2, r2, #1
+	ldr  r4, [r5, #0]               // r4= .guest
+        subs r1, r1, #1                 // decrement timeslice
 
-        str  r2, [r3]
+	ldr  r5, [r5, #4]               // r5= .host
+        beq  counter_is_zero            // out of timeslice ==> defer to scheduler
 
-        beq  counter_is_zero
+	cmp  r4, r0                     // check cache tag
+	ldr  r0, =VG_(tt_fastN)
 
-        /* try a fast lookup in the translation cache */
-        // r0 = next guest, r1,r2,r3,r4 scratch
-        movw r1, #VG_TT_FAST_MASK       // r1 = VG_TT_FAST_MASK
-        movw r4, #:lower16:VG_(tt_fast)
+        streq  r1, [r2]                 // match: update dispatch_ctr
+	bne fast_lookup_failed
 
-	and  r2, r1, r0, LSR #1         // r2 = entry #
-        movt r4, #:upper16:VG_(tt_fast) // r4 = &VG_(tt_fast)
+	ldr  r0, [r0, r3, LSR #(CLR_LO - 2)]  // tt_fastN[entry#]
+	adr  lr, VG_(run_innerloop__dispatch_profiled)  // &continuation
+// r0 stall
+	ldr  r3, [r0]
+// r3 stall
+	add  r3, r3, #1
 
-	add  r1, r4, r2, LSL #3         // r1 = &tt_fast[entry#]
-
-        ldrd r4, r5, [r1, #0]           // r4 = .guest, r5 = .host
-
-	cmp  r4, r0
-
-	bne  fast_lookup_failed
-        // r5: next-host    r8: live, gsp
-        // r4: next-guest
-        // r2: entry #
-        // LIVE: r5, r8; all others dead
-        
-        /* increment bb profile counter */
-        movw r0, #:lower16:VG_(tt_fastN)
-        movt r0, #:upper16:VG_(tt_fastN) // r0 = &tt_fastN[0]
-        ldr  r0, [r0, r2, LSL #2]        // r0 = tt_fast[entry #]
-        ldr  r3, [r0]                    // *r0 ++
-        add  r3, r3, #1
-        str  r3, [r0]
-
-        /* Found a match.  Jump to .host. */
-	blx  r5
-	b    VG_(run_innerloop__dispatch_profiled)
+	str  r3, [r0]
+	bx  r5                         // match: jump to .host, continue at *lr
 	/*NOTREACHED*/
 
 /*----------------------------------------------------*/
@@ -212,49 +210,6 @@
 	b run_innerloop_exit
         /*NOTREACHED*/
 
-counter_is_zero:
-        /* R15T is up to date here */
-        /* Back out increment of the dispatch ctr */
-        ldr  r1, =VG_(dispatch_ctr)
-        ldr  r2, [r1]
-        add  r2, r2, #1
-        str  r2, [r1]
-        mov  r0, #VG_TRC_INNER_COUNTERZERO
-        b    run_innerloop_exit
-        /*NOTREACHED*/
-        
-fast_lookup_failed:
-        /* R15T is up to date here */
-        /* Back out increment of the dispatch ctr */
-        ldr  r1, =VG_(dispatch_ctr)
-        ldr  r2, [r1]
-        add  r2, r2, #1
-        str  r2, [r1]
-	mov  r0, #VG_TRC_INNER_FASTMISS
-	b    run_innerloop_exit
-        /*NOTREACHED*/
-
-/* All exits from the dispatcher go through here.  %r0 holds
-   the return value. 
-*/
-run_innerloop_exit:
-        /* We're leaving.  Check that nobody messed with
-           FPSCR in ways we don't expect. */
-        fmrx r4, fpscr
-        bic  r4, #0xF8000000 /* mask out NZCV and QC */
-        bic  r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
-        cmp  r4, #0
-        bne  invariant_violation
-        b    run_innerloop_exit_REALLY
-
-invariant_violation:
-        mov  r0, #VG_TRC_INVARIANT_FAILED
-        b    run_innerloop_exit_REALLY
-
-run_innerloop_exit_REALLY:
-	add sp, sp, #8
-	pop {r4, r5, r6, r7, r8, r9, fp, pc}
-
 .size VG_(run_innerloop), .-VG_(run_innerloop)
 
 
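In C terms, one iteration of the recoded fast path amounts to the following
(struct layout mirrors a tt_fast entry; the TRC values and names are
illustrative, and the real asm additionally interleaves the loads for dual
issue and pre-loads lr with the loop head so a translation returns straight
into the next iteration):

    typedef unsigned long UWord;
    typedef struct { UWord guest; UWord host; } FastCacheEntry;

    extern FastCacheEntry tt_fast[];      /* VG_(tt_fast)      */
    extern UWord          dispatch_ctr;   /* VG_(dispatch_ctr) */

    enum { TRC_FASTMISS = 37, TRC_COUNTERZERO = 41 };    /* illustrative */

    static UWord dispatch_step(UWord guest, UWord fast_mask, UWord *host_out)
    {
       UWord ctr = dispatch_ctr - 1;      /* decrement, but do not store yet */
       if (ctr == 0)
          return TRC_COUNTERZERO;         /* out of timeslice: to scheduler  */
       /* shift by 2 in ARM mode, by 1 in Thumb mode (bit 0 = Thumb bit) */
       UWord entry = (guest >> (2 - (1 & guest))) & fast_mask;
       if (tt_fast[entry].guest != guest)
          return TRC_FASTMISS;            /* miss: ctr was never stored, so
                                             there is nothing to back out   */
       dispatch_ctr = ctr;                /* hit: commit the decrement      */
       *host_out = tt_fast[entry].host;   /* asm then does "bx" to this     */
       return 0;
    }

Note the contrast with the removed code: the old loop stored the decremented
counter unconditionally and had to undo the decrement in fast_lookup_failed
and counter_is_zero; the recoded loop stores it only on a hit (streq), so
both exit paths shrink to a single conditional move.
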
Index: valgrind12305/coregrind/m_translate.c
===================================================================
--- valgrind12305/coregrind/m_translate.c	(revision 12305)
+++ valgrind12305/coregrind/m_translate.c	(working copy)
@@ -1533,8 +1533,8 @@
    /* See comment in libvex.h.  This target uses a
       return-to-link-register scheme to get back to the dispatcher, so
       both fields are NULL. */
-   vta.dispatch_assisted   = NULL;
-   vta.dispatch_unassisted = NULL;
+   //vta.dispatch_assisted   = NULL;
+   //vta.dispatch_unassisted = NULL;
 
 #  else
 #    error "Unknown arch"
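This ties into the dispatcher rework: as the comment above notes, this
target returns to the dispatcher through the link register, and the recoded
loop now plants its own continuation in lr ("adr lr, <loop head>") before
entering host code with "bx r5", where the old code paid for "blx r5" plus a
separate "b <loop head>".  As a C analogy (names invented; the continuation
is what lr carries):

    typedef void (*Continuation)(void);
    typedef void (*HostCode)(void);   /* a translation ends with "bx lr" */

    static void dispatch_once(HostCode host, Continuation loop_head)
    {
       /* asm equivalent: adr lr, loop_head ; bx r5 -- the translation's
          closing "bx lr" lands directly on loop_head, one branch fewer
          per dispatched superblock than the old blx+b pair. */
       host();
       loop_head();
    }
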
Index: valgrind12305/Makefile.all.am
===================================================================
--- valgrind12305/Makefile.all.am	(revision 12305)
+++ valgrind12305/Makefile.all.am	(working copy)
@@ -160,9 +160,9 @@
 
 AM_FLAG_M3264_ARM_LINUX   = @FLAG_M32@
 AM_CFLAGS_ARM_LINUX       = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \
-			 	$(AM_CFLAGS_BASE) -marm -mcpu=cortex-a8
+			 	$(AM_CFLAGS_BASE) $(VGCONF_PLATFORM_ARM_ARCH)
 AM_CCASFLAGS_ARM_LINUX    = $(AM_CPPFLAGS_ARM_LINUX) @FLAG_M32@ \
-				-marm -mcpu=cortex-a8 -g
+				$(VGCONF_PLATFORM_ARM_ARCH) -g
 
 AM_FLAG_M3264_X86_DARWIN = -arch i386
 AM_CFLAGS_X86_DARWIN     = $(WERROR) -arch i386 $(AM_CFLAGS_BASE) \