You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

457 lines
15 KiB

8 years ago
Index: valgrind12305/configure.in
===================================================================
--- valgrind12305/configure.in (revision 12305)
+++ valgrind12305/configure.in (working copy)
@@ -143,6 +143,8 @@
# configure-time, and distinguishes them from the VGA_*/VGO_*/VGP_*
# variables used when compiling C files.
+VGCONF_PLATFORM_ARM_ARCH=
+
AC_CANONICAL_HOST
AC_MSG_CHECKING([for a supported CPU])
@@ -179,15 +181,34 @@
;;
armv7*)
- AC_MSG_RESULT([ok (${host_cpu})])
- ARCH_MAX="arm"
- ;;
+ # An armv7 toolchain: assume at least a Cortex-A8 (ARMv7-A). -marm is kept in every arm case because the flags this variable replaces used it.
+ AC_MSG_RESULT([ok (${host_cpu})])
+ ARCH_MAX="arm"
+ VGCONF_PLATFORM_ARM_ARCH="-marm -march=armv7-a -mcpu=cortex-a8"
+ AC_DEFINE([ARM_ARCH_V7], [1], [Defined for v7 architectures])
+ ;;
+ armv6*)
+ AC_MSG_RESULT([ok (${host_cpu})])
+ ARCH_MAX="arm"
+ VGCONF_PLATFORM_ARM_ARCH="-marm -march=armv6"
+ AC_DEFINE([ARM_ARCH_V6], [1], [Defined for v6 architectures])
+ ;;
+
+ arm*)
+ # Generic arm toolchain - we will target armv5te
+ AC_MSG_RESULT([(${host_cpu}) - will enforce armv5te when compiling])
+ ARCH_MAX="arm"
+ VGCONF_PLATFORM_ARM_ARCH="-marm -march=armv5te"
+ AC_DEFINE([ARM_ARCH_V5TE], [1], [Defined for v5te architectures])
+ ;;
+
*)
AC_MSG_RESULT([no (${host_cpu})])
AC_MSG_ERROR([Unsupported host architecture. Sorry])
;;
esac
+AC_SUBST(VGCONF_PLATFORM_ARM_ARCH)
#----------------------------------------------------------------------------
Index: valgrind12305/Makefile.am
===================================================================
--- valgrind12305/Makefile.am (revision 12305)
+++ valgrind12305/Makefile.am (working copy)
@@ -3,18 +3,19 @@
include $(top_srcdir)/Makefile.all.am
-TOOLS = memcheck \
- cachegrind \
- callgrind \
- massif \
- lackey \
- none \
- helgrind \
- drd
+TOOLS = memcheck
+# jfr cachegrind \
+# jfr callgrind \
+# jfr massif \
+# jfr lackey \
+# jfr none \
+# jfr helgrind \
+# jfr drd
-EXP_TOOLS = exp-sgcheck \
- exp-bbv \
- exp-dhat
+EXP_TOOLS =
+# jfr exp-sgcheck \
+# jfr exp-bbv \
+# jfr exp-dhat
# Put docs last because building the HTML is slow and we want to get
# everything else working before we try it.
@@ -28,9 +29,9 @@
tests \
perf \
gdbserver_tests \
- auxprogs \
- mpi \
- docs
+ auxprogs
+# jfr mpi \
+# jfr docs
DIST_SUBDIRS = $(SUBDIRS)
SUPP_FILES = \
Index: valgrind12305/coregrind/pub_core_transtab_asm.h
===================================================================
--- valgrind12305/coregrind/pub_core_transtab_asm.h (revision 12305)
+++ valgrind12305/coregrind/pub_core_transtab_asm.h (working copy)
@@ -58,12 +58,16 @@
#if defined(VGA_x86) || defined(VGA_amd64)
# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) ) & VG_TT_FAST_MASK)
-#elif defined(VGA_s390x) || defined(VGA_arm)
+#elif defined(VGA_s390x)
# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 1) & VG_TT_FAST_MASK)
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
+#elif defined(VGA_arm)
+ /* shift by 2 if ARM mode, by 1 if Thumb mode (bit 0 of the address set); the argument is parenthesised and cast wherever it is used */
+# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> (2 - (1 & (UWord)(_addr)))) & VG_TT_FAST_MASK)
+
#else
# error "VG_TT_FAST_HASH: unknown platform"
#endif
Index: valgrind12305/coregrind/m_syswrap/syswrap-generic.c
===================================================================
--- valgrind12305/coregrind/m_syswrap/syswrap-generic.c (revision 12305)
+++ valgrind12305/coregrind/m_syswrap/syswrap-generic.c (working copy)
@@ -2025,8 +2025,19 @@
mreq.rkind = MAny;
}
+ /* On ARM, MAP_FIXED only succeeds for a shared mapping at a VKI_SHMLBA-aligned (4-page) address, so over-allocate here and round the advised address up below. */
+ vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
+ if ( (arg4 & VKI_MAP_SHARED) && (arg1 == 0) && (VKI_SHMLBA > VKI_PAGE_SIZE) ) {
+ mreq.len += VKI_SHMLBA - VKI_PAGE_SIZE;
+ }
/* Enquire ... */
advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
+ if (mreq_ok && (arg4 & VKI_MAP_SHARED) && (arg1 == 0) && (VKI_SHMLBA > VKI_PAGE_SIZE) ) {
+ Addr newaddr = VG_ROUNDUP(advised, VKI_SHMLBA);
+ mreq.len -= (newaddr - advised);
+ advised = newaddr;
+ }
+
if (!mreq_ok) {
/* Our request was bounced, so we'd better fail. */
return VG_(mk_SysRes_Error)( VKI_EINVAL );
Index: valgrind12305/coregrind/m_dispatch/dispatch-arm-linux.S
===================================================================
--- valgrind12305/coregrind/m_dispatch/dispatch-arm-linux.S (revision 12305)
+++ valgrind12305/coregrind/m_dispatch/dispatch-arm-linux.S (working copy)
@@ -9,6 +9,9 @@
Copyright (C) 2008-2011 Evan Geller
gaze@bea.ms
+ Copyright (C) 2011 John Reiser
+ jreiser@BitWagon.com
+ Sept+Oct 2011: Inner loops recoded for speed.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
@@ -58,139 +61,134 @@
/* set FPSCR to vex-required default value */
mov r4, #0
- fmxr fpscr, r4
+ /* fmxr fpscr, r4 */
/* r0 (hence also [sp,#0]) holds guest_state */
/* r1 holds do_profiling */
mov r8, r0
ldr r0, [r8, #OFFSET_arm_R15T]
-
+
/* fall into main loop (the right one) */
cmp r1, #0 /* do_profiling */
- beq VG_(run_innerloop__dispatch_unprofiled)
- b VG_(run_innerloop__dispatch_profiled)
+ bne VG_(run_innerloop__dispatch_profiled)
+ // FALLTHROUGH b VG_(run_innerloop__dispatch_unprofiled)
/*----------------------------------------------------*/
/*--- NO-PROFILING (standard) dispatcher ---*/
/*----------------------------------------------------*/
-/* Pairing of insns below is my guesstimate of how dual dispatch would
- work on an A8. JRS, 2011-May-28 */
+// Pairing of insns below is how dual dispatch should work.
+CLR_HI= 32 - VG_TT_FAST_BITS
+CLR_LO= 32 - VG_TT_FAST_BITS
+
.global VG_(run_innerloop__dispatch_unprofiled)
VG_(run_innerloop__dispatch_unprofiled):
-
/* AT ENTRY: r0 is next guest addr, r8 is possibly
modified guest state ptr */
- /* Has the guest state pointer been messed with? If yes, exit. */
- movw r3, #:lower16:VG_(dispatch_ctr)
tst r8, #1
+ ldr r2,=VG_(dispatch_ctr)
- movt r3, #:upper16:VG_(dispatch_ctr)
+ bne gsp_changed // guest state pointer was modified
+ movs r3, r0, LSR #1 // shift off Thumb mode bit; set Carry
- bne gsp_changed
+ ldr r5,=VG_(tt_fast)
+ movcc r3, r3, LSR #1 // if ARM mode then shift off another bit
- /* save the jump address in the guest state */
- str r0, [r8, #OFFSET_arm_R15T]
+ ldr r1, [r2] // dispatch_ctr
+ mov r3, r3, LSL #CLR_HI // shift off hi bits
- /* Are we out of timeslice? If yes, defer to scheduler. */
- ldr r2, [r3]
+ str r0, [r8, #OFFSET_arm_R15T] // save jump address into guest state
+ add r5, r5, r3, LSR #CLR_LO -3 // r5= &tt_fast[entry#]
- subs r2, r2, #1
+ ldr r4, [r5, #0] // r4= .guest
+ subs r1, r1, #1 // decrement timeslice
- str r2, [r3]
+ ldr r5, [r5, #4] // r5= .host
+ beq counter_is_zero // out of timeslice ==> defer to scheduler
- beq counter_is_zero
+ adr lr, VG_(run_innerloop__dispatch_unprofiled) // &continuation
+ cmp r4, r0 // check cache tag
- /* try a fast lookup in the translation cache */
- // r0 = next guest, r1,r2,r3,r4 scratch
- movw r1, #VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK
- movw r4, #:lower16:VG_(tt_fast)
-
- and r2, r1, r0, LSR #1 // r2 = entry #
- movt r4, #:upper16:VG_(tt_fast) // r4 = &VG_(tt_fast)
-
- add r1, r4, r2, LSL #3 // r1 = &tt_fast[entry#]
-
- ldrd r4, r5, [r1, #0] // r4 = .guest, r5 = .host
-
- cmp r4, r0
-
- bne fast_lookup_failed
+ streq r1, [r2] // match: update dispatch_ctr
+ bxeq r5 // match: jump to .host, continue at *lr
// r5: next-host r8: live, gsp
// r4: next-guest
- // r2: entry #
+ // r2: &VG_(dispatch_ctr)
+ // r1: VG_(dispatch_ctr)
// LIVE: r5, r8; all others dead
-
- /* Found a match. Jump to .host. */
- blx r5
- b VG_(run_innerloop__dispatch_unprofiled)
-.ltorg
+fast_lookup_failed:
+ movne r0, #VG_TRC_INNER_FASTMISS
+counter_is_zero:
+ moveq r0, #VG_TRC_INNER_COUNTERZERO
+
+/* All exits from the dispatcher go through here. %r0 holds
+ the return value.
+*/
+run_innerloop_exit:
+ /* We're leaving. The FPSCR sanity check keeps its shape, but the FPSCR
+ read is disabled, so it is fed a known-zero value and always passes. */
+ mov r4, #0 /* was: fmrx r4, fpscr. On the lookup paths r4 still holds the tt_fast .guest tag and must not be tested */
+ bic r4, #0xF8000000 /* mask out NZCV and QC */
+ bic r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
+ cmp r4, #0
+invariant_violation:
+ movne r0, #VG_TRC_INVARIANT_FAILED
+
+run_innerloop_exit_REALLY:
+ add sp, sp, #8
+ pop {r4, r5, r6, r7, r8, r9, fp, pc}
/*NOTREACHED*/
+.ltorg
+
/*----------------------------------------------------*/
/*--- PROFILING dispatcher (can be much slower) ---*/
/*----------------------------------------------------*/
.global VG_(run_innerloop__dispatch_profiled)
VG_(run_innerloop__dispatch_profiled):
-
/* AT ENTRY: r0 is next guest addr, r8 is possibly
modified guest state ptr */
- /* Has the guest state pointer been messed with? If yes, exit. */
- movw r3, #:lower16:VG_(dispatch_ctr)
- tst r8, #1
+ tst r8, #1
+ ldr r2,=VG_(dispatch_ctr)
- movt r3, #:upper16:VG_(dispatch_ctr)
+ bne gsp_changed // guest state pointer was modified
+ movs r3, r0, LSR #1 // shift off Thumb mode bit; set Carry
- bne gsp_changed
+ ldr r5,=VG_(tt_fast)
+ movcc r3, r3, LSR #1 // if ARM mode then shift off another bit
- /* save the jump address in the guest state */
- str r0, [r8, #OFFSET_arm_R15T]
+ ldr r1, [r2] // dispatch_ctr
+ mov r3, r3, LSL #CLR_HI // shift off hi bits
- /* Are we out of timeslice? If yes, defer to scheduler. */
- ldr r2, [r3]
+ str r0, [r8, #OFFSET_arm_R15T] // save jump address into guest state
+ add r5, r5, r3, LSR #CLR_LO -3 // r5= &tt_fast[entry#]
- subs r2, r2, #1
+ ldr r4, [r5, #0] // r4= .guest
+ subs r1, r1, #1 // decrement timeslice
- str r2, [r3]
+ ldr r5, [r5, #4] // r5= .host
+ beq counter_is_zero // out of timeslice ==> defer to scheduler
- beq counter_is_zero
+ cmp r4, r0 // check cache tag
+ ldr r0, =VG_(tt_fastN)
- /* try a fast lookup in the translation cache */
- // r0 = next guest, r1,r2,r3,r4 scratch
- movw r1, #VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK
- movw r4, #:lower16:VG_(tt_fast)
+ streq r1, [r2] // match: update dispatch_ctr
+ bne fast_lookup_failed
- and r2, r1, r0, LSR #1 // r2 = entry #
- movt r4, #:upper16:VG_(tt_fast) // r4 = &VG_(tt_fast)
+ ldr r0, [r0, r3, LSR #CLR_LO -2] // tt_fastN[entry#]
+ adr lr, VG_(run_innerloop__dispatch_profiled) // &continuation
+// r0 stall
+ ldr r3, [r0]
+// r3 stall
+ add r3, r3, #1
- add r1, r4, r2, LSL #3 // r1 = &tt_fast[entry#]
-
- ldrd r4, r5, [r1, #0] // r4 = .guest, r5 = .host
-
- cmp r4, r0
-
- bne fast_lookup_failed
- // r5: next-host r8: live, gsp
- // r4: next-guest
- // r2: entry #
- // LIVE: r5, r8; all others dead
-
- /* increment bb profile counter */
- movw r0, #:lower16:VG_(tt_fastN)
- movt r0, #:upper16:VG_(tt_fastN) // r0 = &tt_fastN[0]
- ldr r0, [r0, r2, LSL #2] // r0 = tt_fast[entry #]
- ldr r3, [r0] // *r0 ++
- add r3, r3, #1
- str r3, [r0]
-
- /* Found a match. Jump to .host. */
- blx r5
- b VG_(run_innerloop__dispatch_profiled)
+ str r3, [r0]
+ bx r5 // match: jump to .host, continue at *lr
/*NOTREACHED*/
/*----------------------------------------------------*/
@@ -212,49 +210,6 @@
b run_innerloop_exit
/*NOTREACHED*/
-counter_is_zero:
- /* R15T is up to date here */
- /* Back out increment of the dispatch ctr */
- ldr r1, =VG_(dispatch_ctr)
- ldr r2, [r1]
- add r2, r2, #1
- str r2, [r1]
- mov r0, #VG_TRC_INNER_COUNTERZERO
- b run_innerloop_exit
- /*NOTREACHED*/
-
-fast_lookup_failed:
- /* R15T is up to date here */
- /* Back out increment of the dispatch ctr */
- ldr r1, =VG_(dispatch_ctr)
- ldr r2, [r1]
- add r2, r2, #1
- str r2, [r1]
- mov r0, #VG_TRC_INNER_FASTMISS
- b run_innerloop_exit
- /*NOTREACHED*/
-
-/* All exits from the dispatcher go through here. %r0 holds
- the return value.
-*/
-run_innerloop_exit:
- /* We're leaving. Check that nobody messed with
- FPSCR in ways we don't expect. */
- fmrx r4, fpscr
- bic r4, #0xF8000000 /* mask out NZCV and QC */
- bic r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
- cmp r4, #0
- bne invariant_violation
- b run_innerloop_exit_REALLY
-
-invariant_violation:
- mov r0, #VG_TRC_INVARIANT_FAILED
- b run_innerloop_exit_REALLY
-
-run_innerloop_exit_REALLY:
- add sp, sp, #8
- pop {r4, r5, r6, r7, r8, r9, fp, pc}
-
.size VG_(run_innerloop), .-VG_(run_innerloop)
Index: valgrind12305/coregrind/m_translate.c
===================================================================
--- valgrind12305/coregrind/m_translate.c (revision 12305)
+++ valgrind12305/coregrind/m_translate.c (working copy)
@@ -1533,8 +1533,8 @@
/* See comment in libvex.h. This target uses a
return-to-link-register scheme to get back to the dispatcher, so
both fields are NULL. */
- vta.dispatch_assisted = NULL;
- vta.dispatch_unassisted = NULL;
+ //vta.dispatch_assisted = NULL;
+ //vta.dispatch_unassisted = NULL;
# else
# error "Unknown arch"
Index: valgrind12305/Makefile.all.am
===================================================================
--- valgrind12305/Makefile.all.am (revision 12305)
+++ valgrind12305/Makefile.all.am (working copy)
@@ -160,9 +160,9 @@
AM_FLAG_M3264_ARM_LINUX = @FLAG_M32@
AM_CFLAGS_ARM_LINUX = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY@ \
- $(AM_CFLAGS_BASE) -marm -mcpu=cortex-a8
+ $(AM_CFLAGS_BASE) $(VGCONF_PLATFORM_ARM_ARCH)
AM_CCASFLAGS_ARM_LINUX = $(AM_CPPFLAGS_ARM_LINUX) @FLAG_M32@ \
- -marm -mcpu=cortex-a8 -g
+ $(VGCONF_PLATFORM_ARM_ARCH) -g
AM_FLAG_M3264_X86_DARWIN = -arch i386
AM_CFLAGS_X86_DARWIN = $(WERROR) -arch i386 $(AM_CFLAGS_BASE) \