Only in hdperf: hdperf
diff -pru hdperf-1.5/hdperf.c hdperf/hdperf.c
--- hdperf-1.5/hdperf.c	2005-02-23 15:24:35.000000000 -0800
+++ hdperf/hdperf.c	2005-12-22 15:22:19.000000000 -0800
@@ -20,6 +20,7 @@
  *
  */
 
+#define _GNU_SOURCE	/* feature-test macro: glibc's <fcntl.h> only declares O_DIRECT when this is set */
 #define _LARGEFILE64_SOURCE
 #include <errno.h>
 #include <fcntl.h>
@@ -38,7 +39,9 @@
 #define N_ZONES		64
 
 static float zone_perf[N_ZONES];	/* surface performance bins */
-static uint8_t buf[MAX_BUF];		/* transfer buffer */
+// O_DIRECT reads need this buffer aligned to at least a 512-byte boundary.
+// It is page-aligned (4096) here; whether that helps beyond 512 is unverified.
+static uint8_t buf[MAX_BUF] __attribute__((aligned(4096)));		/* transfer buffer */
 static int impatient = 0;	/* max number of seconds to run each test */
 
 static void do_random_test(int fd, int xfer_size, int max_lba, int count)
@@ -47,6 +50,11 @@ static void do_random_test(int fd, int x
 	int i, lba, cdiv;
 	struct timeval tv_start, tv_end;
 	uint64_t len;
+	size_t min_xfer_size;
+
+	// O_DIRECT wants read sizes in multiples of 512 bytes, so anything below
+	// 512 (e.g. the special xfer_size == 1 case) is raised; others pass as-is.
+	min_xfer_size = (xfer_size < 512) ? 512 : xfer_size;
 
 	if (xfer_size > MAX_BUF) {
 		fprintf(stderr, "Illegal transfer size requested\n");
@@ -73,7 +81,7 @@ static void do_random_test(int fd, int x
 			}
 			fflush(stdout);
 		}
-		if (read(fd, buf, xfer_size) < xfer_size) {
+		if (read(fd, buf, min_xfer_size) < (ssize_t)min_xfer_size) {	/* signed compare: read()'s -1 must not wrap to SIZE_MAX */
 			perror("read");
 		}
 		if (i > 1 && impatient) {
@@ -255,7 +263,7 @@ int main(int argc, char **argv)
 			printf("impatient = %d second timeout\n", impatient);
 		}
 	}
-	if ((f = open64(argv[1], O_RDONLY)) < 0) {
+	if ((f = open64(argv[1], O_RDONLY|O_DIRECT)) < 0) {	/* O_DIRECT: ask the kernel to minimize cache effects on these reads */
 		perror(argv[1]);
 		exit(1);
 	}
