# Problem 4.8: data with ties
# Using Charlie Geyer's computer code
#
# Two-sample Wilcoxon rank sum test, Hodges-Lehmann estimator and the
# associated confidence interval, computed by hand and then cross-checked
# against exactRankTests::wilcox.exact().
#
# Lines starting with "#>" record the console output of the original session.

library("exactRankTests")
#> Package 'exactRankTests' is no longer under development.
#> Please consider using package 'coin' instead.

# Data ----

exdata <- read.table(
  "http://www.rohan.sdsu.edu/~babailey/stat672/4.8.data",
  header = TRUE
)
print(exdata)
#>     x   y
#> 1 2.1 1.9
#> 2 1.9 2.6
#> 3 2.6 3.7
#> 4 3.3  NA

# Extract the two samples explicitly (no attach()) and drop the NA padding
# of the shorter column once, instead of re-filtering in every section.
x <- exdata$x[!is.na(exdata$x)]
y <- exdata$y[!is.na(exdata$y)]
print(nx <- length(x))
#> [1] 4
print(ny <- length(y))
#> [1] 3

# Wilcoxon rank sum test ----

mu <- 0 # hypothesized value of median difference

# Shift a copy of y under the null hypothesis; y itself stays untouched so the
# estimator and confidence interval below are computed on the raw data.
y_shifted <- y - mu

data <- c(x, y_shifted)
names(data) <- c(rep("x", nx), rep("y", ny))
data <- sort(data)
r <- rank(data) # tied observations receive midranks
print(rbind(data, r))
#>        x   y   x   x   y   x   y
#> data 1.9 1.9 2.1 2.6 2.6 3.3 3.7
#> r    1.5 1.5 3.0 4.5 4.5 6.0 7.0

# w: rank sum of the y sample; u: w minus its smallest possible value.
print(w <- sum(r[names(data) == "y"]))
#> [1] 13
print(u <- w - ny * (ny + 1) / 2)
#> [1] 7

# Lower-tail probability P(U <= u).
# NOTE(review): pwilcox() is the untied null distribution while these data
# contain ties; here the value agrees with wilcox.exact() below, but that is
# not guaranteed in general -- confirm before reusing on other tied data.
print(pwilcox(u, nx, ny))
#> [1] 0.6857143

# Hodges-Lehmann estimator: median of the pairwise differences ----

print(diffs <- sort(as.vector(outer(y, x, "-"))))
#>  [1] -1.4 -0.7 -0.7 -0.2  0.0  0.0  0.4  0.5  0.7  1.1  1.6  1.8
print(median(diffs))
#> [1] 0.2

# Associated CI ----
# Counts in k from each end of the list of sorted pairwise differences.

conf.level <- 0.95
print(m <- length(diffs))
#> [1] 12
alpha <- 1 - conf.level
k <- qwilcox(alpha / 2, nx, ny)
# diffs is indexed from 1, so a quantile of 0 is moved to the first element.
if (k == 0) {
  k <- k + 1
}
print(k)
#> [1] 1
cat("achieved confidence level:",
    1 - 2 * pwilcox(k - 1, nx, ny), "\n")
#> achieved confidence level: 0.9428571
print(c(diffs[k], diffs[m + 1 - k]))
#> [1] -1.4  1.8

# Or use the wilcox.exact test ----
# mu and conf.level are passed explicitly so these calls stay consistent with
# the hand computation above if either value is changed.

print(wilcox.exact(y, x, alternative = "less", mu = mu))
#>
#> 	Exact Wilcoxon rank sum test
#>
#> data:  y and x
#> W = 7, p-value = 0.6857
#> alternative hypothesis: true mu is less than 0

print(wilcox.exact(y, x, mu = mu, conf.int = TRUE, conf.level = conf.level))
#>
#> 	Exact Wilcoxon rank sum test
#>
#> data:  y and x
#> W = 7, p-value = 0.8
#> alternative hypothesis: true mu is not equal to 0
#> 95 percent confidence interval:
#>  -1.4  1.8
#> sample estimates:
#> difference in location
#>                    0.2