问题：

如何用不同的水平线注释ggplot2中的每个条？

丘普松

2023-03-14

我绘制了一系列条形图，表示一个比例的置信区间的模拟。我想在每个条形中添加一条线，代表成功的比例。

我要绘制的比例在绘图的数据框中。我还没有弄清楚如何在每个单独的条中为该数据点添加线元素。

可视化来自哈维·莫图尔斯基（Harvey Motulsky）的《直觉生物统计学》（Intuitive Biostatistics）第36页。这是从给定样本空间获取样本，记录成功比例并计算置信区间的模拟。

我使用geom_segment绘制了条形图，因此可以使条形图从置信区间的下端开始，而不是从x轴开始绘制。我在整个图表中添加了一条水平线，显示了样本空间中成功的真实比例（红色和白色球集合中的红色球）。

我试着用映射到数据点 trials_df$proportion 的 geom_hline 和 geom_segment 做了一些尝试，但一直没有找到正确的方法。

这是我整个可视化的代码。它被分解到一些函数中，然后整个模拟运行，打印绘图的数据框，然后运行我到目前为止的绘图(缺少每个条形上的比例线)。


    library(ggplot2)

    # Draw N observations, with replacement, from `sample_space`.
    #
    # sample_space: vector of possible outcomes (one element per item).
    # N:            number of draws to make.
    # Returns a vector of length N whose elements come from `sample_space`.
    run_trials <- function(sample_space, N) {
      # Index explicitly instead of calling sample(sample_space, ...): when its
      # first argument is a single number >= 1, sample() draws from
      # 1:sample_space rather than from the one-element vector itself.
      picks <- sample.int(length(sample_space), size = N, replace = TRUE)
      sample_space[picks]
    }

    # Count the elements of `trials` that are equal to `success_value`.
    success_count <- function(trials, success_value) {
      is_success <- trials == success_value
      sum(is_success)
    }

    # Fraction of the elements of `trials` that are equal to `success_value`.
    proportion <- function(trials, success_value) {
      n_success <- success_count(trials, success_value)
      n_total <- length(trials)
      n_success / n_total
    }

    # Confidence interval for a proportion, computed from an adjusted
    # proportion p' = (successes + z^2 / 2) / (trials + z^2) and a half-width
    # W = z * sqrt(p' * (1 - p') / (trials + z^2)).
    #
    # success_count: number of successes observed.
    # trial_count:   total number of trials.
    # z:             critical value; the default 1.96 is the value the
    #                original code hard-coded.
    # Returns c(lower, upper) = c(p' - W, p' + W). The bounds are not clamped
    # to [0, 1].
    wald_mod <- function(success_count, trial_count, z = 1.96) {
      n_adj <- trial_count + z^2
      p_prime <- (success_count + (0.5 * z^2)) / n_adj
      W <- z * sqrt((p_prime * (1 - p_prime)) / n_adj)
      c(p_prime - W, p_prime + W)
    }

    # Summarise one set of trials.
    #
    # Returns an unnamed list of two elements:
    #   [[1]] the proportion of `trials` equal to `success_value`
    #   [[2]] the c(lower, upper) interval from wald_mod()
    get_trial_results <- function(trials, success_value) {
      n_success <- success_count(trials, success_value)
      list(
        proportion(trials, success_value),
        wald_mod(n_success, length(trials))
      )
    }

    # Simulate 20 experiments of N = 15 draws each from a sample space of
    # 25 "Red" and 75 "White" items, print the per-experiment results, and
    # return a ggplot in which each bar spans one experiment's confidence
    # interval. A dashed horizontal line marks the proportion of "Red" in the
    # sample space.
    run_simulation <- function() {
      sample_space <- c(rep("Red", 25), rep("White", 75))
      N <- 15
      n_experiments <- 20

      # Build one single-row data frame per experiment and bind them once.
      # The original grew the data frame with rbind() inside the loop, starting
      # from a zero-row frame; rbind() drops zero-row arguments, so the declared
      # column names were lost and had to be re-assigned afterwards.
      rows <- lapply(seq_len(n_experiments), function(i) {
        draws <- run_trials(sample_space, N)
        res <- get_trial_results(draws, "Red")
        data.frame(
          trials_index = i,
          proportion = res[[1]][1],
          ci_min = res[[2]][1],
          ci_max = res[[2]][2]
        )
      })
      trials_df <- do.call(rbind, rows)

      print(trials_df)

      # geom_segment() from ci_max down to ci_min lets each bar start at the
      # lower bound rather than at the x axis. `size` sets the bar thickness
      # (ggplot2 >= 3.4.0 prefers `linewidth` for this).
      ggplot(trials_df, aes(trials_index, ci_max)) +
        geom_segment(
          aes(xend = trials_index, yend = ci_min),
          size = 4, lineend = "butt", color = "turquoise4"
        ) +
        geom_abline(
          slope = 0, intercept = proportion(sample_space, "Red"),
          linetype = "dashed"
        )
    }

    # Run the simulation: prints the results data frame; the returned ggplot
    # object is displayed when auto-printed at top level.
    run_simulation()

我在代码中添加了@Simon的解决方案，并改进了图的标签。开发这个小模拟帮助我理解了置信区间。


    library(ggplot2)

    # Run one experiment: draw N observations, with replacement, from
    # `sample_space`.
    #
    # sample_space: vector of possible outcomes (one element per item).
    # N:            number of draws to make.
    # Returns a vector of length N whose elements come from `sample_space`.
    run_experiment <- function(sample_space, N) {
      # Index explicitly instead of calling sample(sample_space, ...): when its
      # first argument is a single number >= 1, sample() draws from
      # 1:sample_space rather than from the one-element vector itself.
      picks <- sample.int(length(sample_space), size = N, replace = TRUE)
      sample_space[picks]
    }

    # Number of elements of `experiment` that are equal to `success_value`.
    success_count <- function(experiment, success_value) {
      sum(experiment == success_value)
    }

    # Fraction of the elements of `experiment` equal to `success_value`.
    proportion <- function(experiment, success_value) {
      n_total <- length(experiment)
      success_count(experiment, success_value) / n_total
    }

    # Confidence interval for a proportion, computed from an adjusted
    # proportion p' = (successes + z^2 / 2) / (trials + z^2) and a half-width
    # W = z * sqrt(p' * (1 - p') / (trials + z^2)).
    #
    # success_count: number of successes observed.
    # trial_count:   total number of trials.
    # z:             critical value; the default 1.96 is the value the
    #                original code hard-coded.
    # Returns c(lower, upper) = c(p' - W, p' + W). The bounds are not clamped
    # to [0, 1].
    wald_mod <- function(success_count, trial_count, z = 1.96) {
      n_adj <- trial_count + z^2
      p_prime <- (success_count + (0.5 * z^2)) / n_adj
      W <- z * sqrt((p_prime * (1 - p_prime)) / n_adj)
      c(p_prime - W, p_prime + W)
    }

    # Summarise one experiment.
    #
    # Returns an unnamed list of two elements:
    #   [[1]] c(p, marker_y): the observed proportion, and the height used to
    #         draw its marker on the bar
    #   [[2]] the c(lower, upper) interval from wald_mod()
    get_experiment_results <- function(experiment, success_value) {
      n_success <- success_count(experiment, success_value)
      ci <- wald_mod(n_success, length(experiment))
      p <- proportion(experiment, success_value)

      # Marker height: start at the lower bound and move up by p times the
      # interval width.
      ci_width <- abs(diff(ci))
      marker_y <- ci[1] + p * ci_width

      list(c(p, marker_y), ci)
    }

    # Simulate 20 experiments of N = 15 draws each from a sample space of
    # 25 "Red" and 75 "White" items, print the per-experiment results, and
    # return a ggplot in which each bar spans one experiment's confidence
    # interval, with a short black marker at `proportion_plot_value`. A dashed
    # horizontal line marks the proportion of "Red" in the sample space.
    run_simulation <- function() {
      sample_space <- c(rep("Red", 25), rep("White", 75))
      N <- 15
      n_experiments <- 20

      # Build one single-row data frame per experiment and bind them once,
      # instead of growing a data frame with rbind() inside the loop and
      # re-assigning its column names afterwards.
      rows <- lapply(seq_len(n_experiments), function(i) {
        draws <- run_experiment(sample_space, N)
        res <- get_experiment_results(draws, "Red")
        data.frame(
          experiment_index = i,
          proportion = res[[1]][[1]],
          proportion_plot_value = res[[1]][[2]],
          ci_min = res[[2]][[1]],
          ci_max = res[[2]][[2]]
        )
      })
      experiments_df <- do.call(rbind, rows)

      print(experiments_df)

      # Floating bars (segments from ci_max down to ci_min) follow Jaap's
      # answer on Stack Overflow:
      # https://stackoverflow.com/questions/29916770/geom-bar-from-min-to-max-data-value
      # The short marker on each bar follows Simon's answer on Stack Overflow.
      # NOTE(review): the original comment repeated the URL above for Simon's
      # answer; the correct link still needs to be filled in.
      #
      # `size` sets the segment thickness (ggplot2 >= 3.4.0 prefers
      # `linewidth` for this).
      ggplot(experiments_df, aes(experiment_index)) +
        geom_segment(
          aes(xend = experiment_index, yend = ci_min, y = ci_max),
          size = 4, lineend = "butt", color = "turquoise4"
        ) +
        # A segment only 0.002 tall, centred on proportion_plot_value, shows
        # up as a thin horizontal tick across the bar.
        geom_segment(
          aes(
            xend = experiment_index,
            yend = proportion_plot_value - .001,
            y = proportion_plot_value + .001
          ),
          size = 4, lineend = "butt", color = "black"
        ) +
        geom_abline(
          slope = 0, intercept = proportion(sample_space, "Red"),
          linetype = "dashed"
        ) +
        coord_cartesian(ylim = c(0, 1)) +
        labs(
          x = "Experiment", y = "Probability",
          # Explicit "\n" so the title's second line does not pick up the
          # source indentation.
          title = "Each bar shows 95% CI computed from one\nsimulated experiment",
          subtitle = "Dashed line is true proportion in sample space",
          caption = "Intuitive Biostatistics. Harvey Motulsky. p. 36"
        )
    }

    # Run the simulation: prints the results data frame; the returned ggplot
    # object is displayed when auto-printed at top level.
    run_simulation()

我的最终图（我的声望还不允许我粘贴图片）

共有1个答案

张俊茂

2023-03-14

首先计算比例线在条形中的位置（从条形下端算起）：

# Fragment intended for the body of run_simulation(): it relies on
# `sample_space`, `N`, run_trials() and get_trial_results() from the
# question's code.
trials_df <- data.frame(trials_index = integer(),
                        proportion = double(),
                        ci_min = double(),
                        ci_max = double())

for (i in 1:20) {
  t <- run_trials(sample_space, N)
  t_results <- get_trial_results(t, "Red")
  p <- t_results[[1]][1]
  ci <- t_results[[2]]
  # Height of the marker: start at the lower bound and move up by p times the
  # interval width. diff() takes the whole two-element interval; the original
  # passed the two bounds as separate arguments (the second is read as `lag`)
  # and misspelled abs() as asbs().
  proportion_max <- ci[1] + p * abs(diff(ci))
  trials_df <- rbind(trials_df, c(i, p, ci[1], ci[2], proportion_max))
}
names(trials_df) <- c("trials_index", "proportion", "ci_min", "ci_max", "proportion_max")

对于每个条形上的一条小水平线，您可以执行以下操作：

  # Bars span ci_min..ci_max. A second, very short segment (0.002 tall)
  # centred on proportion_max draws a small horizontal tick on each bar.
  ggplot(data = trials_df, mapping = aes(x = trials_index, y = ci_max)) +
    geom_segment(
      mapping = aes(xend = trials_index, yend = ci_min),
      size = 4, # lineend = "butt" is left at its default here
      color = "turquoise4"
    ) +
    geom_segment(
      mapping = aes(
        xend = trials_index,
        yend = proportion_max - .001,
        y = proportion_max + .001
      ),
      size = 4,
      lineend = "butt",
      color = "turquoise3"
    ) +
    geom_abline(
      slope = 0,
      intercept = proportion(sample_space, "Red"),
      linetype = "dashed"
    )

这些是你想要的吗？

要给每个条形中比例线以下的部分上色，可以这样做：

# Bars span ci_min..ci_max. A second segment from proportion_max down to
# ci_min is drawn on top in a lighter colour, shading the lower part of
# each bar.
ggplot(data = trials_df, mapping = aes(x = trials_index, y = ci_max)) +
  geom_segment(
    mapping = aes(xend = trials_index, yend = ci_min),
    size = 4, # lineend = "butt" is left at its default here
    color = "turquoise4"
  ) +
  geom_segment(
    mapping = aes(xend = trials_index, yend = ci_min, y = proportion_max),
    size = 4,
    lineend = "butt",
    color = "turquoise3"
  ) +
  geom_abline(
    slope = 0,
    intercept = proportion(sample_space, "Red"),
    linetype = "dashed"
  )

类似资料：

matplotlib中水平条形图的注释[duplicate]

我需要注释水平条形图。我能够使用matplotlib网站上显示的示例来注释垂直条形图，但是对于水平（horizontal）条形图，类似的想法似乎不起作用。以下是垂直方向的小型工作示例：以下是我想要使用的代码，但不适用于水平图形。感谢任何帮助，提前感谢！
matplotlib文本注释水平

条形图的代码如下：我有一个字符串列表:lst = ['A '，' B '，' C '，' D '，' E '，' F '，' G '，' H '，' I '，' J'] 我想用以下字符串注释我的柱线（列表的第一个元素=第一根柱线的注释）。当然，它会用值A标注所有的条形，位置不在条形图内。有什么办法可以解决这个问题吗？
ggplot2：点和线的不同图例符号

已经搜索了所有相关的线程，但找不到解决方案。这是我的代码和附加的绘图结果：这导致了这个情节：如您所见，所有图例图标看起来都一样。我想要的是点显示为点，两条线（“回归”和“趋势线”）显示为线。我试着用但这再次以相同的方式给了我所有图标，我不知道如何区分它们我是R的新手，这是我的第一个“复杂”情节。尝试通过在线帮助和谷歌找出大多数问题，但找不到解决此问题的方法。谢谢大家的时间和帮助！这里
如何注释除列表中的动词以外的每个动词

我试过这个：但没有用。我如何找到一个动词，检查它是否已经有一个Inner_Pred注释，如果没有，将这个动词注释为outer_pred？在inner_pred.lst中，我有基本形式的动词列表。提前谢了。如果你能告诉我在哪里我可以自己看这些信息，那就太好了。我只找到了GATE Jape手册，但它很短，没有提供很多答案。
如何使用@Autowired注释两个或多个不同的组件类为同一服务？

例如，拥有如下所示的类。类中的第一个不是null，但中的第二个是null。我得到我尝试创建bean它工作，然后在中调用时得到相同的异常。如何在使用注释的任何地方都使用。我的主要班级：类A中使用的验证器类： AmountValidator添加到类Validator中的规则。
PowerMockito：如何“释放”一个用@PrepareForTest注释的类？

以某种方式“释放”类

如何用不同的水平线注释ggplot2中的每个条？

共有1个答案

相关问答

相关文章

相关阅读

相关工具

相关文档