Here I've used your data alongside the tidyverse packages (dplyr, tidyr, stringr, forcats and ggplot2) to do as you asked (and tidy the data up a bit!). The important steps were converting the "batch" (and "gene") columns to numeric, and then using fct_reorder() to reorder the "ID" column by the now-numeric "batch".
# Example expression table: one row per sample, an ID column plus ten gene
# columns (Gene_1 ... Gene_10). Values are laid out three per line, i.e. one
# line per group of three consecutive samples. Row names repeat the ID column.
data_v1 <- structure(
  list(
    ID = c(
      "CH30_0h-1", "CH30_6h-8", "CH30_6h-16",
      "CN1_0h-1", "CN1_6h-8", "CN1_6h-16",
      "400B_0h-1", "400B_6h-8", "400B_6h-16",
      "1111A_0h-1", "1111A_6h-8", "1111A_6h-16",
      "1164A_0h-1", "1164A_6h-8", "1164A_6h-16"
    ),
    Gene_1 = c(
      -7.06771, -8.73071, -6.73071,
      -4.73797, NA, NA,
      -6.89689, -9.11833, -7.24038,
      -9.91257, -7.31379, -6.17557,
      -7.58907, -7.43676, NA
    ),
    Gene_2 = c(
      -5.91461, -6.36411, -5.02691,
      -9.64877, NA, -4.1052,
      -4.9579, -4.44811, -4.94366,
      -5.61523, -5.6951, -5.6236,
      -5.61506, -5.724, -6.38191
    ),
    Gene_3 = c(
      -2.1792, -2.3658, -2.02961,
      -1.26177, -2.03669, -2.4839,
      -2.24417, -1.76436, -1.29733,
      -2.30767, -2.57192, -1.87333,
      -2.72559, -2.36064, -1.59041
    ),
    Gene_4 = c(
      -4.24441, -3.40551, -1.96791,
      -4.46717, -2.54759, -0.8027,
      -4.17766, -3.21896, -2.26203,
      -4.00953, -3.06621, -2.15282,
      -4.45781, -3.77418, -4.48653
    ),
    Gene_5 = c(
      -5.38011, -5.66761, -3.26111,
      -5.70527, 4.26221, -5.1683,
      NA, NA, NA,
      -4.78791, -8.95204, -6.35331,
      -6.34912, -5.70766, -7.71973
    ),
    Gene_6 = c(
      -2.36521, -2.59891, -2.15781,
      -0.75477, -2.03779, -1.196,
      -3.06798, -3.85273, -2.89402,
      -2.13551, -2.60488, -1.93915,
      -2.68515, -3.7548, -2.69344
    ),
    Gene_7 = c(
      -2.05641, -3.8735, -2.91651,
      -2.66677, -3.40529, -2.8928,
      -3.4558, -3.89929, -3.38236,
      -3.92386, -4.15504, -3.77592,
      -3.96861, -2.36629, -3.30789
    ),
    Gene_8 = c(
      -1.4283, -1.79241, -1.75011,
      -3.25977, -2.36509, -1.9922,
      -2.53121, -2.58246, -2.649,
      -2.36292, -2.0177, -1.99138,
      -1.74274, -3.02425, -2.16307
    ),
    Gene_9 = c(
      -3.32931, -3.20471, -4.11671,
      -4.95547, -2.76539, -3.227,
      -3.90394, -4.20117, -4.92701,
      -4.67031, -5.08027, -5.37573,
      -3.43241, -2.51116, -3.55333
    ),
    Gene_10 = c(
      -2.55501, -1.24421, -1.41321,
      -1.56437, -2.90679, -3.2299,
      -3.31768, -3.16836, -4.02685,
      -4.35349, -2.77335, -4.40128,
      -2.77479, -2.61881, -3.58733
    )
  ),
  class = "data.frame",
  row.names = c(
    "CH30_0h-1", "CH30_6h-8", "CH30_6h-16",
    "CN1_0h-1", "CN1_6h-8", "CN1_6h-16",
    "400B_0h-1", "400B_6h-8", "400B_6h-16",
    "1111A_0h-1", "1111A_6h-8", "1111A_6h-16",
    "1164A_0h-1", "1164A_6h-8", "1164A_6h-16"
  )
)
# Sample annotation: maps every sample ID to its batch label. Each batch
# label ("Batch-I" ... "Batch-V") covers three consecutive samples.
sample_pheno <- structure(
  list(
    ID = c(
      "CH30_0h-1", "CH30_6h-8", "CH30_6h-16",
      "CN1_0h-1", "CN1_6h-8", "CN1_6h-16",
      "400B_0h-1", "400B_6h-8", "400B_6h-16",
      "1111A_0h-1", "1111A_6h-8", "1111A_6h-16",
      "1164A_0h-1", "1164A_6h-8", "1164A_6h-16"
    ),
    Batch = rep(paste0("Batch-", c("I", "II", "III", "IV", "V")), each = 3L)
  ),
  class = "data.frame",
  # compact form of the automatic row names 1..15
  row.names = c(NA, -15L)
)
# let's use the tidyverse
library(tidyverse)
# Reshape the wide expression table into long form (one row per sample/gene
# pair), attach each sample's batch, and turn the roman-numeral batch label
# into a plain number so it can be used for ordering.
data_long <- data_v1 %>%
  # pivot_longer() from tidyr - does the same job as reshape, but more modern.
  # starts_with("Gene_") selects exactly the columns whose "Gene_" prefix is
  # stripped below; a substring match such as contains("Gene") could also pull
  # in unrelated columns, whose names as.integer() would then turn into NA.
  pivot_longer(
    starts_with("Gene_"),
    names_to = "gene",
    names_prefix = "Gene_",
    names_transform = list(gene = as.integer)
  ) %>%
  # join on pheno
  left_join(sample_pheno, by = "ID") %>%
  # optional rename - make names consistent
  rename(id = ID, batch = Batch) %>%
  # convert the roman numerals to be numeric: "Batch-IV" -> "IV" -> 4
  mutate(
    batch = str_remove(batch, "Batch-"),
    batch = as.numeric(as.roman(batch))
  )
head(data_long)
#> # A tibble: 6 x 4
#> id gene value batch
#> <chr> <int> <dbl> <dbl>
#> 1 CH30_0h-1 1 -7.07 1
#> 2 CH30_0h-1 2 -5.91 1
#> 3 CH30_0h-1 3 -2.18 1
#> 4 CH30_0h-1 4 -4.24 1
#> 5 CH30_0h-1 5 -5.38 1
#> 6 CH30_0h-1 6 -2.37 1
# One box per sample, coloured by batch, with samples laid out along the
# x-axis in batch order.
data_long %>%
  mutate(
    # order the sample ids by the numeric batch first ...
    id = fct_reorder(id, batch),
    # ... then make batch discrete so it gets a categorical fill scale
    batch = factor(batch)
  ) %>%
  ggplot(aes(x = id, y = value, fill = batch)) +
  geom_boxplot() +
  labs(x = "ID", y = NULL, fill = "Batch #") +
  theme_bw() +
  # vertical tick labels so the long sample ids do not overlap
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
#> Warning: Removed 7 rows containing non-finite values (stat_boxplot).

Created on 2022-01-06 by the reprex package (v2.0.1)