This is a first pass at a possible solution. It is slow because the PNG contains about 1 million pixels (1024 x 1024), even after I collapsed the three color values into a single number per pixel. If you hover over the dot at (300, 250), you should see the text "Here be dragons!".
library(png)
library(ggplot2)
library(plotly)
# Overlay hoverable annotation points on a PNG rendered as a ggplot heatmap,
# then convert the plot to an interactive plotly widget.

# readPNG() yields a height x width x channels array, or a plain
# height x width matrix when the image has a single channel.
PNG <- readPNG("9mYzCRX.png")

# Collapse the channels into one intensity per pixel. rowMeans(dims = 2)
# averages over the third dimension in compiled code instead of calling
# mean() once per pixel; every channel present in the array is averaged.
# A single-channel image is already a matrix and is used as is.
PNGbw <- if (length(dim(PNG)) == 3L) {
  rowMeans(PNG, dims = 2L)
} else {
  PNG
}

# Take the pixel grid size from the image rather than hard-coding it, so the
# script works for any image dimensions.
img_height <- nrow(PNGbw)
img_width <- ncol(PNGbw)

# as.vector() unrolls the matrix column by column, so x advances once per
# column while y runs through every row. Row 1 is the top of the image, hence
# y counts down from img_height to keep the picture upright.
PNGdf <- data.frame(
  x = rep(seq_len(img_width), each = img_height),
  y = rep(rev(seq_len(img_height)), times = img_width),
  Value = as.vector(PNGbw)
)

# Annotation points; INFO holds the text shown on hover.
Comments <- data.frame(x = 300, y = 250, INFO = "Here be dragons!")

# `text` is not a ggplot2 aesthetic (ggplot2 warns that it is ignored); it is
# supplied only so ggplotly() can use it as the tooltip.
plt <- ggplot() +
  geom_tile(data = PNGdf, mapping = aes(x = x, y = y, fill = Value)) +
  geom_point(data = Comments, mapping = aes(x = x, y = y, text = INFO)) +
  theme_classic()

# Limit the tooltip to the `text` aesthetic so hovering shows only INFO.
ggplotly(plt, tooltip = "text")