I think they're just very different implementations. The code for XML::htmlTreeParse()
is
# Printed source of XML::htmlTreeParse().
#
# Parses HTML (or XML when isHTML = FALSE) by handing the input to the
# compiled routine "RS_XML_ParseTree" in the XML package.
#
# Arguments (only what the visible code establishes):
#   file       - file name, URL, or the content itself. A vector with more
#                than one element is collapsed with "\n" and treated as text.
#   asText     - TRUE if `file` holds the content rather than a name. If it is
#                not supplied and `file` is neither a URL nor an existing
#                file, it is switched to TRUE automatically.
#   isURL      - if not supplied (and asText is FALSE) it is inferred from an
#                http://, ftp:// or file:// prefix on `file`.
#   isHTML     - when TRUE, validate, getDTD and isSchema are forced to FALSE
#                and "HTMLInternalDocument" is used as the document class.
#   handlers   - checked with checkHandlerNames(handlers, "DOM") and passed to
#                the C routine.
#   error      - handler installed through setXMLErrorHandler() for the
#                duration of the call and also passed to the C routine.
#   options    - if non-empty, summed into a single value before being passed
#                on as an integer.
#   The remaining arguments are coerced and forwarded to "RS_XML_ParseTree".
#
# Value: `handlers` when handlers were supplied, are non-empty and asTree is
# FALSE; otherwise the (possibly re-classed) result of the C call.
#
# Signals a "MultipleURLError" condition, a "FileNotFound" error, or an
# "XMLInputError" (a warning when isHTML is TRUE, an error otherwise).
function (file, ignoreBlanks = TRUE, handlers = NULL, replaceEntities = FALSE,
asText = FALSE, trim = TRUE, validate = FALSE, getDTD = TRUE,
isURL = FALSE, asTree = FALSE, addAttributeNamespaces = FALSE,
useInternalNodes = FALSE, isSchema = FALSE, fullNamespaceInfo = FALSE,
encoding = character(), useDotNames = length(grep("^\\.",
names(handlers))) > 0, xinclude = TRUE, addFinalizer = TRUE,
error = htmlErrorHandler, isHTML = TRUE, options = integer(),
parentFirst = FALSE)
{
# Record whether the caller supplied asText before it may be reassigned below.
isMissingAsText = missing(asText)
# Several elements: join them into one string and treat it as content,
# unless the caller explicitly passed asText = FALSE, which is an error.
if (length(file) > 1) {
file = paste(file, collapse = "\n")
if (!missing(asText) && !asText)
stop(structure(list(message = "multiple URLs passed to xmlTreeParse. If this is the content of the file, specify asText = TRUE"),
class = c("MultipleURLError", "XMLParserError",
"simpleError", "error", "condition")))
asText = TRUE
}
# Infer isURL from the scheme prefix; the result is the match count
# returned by length(grep(...)), not a logical.
if (missing(isURL) && !asText)
isURL <- length(grep("^(http|ftp|file)://", file, useBytes = TRUE,
perl = TRUE))
# HTML input overrides the validation/DTD/schema arguments.
if (isHTML) {
validate = FALSE
getDTD = FALSE
isSchema = FALSE
docClass = "HTMLInternalDocument"
}
else docClass = character()
checkHandlerNames(handlers, "DOM")
# Handlers of class "RequiresNamespaceInfo" turn on fullNamespaceInfo
# unless the caller set it explicitly.
if (missing(fullNamespaceInfo) && inherits(handlers, "RequiresNamespaceInfo"))
fullNamespaceInfo = TRUE
# Save the value returned by xmlValidity(), apply `validate`, and restore
# the saved value on exit. (xmlValidity() is defined elsewhere; presumably
# it gets/sets a package-level validation flag - confirm.)
oldValidate = xmlValidity()
xmlValidity(validate)
on.exit(xmlValidity(oldValidate))
# Not text and not a URL: `file` should name an existing file. If it does
# not, fail only when the caller explicitly passed asText = FALSE;
# otherwise fall back to treating `file` as the content itself.
if (!asText && isURL == FALSE) {
if (file.exists(file) == FALSE)
if (!missing(asText) && asText == FALSE) {
e = simpleError(paste("File", file, "does not exist"))
class(e) = c("FileNotFound", class(e))
stop(e)
}
else asText <- TRUE
}
# NOTE(review): `file` was already collapsed to one string above whenever
# it had more than one element, so this branch appears to be unreachable.
if (asText && length(file) > 1)
file = paste(file, collapse = "\n")
# Apply replaceEntities via setEntitySubstitution() and restore the
# previous value on exit.
old = setEntitySubstitution(replaceEntities)
on.exit(setEntitySubstitution(old), add = TRUE)
# Sanity check for text input: after an optional BOMRegExp match and
# optional whitespace the content should start with "<". (BOMRegExp is
# defined elsewhere; presumably it matches a byte-order mark - confirm.)
# Non-HTML input fails with an "XMLInputError"; HTML input only warns, and
# only when asText was not supplied and `file` is not of class "AsIs".
if (asText && length(grep(sprintf("^%s?\\s*<", BOMRegExp),
file, perl = TRUE, useBytes = TRUE)) == 0) {
if (!isHTML || (isMissingAsText && !inherits(file, "AsIs"))) {
e = simpleError(paste("XML content does not seem to be XML:",
sQuote(file)))
class(e) = c("XMLInputError", class(e))
(if (isHTML)
warning
else stop)(e)
}
}
if (!is.logical(xinclude)) {
xinclude = as.logical(xinclude)
}
# Expand a leading "~" in local file names.
if (!asText && !isURL)
file = path.expand(as.character(file))
# For internal nodes with trim = TRUE, call "RS_XML_setKeepBlanksDefault"
# with 0L and put the previous value back on exit.
if (useInternalNodes && trim) {
prevBlanks = .Call("RS_XML_setKeepBlanksDefault", 0L,
PACKAGE = "XML")
on.exit(.Call("RS_XML_setKeepBlanksDefault", prevBlanks,
PACKAGE = "XML"), add = TRUE)
}
# Install the error handler for this call; the previous one is restored
# on exit.
.oldErrorHandler = setXMLErrorHandler(error)
on.exit(.Call("RS_XML_setStructuredErrorHandler", .oldErrorHandler,
PACKAGE = "XML"), add = TRUE)
# Combine the option values into a single number.
if (length(options))
options = sum(options)
# The actual parsing happens in compiled code.
ans <- .Call("RS_XML_ParseTree", as.character(file), handlers,
as.logical(ignoreBlanks), as.logical(replaceEntities),
as.logical(asText), as.logical(trim), as.logical(validate),
as.logical(getDTD), as.logical(isURL), as.logical(addAttributeNamespaces),
as.logical(useInternalNodes), as.logical(isHTML), as.logical(isSchema),
as.logical(fullNamespaceInfo), as.character(encoding),
as.logical(useDotNames), xinclude, error, addFinalizer,
as.integer(options), as.logical(parentFirst), PACKAGE = "XML")
# With user handlers and asTree = FALSE the handlers object is the result.
if (!missing(handlers) && length(handlers) && !as.logical(asTree))
return(handlers)
# NOTE(review): oldClass() is applied to the character vector class(ans),
# not to ans itself; for a plain character vector oldClass() is NULL in
# base R, so this may reduce the class to docClass only - confirm intent.
if (!isSchema && length(class(ans)))
class(ans) = c(docClass, oldClass(class(ans)))
if (inherits(ans, "XMLInternalDocument"))
addDocFinalizer(ans, addFinalizer)
else if (!getDTD && !isSchema) {
# NOTE(review): oldClass() of a bare string - same concern as above;
# verify that this assigns the class that was intended.
class(ans) = oldClass("XMLDocumentContent")
}
ans
}
And the RCurl package uses libcurl under the hood (see the description at the site below):
http://www.omegahat.net/RCurl/
RCurl::getURL()
uses libcurl under the hood to perform the request and retrieve the response.