Title: | Cast (R)Markdown Files to XML and Back Again |
---|---|
Description: | Casts (R)Markdown files to XML and back to allow their editing via XPath. |
Authors: | Maëlle Salmon [aut] , Zhian N. Kamvar [aut, cre] , Jeroen Ooms [aut], Nick Wellnhofer [cph] (Nick Wellnhofer wrote the XSLT stylesheet.), rOpenSci [fnd] (https://ropensci.org/), Peter Daengeli [ctb] |
Maintainer: | Zhian N. Kamvar <[email protected]> |
License: | GPL-3 |
Version: | 0.0.0.9001 |
Built: | 2024-10-25 02:44:45 UTC |
Source: | https://github.com/ropensci/tinkr |
Helper function to find all nodes between a standard pattern. This is useful if you want to find unnested pandoc tags.
find_between( body, ns, pattern = "md:paragraph[md:text[starts-with(text(), ':::')]]", include = FALSE )
find_between( body, ns, pattern = "md:paragraph[md:text[starts-with(text(), ':::')]]", include = FALSE )
body |
an XML document |
ns |
the namespace of the document |
pattern |
an XPath expression that defines characteristics of nodes between which you want to extract everything. |
include |
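if TRUE, the nodes matching pattern are included in the output; defaults to FALSE, which returns only the nodes between them. |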
a nodeset
md <- glue::glue(" h1 ==== ::: section h2 ---- section *text* with [a link](https://ropensci.org/) ::: ") x <- xml2::read_xml(commonmark::markdown_xml(md)) ns <- xml2::xml_ns_rename(xml2::xml_ns(x), d1 = "md") res <- find_between(x, ns) res xml2::xml_text(res) xml2::xml_find_all(res, ".//descendant-or-self::md:*", ns = ns)
md <- glue::glue(" h1 ==== ::: section h2 ---- section *text* with [a link](https://ropensci.org/) ::: ") x <- xml2::read_xml(commonmark::markdown_xml(md)) ns <- xml2::xml_ns_rename(xml2::xml_ns(x), d1 = "md") res <- find_between(x, ns) res xml2::xml_text(res) xml2::xml_find_all(res, ".//descendant-or-self::md:*", ns = ns)
The commonmark package is used to translate markdown to XML, but it does
not assign a namespace prefix, which means that xml2 will auto-assign a
default prefix of d1
.
md_ns()
md_ns()
This function renames the default prefix to md
, so that you can use XPath
queries that are slightly more descriptive.
an xml_namespace
object (see xml2::xml_ns()
)
tink <- tinkr::to_xml(system.file("extdata", "example1.md", package = "tinkr")) # with default namespace xml2::xml_find_all(tink$body, ".//d1:link[starts-with(@destination, 'https://ropensci')]" ) # with tinkr namespace xml2::xml_find_all(tink$body, ".//md:link[starts-with(@destination, 'https://ropensci')]", tinkr::md_ns() )
tink <- tinkr::to_xml(system.file("extdata", "example1.md", package = "tinkr")) # with default namespace xml2::xml_find_all(tink$body, ".//d1:link[starts-with(@destination, 'https://ropensci')]" ) # with tinkr namespace xml2::xml_find_all(tink$body, ".//md:link[starts-with(@destination, 'https://ropensci')]", tinkr::md_ns() )
Protect math elements from commonmark's character escape
protect_math(body, ns = md_ns())
protect_math(body, ns = md_ns())
body |
an XML object |
ns |
an XML namespace object (defaults to md_ns()). |
Commonmark does not know what LaTeX is and will interpret LaTeX equations as
normal text. This means that content surrounded by underscores is
interpreted as <emph>
elements and all backslashes are escaped by default.
This function protects inline and block math elements that use $
and $$
for delimiters, respectively.
a copy of the modified XML object
this function is also a method in the yarn object.
m <- tinkr::to_xml(system.file("extdata", "math-example.md", package = "tinkr")) txt <- textConnection(tinkr::to_md(m)) cat(tail(readLines(txt)), sep = "\n") # broken math close(txt) m$body <- protect_math(m$body) txt <- textConnection(tinkr::to_md(m)) cat(tail(readLines(txt)), sep = "\n") # fixed math close(txt)
m <- tinkr::to_xml(system.file("extdata", "math-example.md", package = "tinkr")) txt <- textConnection(tinkr::to_md(m)) cat(tail(readLines(txt)), sep = "\n") # broken math close(txt) m$body <- protect_math(m$body) txt <- textConnection(tinkr::to_md(m)) cat(tail(readLines(txt)), sep = "\n") # fixed math close(txt)
This function returns the path to the tinkr stylesheet
stylesheet()
stylesheet()
a single element character vector representing the path to the stylesheet used by tinkr.
tinkr::stylesheet()
tinkr::stylesheet()
Write YAML and XML back to disk as (R)Markdown
to_md(yaml_xml_list, path = NULL, stylesheet_path = stylesheet())
to_md(yaml_xml_list, path = NULL, stylesheet_path = stylesheet())
yaml_xml_list |
result from a call to to_xml(), possibly edited afterwards. |
path |
path of the new file. Defaults to NULL. |
stylesheet_path |
path to the XSL stylesheet |
The stylesheet you use will decide whether lists
are built using "*" or "-" for instance. If you're keen to
keep your own Markdown style when using to_md()
after
to_xml()
, you can tweak the XSL stylesheet a bit and provide
the path to your XSL stylesheet as argument.
the converted document, invisibly.
path <- system.file("extdata", "example1.md", package = "tinkr") yaml_xml_list <- to_xml(path) names(yaml_xml_list) library("magrittr") # transform level 3 headers into level 1 headers body <- yaml_xml_list$body body %>% xml2::xml_find_all(xpath = './/d1:heading', xml2::xml_ns(.)) %>% .[xml2::xml_attr(., "level") == "3"] -> headers3 xml2::xml_set_attr(headers3, "level", 1) yaml_xml_list$body <- body # save back and have a look newmd <- tempfile("newmd", fileext = ".md") to_md(yaml_xml_list, newmd) # file.edit("newmd.md") file.remove(newmd)
path <- system.file("extdata", "example1.md", package = "tinkr") yaml_xml_list <- to_xml(path) names(yaml_xml_list) library("magrittr") # transform level 3 headers into level 1 headers body <- yaml_xml_list$body body %>% xml2::xml_find_all(xpath = './/d1:heading', xml2::xml_ns(.)) %>% .[xml2::xml_attr(., "level") == "3"] -> headers3 xml2::xml_set_attr(headers3, "level", 1) yaml_xml_list$body <- body # save back and have a look newmd <- tempfile("newmd", fileext = ".md") to_md(yaml_xml_list, newmd) # file.edit("newmd.md") file.remove(newmd)
Transform file to XML
to_xml(path, encoding = "UTF-8", sourcepos = FALSE, anchor_links = TRUE)
to_xml(path, encoding = "UTF-8", sourcepos = FALSE, anchor_links = TRUE)
path |
Path to the file. |
encoding |
Encoding to be used by readLines. |
sourcepos |
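passed to commonmark::markdown_xml(). If TRUE, the source position of the file will be included as a "sourcepos" attribute. Defaults to FALSE. |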
anchor_links |
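if TRUE (the default), reference-style links with anchors are preserved as far as possible; if FALSE, the anchors are dropped and the links are written as ordinary inline links. |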
This function will take a (R)markdown file, split the yaml header
from the body, and read in the body through commonmark::markdown_xml()
.
Any RMarkdown code fences will be parsed to expose the chunk options in
XML and tickboxes (aka checkboxes) in GitHub-flavored markdown will be
preserved (both modifications from the commonmark standard).
Math elements are not protected by default; use protect_math() to keep them from being escaped.
A list containing the YAML of the file (yaml) and its body (body) as XML.
path <- system.file("extdata", "example1.md", package = "tinkr") post_list <- to_xml(path) names(post_list) path2 <- system.file("extdata", "example2.Rmd", package = "tinkr") post_list2 <- to_xml(path2) post_list2
path <- system.file("extdata", "example1.md", package = "tinkr") post_list <- to_xml(path) names(post_list) path2 <- system.file("extdata", "example2.Rmd", package = "tinkr") post_list2 <- to_xml(path2) post_list2
Wrapper around an XML representation of a Markdown document. It contains four publicly accessible slots: path, yaml, body, and ns.
This class is a fancy wrapper around the results of to_xml()
and
has methods that make it easier to add, analyze, remove, or write elements
of your markdown document.
path
[character
] path to file on disk
yaml
[character
] text block at head of file
body
[xml_document
] an xml document of the (R)Markdown file.
ns
[xml_namespace
] an xml namespace object mapping the "md" prefix to the
commonmark namespace.
new()
Create a new yarn document
yarn$new(path = NULL, encoding = "UTF-8", sourcepos = FALSE, ...)
path
[character
] path to a (R)Markdown file on disk
encoding
[character
] encoding passed to readLines()
sourcepos
passed to commonmark::markdown_xml()
. If TRUE
, the
source position of the file will be included as a "sourcepos" attribute.
Defaults to FALSE
.
...
arguments passed on to to_xml()
.
A new yarn object containing an XML representation of a (R)Markdown file.
path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) ex1 path2 <- system.file("extdata", "example2.Rmd", package = "tinkr") ex2 <- tinkr::yarn$new(path2) ex2
reset()
reset a yarn document from the original file
yarn$reset()
path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) # OH NO ex1$body ex1$body <- xml2::xml_missing() ex1$reset() ex1$body
write()
Write a yarn document to Markdown/R Markdown
yarn$write(path = NULL, stylesheet_path = stylesheet())
path
path to the file you want to write
stylesheet_path
path to the xsl stylesheet to convert XML to markdown.
path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) ex1 tmp <- tempfile() try(readLines(tmp)) # nothing in the file ex1$write(tmp) head(readLines(tmp)) # now a markdown file unlink(tmp)
show()
show the markdown contents on the screen
yarn$show(stylesheet_path = stylesheet())
stylesheet_path
path to the xsl stylesheet to convert XML to markdown.
a character vector with one line for each line in the output
path <- system.file("extdata", "example2.Rmd", package = "tinkr") ex2 <- tinkr::yarn$new(path) ex2$head(5) ex2$tail(5) ex2$show()
head()
show the head of the markdown contents on the screen
yarn$head(n = 6L, stylesheet_path = stylesheet())
n
the number of elements to show from the top. Negative numbers exclude lines from the bottom
stylesheet_path
path to the xsl stylesheet to convert XML to markdown.
a character vector with n
elements
tail()
show the tail of the markdown contents on the screen
yarn$tail(n = 6L, stylesheet_path = stylesheet())
n
the number of elements to show from the bottom. Negative numbers exclude lines from the top
stylesheet_path
path to the xsl stylesheet to convert XML to markdown.
a character vector with n
elements
add_md()
add an arbitrary Markdown element to the document
yarn$add_md(md, where = 0L)
md
a string of markdown formatted text.
where
the location in the document to add your markdown text.
This is passed on to xml2::xml_add_child()
. Defaults to 0, which
indicates the very top of the document.
path <- system.file("extdata", "example2.Rmd", package = "tinkr") ex <- tinkr::yarn$new(path) # two headings, no lists xml2::xml_find_all(ex$body, "md:heading", ex$ns) xml2::xml_find_all(ex$body, "md:list", ex$ns) ex$add_md( "# Hello\n\nThis is *new* formatted text from `{tinkr}`!", where = 1L )$add_md( " - This\n - is\n - a new list", where = 2L ) # three headings xml2::xml_find_all(ex$body, "md:heading", ex$ns) xml2::xml_find_all(ex$body, "md:list", ex$ns) tmp <- tempfile() ex$write(tmp) readLines(tmp, n = 20)
protect_math()
Protect math blocks from being escaped
yarn$protect_math()
path <- system.file("extdata", "math-example.md", package = "tinkr") ex <- tinkr::yarn$new(path) ex$tail() # math blocks are escaped :( ex$protect_math()$tail() # math blocks are no longer escaped :)
clone()
The objects of this class are cloneable with this method.
yarn$clone(deep = FALSE)
deep
Whether to make a deep clone.
## ------------------------------------------------ ## Method `yarn$new` ## ------------------------------------------------ path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) ex1 path2 <- system.file("extdata", "example2.Rmd", package = "tinkr") ex2 <- tinkr::yarn$new(path2) ex2 ## ------------------------------------------------ ## Method `yarn$reset` ## ------------------------------------------------ path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) # OH NO ex1$body ex1$body <- xml2::xml_missing() ex1$reset() ex1$body ## ------------------------------------------------ ## Method `yarn$write` ## ------------------------------------------------ path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) ex1 tmp <- tempfile() try(readLines(tmp)) # nothing in the file ex1$write(tmp) head(readLines(tmp)) # now a markdown file unlink(tmp) ## ------------------------------------------------ ## Method `yarn$show` ## ------------------------------------------------ path <- system.file("extdata", "example2.Rmd", package = "tinkr") ex2 <- tinkr::yarn$new(path) ex2$head(5) ex2$tail(5) ex2$show() ## ------------------------------------------------ ## Method `yarn$add_md` ## ------------------------------------------------ path <- system.file("extdata", "example2.Rmd", package = "tinkr") ex <- tinkr::yarn$new(path) # two headings, no lists xml2::xml_find_all(ex$body, "md:heading", ex$ns) xml2::xml_find_all(ex$body, "md:list", ex$ns) ex$add_md( "# Hello\n\nThis is *new* formatted text from `{tinkr}`!", where = 1L )$add_md( " - This\n - is\n - a new list", where = 2L ) # three headings xml2::xml_find_all(ex$body, "md:heading", ex$ns) xml2::xml_find_all(ex$body, "md:list", ex$ns) tmp <- tempfile() ex$write(tmp) readLines(tmp, n = 20) ## ------------------------------------------------ ## Method `yarn$protect_math` ## 
------------------------------------------------ path <- system.file("extdata", "math-example.md", package = "tinkr") ex <- tinkr::yarn$new(path) ex$tail() # math blocks are escaped :( ex$protect_math()$tail() # math blocks are no longer escaped :)
## ------------------------------------------------ ## Method `yarn$new` ## ------------------------------------------------ path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) ex1 path2 <- system.file("extdata", "example2.Rmd", package = "tinkr") ex2 <- tinkr::yarn$new(path2) ex2 ## ------------------------------------------------ ## Method `yarn$reset` ## ------------------------------------------------ path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) # OH NO ex1$body ex1$body <- xml2::xml_missing() ex1$reset() ex1$body ## ------------------------------------------------ ## Method `yarn$write` ## ------------------------------------------------ path <- system.file("extdata", "example1.md", package = "tinkr") ex1 <- tinkr::yarn$new(path) ex1 tmp <- tempfile() try(readLines(tmp)) # nothing in the file ex1$write(tmp) head(readLines(tmp)) # now a markdown file unlink(tmp) ## ------------------------------------------------ ## Method `yarn$show` ## ------------------------------------------------ path <- system.file("extdata", "example2.Rmd", package = "tinkr") ex2 <- tinkr::yarn$new(path) ex2$head(5) ex2$tail(5) ex2$show() ## ------------------------------------------------ ## Method `yarn$add_md` ## ------------------------------------------------ path <- system.file("extdata", "example2.Rmd", package = "tinkr") ex <- tinkr::yarn$new(path) # two headings, no lists xml2::xml_find_all(ex$body, "md:heading", ex$ns) xml2::xml_find_all(ex$body, "md:list", ex$ns) ex$add_md( "# Hello\n\nThis is *new* formatted text from `{tinkr}`!", where = 1L )$add_md( " - This\n - is\n - a new list", where = 2L ) # three headings xml2::xml_find_all(ex$body, "md:heading", ex$ns) xml2::xml_find_all(ex$body, "md:list", ex$ns) tmp <- tempfile() ex$write(tmp) readLines(tmp, n = 20) ## ------------------------------------------------ ## Method `yarn$protect_math` ## 
------------------------------------------------ path <- system.file("extdata", "math-example.md", package = "tinkr") ex <- tinkr::yarn$new(path) ex$tail() # math blocks are escaped :( ex$protect_math()$tail() # math blocks are no longer escaped :)