From df94569299582be6db9b00ea50ba5897c8fbf61b Mon Sep 17 00:00:00 2001 From: Baldur Kristinsson Date: Wed, 3 Feb 2016 14:52:55 +0000 Subject: [PATCH] Major improvements to the export support. * Add support for per-page configuration for export formats. * Add support for biblatex/natbib in latex/pdf/beamer output. * Fix handling of docx and odt *_template option. --- CHANGELOG | 7 +++ README.md | 44 ++++++++++++++++--- pandoc.pm | 129 +++++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 148 insertions(+), 32 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c588e0d..c2fa829 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,10 @@ +0.4 (2016-02-03) +================= + +* Add support for per-page configuration for export formats. +* Add support for biblatex/natbib in latex/pdf/beamer output. +* Fix handling of docx and odt `*_template` option (`--reference-XXX`, not `--template`). + 0.3 (2016-02-02) ================ diff --git a/README.md b/README.md index c3f05de..87f9e7c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ ikiwiki-pandoc Pandoc plugin for ikiwiki. -Pandoc has a richer syntax and more flexible configuration than Markdown, and is also able to parse a variety of other syntaxes. This plugin can be configured to generate wiki pages from LaTeX, reST, mediawiki, textile, OPML or Emacs Org sources, as well as markdown. It can also be configured to convert inline TeX math using a variety of methods. Finally, if Pandoc was compiled with the `-fhighlighting` option, it will apply syntax highlighting to code blocks and `inline code spans`. +[Pandoc](http://johnmacfarlane.net/pandoc/) has a richer syntax and more flexible configuration than standard Markdown, and is also able to parse a variety of other syntaxes. This plugin can be configured to generate wiki pages from LaTeX, reST, mediawiki, textile, OPML or Emacs Org sources, as well as markdown. 
It can also be configured to convert and display inline TeX math using a variety of methods. If Pandoc was compiled with the `-fhighlighting` option, it will also apply syntax highlighting to code blocks and `inline code spans`. Finally, it is possible to export the content of a wiki page to several of the non-HTML formats supported by pandoc, including pdf and docx, or to create slideshows using Beamer and reveal.js. * @@ -120,6 +120,8 @@ will export files of all formats except Beamer and reveal.js. When such extra formats have been generated for a page, links to the exported files will be appended to the so-called action links ("Edit", "History", etc.). These links are at the top of the page in the default theme. +#### Configuration options + There are several configuration options related to the export functionality: * `pandoc_latex_template`: Path to pandoc template for LaTeX and PDF output. Since PDF files are created by way of LaTeX, there is no separate PDF template. (Obviously, PDF generation requires a working LaTeX installation). @@ -134,11 +136,11 @@ There are several configuration options related to the export functionality: * `pandoc_revealjs_extra_options`: List of extra pandoc options for Reveal.js slides generation. **Please note** that the option `--self-contained` is added automatically. In order for this to work, pandoc has to know where to find the reveal.js Javascript and CSS files. The easiest way of making sure of this is to keep them in pandoc's default user data directory. You can see the name of this folder by running `pandoc --version`; usually it is `~/.pandoc`, in which case the reveal.js files would be in the subdirectory `~/.pandoc/reveal.js/`. You can download the most recent reveal.js release [here](https://github.com/hakimel/reveal.js/releases). -* `pandoc_docx_template`: Path to pandoc template for MS Word (`docx`) output. +* `pandoc_docx_template`: Path to reference `docx` document used by pandoc for MS Word output. 
* `pandoc_docx_extra_options`: List of extra pandoc options for `docx` generation. -* `pandoc_odt_template`: Path to pandoc template for OpenDocument (`odt`) output for LibreOffice, OpenOffice, etc.. +* `pandoc_odt_template`: Path to reference `odt` document used by pandoc for OpenDocument output for LibreOffice, OpenOffice, etc. * `pandoc_odt_extra_options`: List of extra pandoc options for `odt` generation. @@ -146,11 +148,39 @@ There are several configuration options related to the export functionality: * `pandoc_epub_extra_options`: List of extra pandoc options for epub generation. -**Notable limitations** with regard to the export suppport: +#### Overriding settings on a specific page + +It is possible to override and/or extend the settings for an output format on a given page, using meta attributes with the same names as the configuration options above, except without the `pandoc_*` prefix. As an example, consider the YAML metadata block below: + +```yaml +title: The Communist Manifesto +author: + - Karl Marx + - Friedrich Engels +date: 1848-02-21 +lang: en-GB +bibliography: /home/km/bib/communism.bib +generate_pdf: true +generate_docx: true +latex_template: booklet_tpl.latex +latex_extra_options: + - -\-biblatex + - -\-variable=biblio-style:authoryear +docx_template: manifesto_tpl.docx +``` + +Here, the `latex_template` setting (which controls both `pdf` and `latex` output) will **replace** whatever was configured in the `*.setup` file under `pandoc_latex_template`, while the `latex_extra_options` setting will be **added** to the list of extra arguments (if any) specified in `pandoc_latex_extra_options`. + +Also note that pandoc interprets string values in the meta block as markdown, which is why we need to backslash-escape one of the leading hyphens in each option. Otherwise, `--` will be turned into `–` (an ndash) during meta parsing, at least if the `pandoc_smart` configuration option is turned on, with predictably undesirable results. 
+ +Finally, specifying `--biblatex` or `--natbib` in the extra options for the `pdf` or `beamer` export formats will automatically turn off citation processing using `pandoc-citeproc`, leaving that task to the relevant LaTeX packages. In order for this to work properly, you need to have a working LaTeX installation, including the utility `latexmk`, which has to be in your `$PATH`. + +#### Notable export limitations + +* There is currently no option for turning some list of export formats on by default for all pandoc-processed pages. The reason is that some plugins which insert content into the page, notably the [template plugin](https://ikiwiki.info/plugins/template/), call pandoc in such a way that the pandoc plugin apparently has no certain way of distinguishing between these calls and the processing of an entire page. A global option might thus lead to much wasted work and conceivably even to the overwriting of freshly-generated export files by incorrect content. -* There is currently no way of overriding template or option settings for a specific format on a per-page basis. +* Many export formats supported by pandoc itself are not supported at present by ikiwiki-pandoc. These include `fb2`, `docbook`, `context` and `rtf`, as well as a few HTML-based slide formats (`s5`, `slidy`, ...) and some text-based markup formats (`asciidoc`, `dokuwiki`, ...). -* There is currently no option for turning some list of export formats on by default for all pandoc-processed pages. The reason is that some plugins which insert content into the page, notably the [template plugin](https://ikiwiki.info/plugins/template/), call pandoc in such a way that the pandoc plugin apparently has no certain way of distinguishing between these calls and the processing of an entire page. A global option might thus lead to much wasted work and conceivably even to the overwriting of export files by incorrect content. 
Details ------- @@ -166,7 +196,7 @@ Pandoc can be configured to apply classes globally to all its inline code blocks } ~~~ -The line of `~~~` can be longer than 3 characters, if you like. This manner of writing indented code blocks also +The line of `~~~` can be longer than 3 characters, if you like. Github-style code blocks (using backticks instead of tildes) are also supported. This manner of writing indented code blocks also permits us to specify the block's specific syntax, which might be different from other blocks: diff --git a/pandoc.pm b/pandoc.pm index 3fb55b0..d0daf33 100755 --- a/pandoc.pm +++ b/pandoc.pm @@ -425,33 +425,23 @@ sub htmlize ($@) { # Get some selected meta attributes, more specifically: # (title date bibliography csl subtitle abstract summary description - # version references author [+ num_authors primary_author]) - - sub compile_string { - # Partially represents an item from the data structure in meta as a string. - my @uncompiled = @_; - return $uncompiled[0] if @uncompiled==1 && !ref($uncompiled[0]); - @uncompiled = @{$uncompiled[0]} if @uncompiled==1 && ref $uncompiled[0] eq 'ARRAY'; - my $compiled_string = ''; - foreach my $word_or_space (@uncompiled) { - next unless ref $word_or_space eq 'HASH'; - my $type = $word_or_space->{'t'}; - $compiled_string .= compile_string(@{ $word_or_space->{c} }) if $type eq 'MetaInlines'; - next unless $type eq 'Str' || $type eq 'Space' || $type eq 'MetaString'; - $compiled_string .= $type eq 'Space' ? ' ' : $word_or_space->{c}; - } - return $compiled_string; - } + # version lang locale references author [+ num_authors primary_author]), + # as well as some configuration options (generate_*, *_extra_options, *_template). 
+ my @format_keys = grep { $_ ne 'pdf' } keys %extra_formats; my %scalar_meta = map { ($_=>undef) } qw( title date bibliography csl subtitle abstract summary description version lang locale); + $scalar_meta{$_.'_template'} = undef for @format_keys; my %bool_meta = map { ("generate_$_"=>0) } keys %extra_formats; my %list_meta = map { ($_=>[]) } qw/author references/; + $list_meta{$_.'_extra_options'} = [] for @format_keys; my $have_bibl = 0; foreach my $k (keys %scalar_meta) { next unless $meta->{$k}; $scalar_meta{$k} = compile_string($meta->{$k}->{c}); + # NB! Note that this is potentially risky, since pagestate is sticky, and + # we only cleanup the pandoc_* values in {meta}. $pagestate{$page}{meta}{$k} = $scalar_meta{$k}; $pagestate{$page}{meta}{"pandoc_$k"} = $pagestate{$page}{meta}{$k}; } @@ -470,10 +460,10 @@ sub htmlize ($@) { } foreach my $k (keys %list_meta) { next unless $meta->{$k}; - $list_meta{$k} = $meta->{$k}->{'c'}; - $list_meta{$k} = [ map { compile_string($_) } @{$list_meta{$k}} ] if $k eq 'author'; + $list_meta{$k} = unwrap_c($meta->{$k}); + $list_meta{$k} = [$list_meta{$k}] unless ref $list_meta{$k} eq 'ARRAY'; $have_bibl = 1 if $k eq 'references'; - $pagestate{$page}{meta}{"pandoc_$k"} = $pagestate{$page}{meta}{$k}; + $pagestate{$page}{meta}{"pandoc_$k"} = $list_meta{$k}; } # Try to add other keys as scalars, with pandoc_ prefix only. foreach my $k (keys %$meta) { @@ -599,25 +589,69 @@ sub export_file { my $subdir = $1 if $export_path =~ /(.*)\//; my @extra_args = @{ $extra_formats{$ext}->{extra} }; my $eopt = $ext eq 'pdf' ? 'latex' : $ext; - my $template = $config{"pandoc_".$eopt."_template"} || ''; - push @extra_args, "--template=$template" if $template; + # Note that template in meta OVERRIDES template in config, + # while extra_options in meta are ADDED to extra_options in config. 
+ my $template = $pagestate{$page}{meta}{"pandoc_".$eopt."_template"} + || $config{"pandoc_".$eopt."_template"} || ''; + if ($template) { + push @extra_args, ($ext =~ /^(docx|odt)$/ + ? "--reference-$ext=$template" + : "--template=$template"); + } my $conf_extra = $config{"pandoc_".$eopt."_extra_options"}; - if (ref $conf_extra eq 'ARRAY' && @$conf_extra) { - push @extra_args, @$conf_extra; + my $conf_extra_custom = $pagestate{$page}{meta}{"pandoc_".$eopt."_extra_options"}; + foreach my $cnf ($conf_extra, $conf_extra_custom) { + if (ref $cnf eq 'ARRAY' && @$cnf) { + push @extra_args, @$cnf; + } + } + # If the user has asked for native LaTeX bibliography handling in the + # extra_args for this export format (using --biblatex or --natbib), + # some extra care is needed. Among other things, we need an external + # tool for PDF generation. In this case, $indirect_pdf will be true. + my %maybe_non_citeproc = qw/latex 1 pdf 1 beamer 1/; + my $indirect_pdf = 0; + if ($maybe_non_citeproc{$ext} && grep { /^(?:--biblatex|--natbib)$/ } @extra_args) { + $indirect_pdf = 1 unless $ext eq 'latex'; # both for pdf and beamer + @args = grep { ! 
/--filter=.*pandoc-citeproc/ } @args; } eval { if ($subdir && !-d $subdir) { make_path($subdir) or die "Could not make_path $subdir: $!"; } my $to_format = $extra_formats{$ext}->{format} || $ext; + my $tmp_export_path = $export_path; + $tmp_export_path =~ s/\.pdf$/.tex/ if $indirect_pdf; open(EXPORT, "|-", $command, '-f' => 'json', '-t' => $to_format, - '-o' => $export_path, + '-o' => $tmp_export_path, @args, @extra_args) or die "Could not open pipe for $ext: $!"; print EXPORT $json_content; close EXPORT or die "Could not close pipe for $ext: $!"; + if ($indirect_pdf && $tmp_export_path ne $export_path) { + my @latexmk_args = qw(-quiet -silent); + if (grep { /xelatex/ } @extra_args) { + push @latexmk_args, '-xelatex'; + } elsif (grep { /lualatex/ } @extra_args) { + push @latexmk_args, '-lualatex'; + } else { + push @latexmk_args, '-pdf'; + } + chdir $subdir or die "Could not chdir to $subdir: $!"; + my $plain_fn = $1 if $tmp_export_path =~ /([^\/]+)$/; + $plain_fn =~ s/\.tex//; + system('latexmk', @latexmk_args, $plain_fn) == 0 + or die "Could not run latexmk for pdf generation ($export_path): $!"; + system('latexmk', '-c', '-quiet', '-silent', $plain_fn) == 0 + or die "Could not run latexmk for cleanup ($export_path): $!"; + # These files are apparently not cleaned up by latexmk -c. + foreach ('run.xml', 'bbl') { + my $fn = "$subdir/$plain_fn.$_"; + unlink($fn) if -f $fn; + } + } $pagestate{$page}{pandoc_extra_formats}{$ext} = $export_url; }; if ($@) { @@ -651,4 +685,49 @@ sub _export_file_path_and_url { return ($export_path, $export_url); } + +## compile_string and unwrap_c are used to make the meta data structures +## easier to work with for perl. + +sub compile_string { + # Partially represents an item from the data structure in meta as a string. 
+ my @uncompiled = @_; + return $uncompiled[0] if @uncompiled==1 && !ref($uncompiled[0]); + @uncompiled = @{$uncompiled[0]} if @uncompiled==1 && ref $uncompiled[0] eq 'ARRAY'; + my $compiled_string = ''; + foreach my $word_or_space (@uncompiled) { + next unless ref $word_or_space eq 'HASH'; + my $type = $word_or_space->{'t'}; + $compiled_string .= compile_string(@{ $word_or_space->{c} }) if $type eq 'MetaInlines'; + next unless $type eq 'Str' || $type eq 'Space' || $type eq 'MetaString'; + $compiled_string .= $type eq 'Space' ? ' ' : $word_or_space->{c}; + } + return $compiled_string; +} +sub unwrap_c { + # Unwrap pandoc's MetaLists, MetaInlines, etc. + # Finds the deepest-level scalar value for 'c' in the data structure. + # Lists with one element are replaced with the scalar, lists with more + # than one element are returned as an arrayref containing scalars. + my $container = shift; + if (ref $container eq 'ARRAY' && @$container > 1) { + if (ref $container->[0] eq 'HASH' && $container->[0]->{t} =~ /^(?:Str|Space)$/) { + # handles scalar author fields + return join('', map { compile_string($_) } @$container); + } else { + return [map {unwrap_c($_)} @$container]; + } + } elsif (ref $container eq 'ARRAY' && @$container) { + return unwrap_c($container->[0]); + } elsif (ref $container eq 'ARRAY') { + return; + } elsif (ref $container eq 'HASH' && $container->{c}) { + return unwrap_c($container->{c}); + } elsif (ref $container) { + return; + } else { + return $container; + } +} + 1; -- 2.39.2