How do I export pages to HTML using SOAP or REST API?

David Meibusch June 30, 2015

I see in the rpc/soap-axis/confluenceservice-v2?wsdl that there is an exportSpaceRequest.

Does anyone know how to use it? i.e. what are the parameters?

 

Even better, is there a REST API that I could use to export HTML for a page (and its children)?

4 answers

1 accepted

Comments for this post are closed

Community moderators have prevented the ability to post new answers.

Post a new question

1 vote
Answer accepted
David Meibusch July 12, 2015

We ended up with a dedicated documentation space in Confluence:

  • using permission restrictions to determine the subset of pages exported
  • using exportSpace via SOAP interface to trigger the export
  • plain HTTP GET to get the exported space
  • post-process the HTML for our target

 

Here's the basics of the export script (in Groovy)

@Grapes([
        @Grab(group='args4j', module='args4j',version='2.0.29'),
        @Grab(group='com.github.groovy-wslite', module='groovy-wslite', version='1.1.2'),
        @Grab(group='org.slf4j', module='slf4j-api', version='1.7.12'),
        @Grab(group='org.slf4j', module='jcl-over-slf4j', version='1.7.12'),
        @Grab(group='ch.qos.logback', module='logback-core', version='1.1.3'),
        @Grab(group='ch.qos.logback', module='logback-classic', version='1.1.3'),
        @Grab(group='org.apache.httpcomponents', module='httpclient',version='4.4.1'),
        @Grab(group='org.apache.commons', module='commons-lang3', version='3.1'),
        @Grab(group='commons-io', module='commons-io', version='2.4'),
        @GrabExclude('commons-logging:commons-logging'),
])

import groovy.util.logging.Slf4j
import org.apache.http.HttpHost
import org.apache.http.HttpResponse
import org.apache.http.auth.AuthScope
import org.apache.http.auth.UsernamePasswordCredentials
import org.apache.http.client.*
import org.apache.http.client.config.RequestConfig
import org.apache.http.client.methods.HttpGet
import org.apache.http.client.protocol.HttpClientContext
import org.apache.http.impl.auth.BasicScheme
import org.apache.http.impl.client.BasicAuthCache
import org.apache.http.impl.client.BasicCredentialsProvider
import org.apache.http.impl.client.HttpClientBuilder
import org.kohsuke.args4j.*
import wslite.soap.*

/**
 * Command-line tool that exports a Confluence space as HTML.
 *
 * The flow is: log in through the SOAP endpoint, call {@code exportSpace} to make
 * Confluence build the export, download the resulting archive with an authenticated
 * HTTP GET into {@code --export-file}, and finally log out of the SOAP session.
 */
@Slf4j
class DocoExporter {

    static final String TEST_CONFLUENCE = "http://bandicoot/confluencetest/"
    static final String DEFAULT_SPACE_KEY = "DOCCS"
    static final String DEFAULT_USERNAME = "user"
    static final String DEFAULT_PASSWORD = "xxxxx"

    @Option(name = "--confluence-url", aliases=["-c"], usage="Confluence URL", required=false)
    String confluenceUrl = TEST_CONFLUENCE

    @Option(name = "--confluence-space", aliases=["-s"], usage="Confluence Space to export (key)", required=false)
    String confluenceSpaceKey = DEFAULT_SPACE_KEY

    @Option(name = "--username", aliases=["-u"], usage="Username", required=false)
    String userName = DEFAULT_USERNAME

    @Option(name = "--password", aliases=["-p"], usage="Password", required=false)
    String password = DEFAULT_PASSWORD

    @Option(name = "--export-file", aliases=["-f"], usage="Export file", required=true)
    String exportFile

    @Option(name = "--timeout", aliases=["-t"], usage="Time out in seconds for connect and response", required=false)
    int timeout = 300

    static main(args) {
        new DocoExporter().doMain(args)
    }

    /**
     * Parses the command line, then runs login, export, download and logout.
     *
     * Exits the JVM with status 2 when the arguments cannot be parsed. Any
     * non-200 HTTP status from the SOAP calls or the download fails an assertion.
     *
     * @param args raw command-line arguments
     */
    def doMain(args) {
        CmdLineParser parser = new CmdLineParser(this)
        try {
            parser.parseArgument(args)
        } catch (CmdLineException e) {
            log.error(e.getMessage())
            log.error("groovy ${this.class.name} [options...] arguments...")
            parser.printUsage(System.err)

            log.info("  Example: groovy ${this.class.name}${parser.printExample(OptionHandlerFilter.ALL)}")

            System.exit 2
        }

        log.info("Export file: ${exportFile}")
        def soapEndpoint = new URL(new URL(confluenceUrl), "plugins/servlet/soap-axis1/confluenceservice-v2")
        log.debug("SOAP endpoint: ${soapEndpoint}")

        def client = new SOAPClient(soapEndpoint.toString())
        def response = client.send(SOAPAction: "") {
            body {
                login('xmlns': 'http://soap.rpc.confluence.atlassian.com') {
                    username(userName)
                    password(password)
                }
            }
        }
        log.debug "Login response {}", response
        assert 200 == response.httpResponse.statusCode

        def apiToken = response.body.loginResponse.loginReturn

        try {
            log.info "Login ${response.httpResponse.statusMessage} ${apiToken}"

            log.debug "Export space ${confluenceSpaceKey}"
            response = client.send(SOAPAction: "") {
                body {
                    exportSpace('xmlns': 'http://soap.rpc.confluence.atlassian.com') {
                        token(apiToken)
                        spaceKey(confluenceSpaceKey)
                        exportType("TYPE_HTML")
                        exportAll(false) // Only export the pages for which the user has permission
                    }
                }
            }

            log.debug "Export space response {}", response
            assert 200 == response.httpResponse.statusCode

            URL exportUrl = new URL(response.body.exportSpaceResponse.exportSpaceReturn.toString())
            log.info "Export space URL {}", exportUrl

            downloadExport(exportUrl)
        } finally {
            log.debug "Logout ${apiToken}"
            // Keep the logout response so the log line reports the logout call itself,
            // not whichever earlier call last assigned `response`.
            def logoutResponse = client.send(SOAPAction: "") {
                body {
                    logout('xmlns': 'http://soap.rpc.confluence.atlassian.com') {
                        token(apiToken)
                    }
                }
            }
            log.info "Logout ${logoutResponse.httpResponse.statusMessage} ${apiToken}"
        }

    }

    /**
     * Downloads the export archive at the given URL into {@code exportFile},
     * using preemptive basic authentication with the configured credentials.
     *
     * An existing export file is overwritten rather than appended to; appending
     * would mix old and new archive bytes and corrupt the result.
     *
     * @param exportUrl download URL returned by the exportSpace call
     */
    private void downloadExport(URL exportUrl) {
        HttpHost targetHost = new HttpHost(exportUrl.host, exportUrl.port, exportUrl.protocol)
        CredentialsProvider provider = new BasicCredentialsProvider()
        provider.setCredentials(new AuthScope(AuthScope.ANY_HOST, AuthScope.ANY_PORT, AuthScope.ANY_REALM),
                                new UsernamePasswordCredentials(userName, password))

        int timeoutMillis = timeout * 1000
        RequestConfig config = RequestConfig.custom()
                                        .setConnectTimeout(timeoutMillis)
                                        .setConnectionRequestTimeout(timeoutMillis)
                                        .setSocketTimeout(timeoutMillis).build()

        // Pre-populating the auth cache makes the client send credentials with the
        // first request instead of waiting for a 401 challenge.
        AuthCache authCache = new BasicAuthCache()
        authCache.put(targetHost, new BasicScheme())
        HttpClientContext context = HttpClientContext.create()
        context.setCredentialsProvider(provider)
        context.setAuthCache(authCache)

        HttpClientBuilder.create()
                         .setDefaultCredentialsProvider(provider)
                         .setDefaultRequestConfig(config)
                         .build().withCloseable { httpClient ->
            // `file` is the path plus any query string, so nothing in the returned URL is dropped.
            httpClient.execute(targetHost, new HttpGet(exportUrl.file), context).withCloseable { httpResponse ->
                // Check the status before touching the file so an error page is never saved as the export.
                assert 200 == httpResponse.statusLine.statusCode
                // withOutputStream truncates an existing file; `File << stream` would append to it.
                new File(exportFile).withOutputStream { out ->
                    httpResponse.entity.content.withStream { input ->
                        out << input
                    }
                }
            }
        }
    }
}
ibruyninckx May 6, 2019

Hi @David Meibusch - We used the groovy script above to export a few of our spaces. Everything seems to go right, but the export contains outdated information.

Pages that are modified over a week ago are not part of the export.

Any ideas how to prevent that?

ibruyninckx May 9, 2019

#Solved

 

Make sure that the zip file does not exist yet when running the above script.

Otherwise the archive will be opened and files will get added resulting in old results and corrupt archives.

1 vote
Stephen Deutsch
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
June 30, 2015

This page describes the function that allows you to export a space in HTML:

https://developer.atlassian.com/confdev/confluence-rest-api/confluence-xml-rpc-and-soap-apis/remote-confluence-methods#RemoteConfluenceMethods-Spaces

You need to login using the functions at the top of the page.

However, you might be able to do what you want to do with the REST API.  Pretty much any function that returns page information can also return the output of the page in HTML.  Just for the expand parameter use "body.export_view" (i.e. ?expand=body.export_view), or you could also use body.anonymous_export_view or body.view.  It's just that you will have to output the files yourself, and you will have to figure out how to save to a file and get images to display properly (you will also have to download these yourself).

David Meibusch July 1, 2015

The exportSpace API looks to be my only option at the moment. I really wanted to export a specific hierarchy of pages in a space, however the API doesn't appear to give the same flexibility as the Confluence UI which allows you to select pages. Individually walking the tree with the REST API looks...possible. But a lot of work and potentially error prone. The direction I'm going to go initially is to move our documentation to its own space and use {{exportSpace}}. We at least have that flexibility. It's likely we'll have to then post process the HTML to remove some pages / content. Possible we can achieve that just with permissions and a specific export user.

Sergey Svishchev
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
July 2, 2015

You could also use "renderContent(String token, String spaceKey, String pageId, String content) - returns the HTML rendered content for this page"

0 votes
Peter Kahn
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
January 19, 2016

Using Bob's exportSpace in groovy/gradle I do this:

 

/**
     * Export a space as PDF and save it to a local file.
     *
     * Logs in to the PDF export SOAP service, asks it to export the space, then
     * downloads the resulting URL with basic authentication.
     *
     * @param spaceKey wiki spacekey
     * @param outputFile file the PDF is written to (overwritten if it exists)
     * @throws GradleException if the download has no body or does not return HTTP 200
     */
    void exportSpaceAsPdf(def spaceKey, File outputFile) {
        // Setup Pdf Export Service
        PdfExportRpcServiceLocator serviceLocator = new PdfExportRpcServiceLocator()
        serviceLocator.setpdfexportEndpointAddress("${url}/rpc/soap-axis/pdfexport")
        serviceLocator.setMaintainSession(true)
        def pdfService = serviceLocator.getpdfexport()

        // Login
        def token = pdfService.login(user, password)

        // Perform Export
        def pdfUrl = pdfService.exportSpace(token, spaceKey)

        // Download Pdf
        HttpClient client = new DefaultHttpClient()
        try {
            HttpGet httpget = new HttpGet(pdfUrl)
            httpget.addHeader(
                    BasicScheme.authenticate(
                            new UsernamePasswordCredentials(user, password), "UTF-8", false))
            HttpResponse response = client.execute(httpget)
            HttpEntity entity = response.getEntity()

            // An error response (401, 404, ...) usually carries a body too, so the status
            // must be checked as well or the error page would be saved as the PDF.
            if (entity == null || response.getStatusLine().getStatusCode() != 200) {
                throw new GradleException("""Cannot Export Space to PDF:
Space:  ${spaceKey}
Dest:   ${outputFile.absolutePath}
URL:    ${pdfUrl}
Status: ${response.getStatusLine()}
""")
            }

            // Buffered copy; both streams are closed even if the copy fails part-way.
            outputFile.withOutputStream { out ->
                entity.getContent().withStream { input ->
                    out << input
                }
            }
        } finally {
            // Release the client's connections whether or not the download succeeded.
            client.getConnectionManager().shutdown()
        }

    }
0 votes
Bob Swift OSS (Bob Swift Atlassian Apps)
Rising Star
Rising Star
Rising Stars are recognized for providing high-quality answers to other users. Rising Stars receive a certificate of achievement and are on the path to becoming Community Leaders.
July 1, 2015

Comments for this post are closed

Community moderators have prevented the ability to post new answers.

Post a new question

TAGS
AUG Leaders

Atlassian Community Events