Downloading an XML file and creating a DCR from it

Posted: 22 July 2009 in CSSDK, Data Content Records, FormsPublisher, Java, Teamsite

/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package com.acme;

import com.interwoven.cssdk.access.CSAuthenticationException;
import com.interwoven.cssdk.access.CSAuthorizationException;
import com.interwoven.cssdk.access.CSExpiredSessionException;
import com.interwoven.cssdk.common.CSClient;
import com.interwoven.cssdk.common.CSException;
import com.interwoven.cssdk.common.CSRemoteException;
import com.interwoven.cssdk.factory.CSFactory;
import com.interwoven.cssdk.factory.CSLocalFactory;
import com.interwoven.cssdk.filesys.CSExtendedAttribute;
import com.interwoven.cssdk.filesys.CSSimpleFile;
import com.interwoven.cssdk.filesys.CSVPath;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
*
* @author laurent
*
*/
public class MediaSurfaceArticleDownloader {

private DocumentBuilder builder;
private XPathExpression xPathExpressionDate;
private XPathExpression xPathExpressionTitle;
private SimpleDateFormat dateFormat = new SimpleDateFormat(“dd MMMM yyyy”);
private SimpleDateFormat reverseDateFormat = new SimpleDateFormat(“yyyyMMdd”);
private Transformer transformer;
private CSClient client;
private CSExtendedAttribute[] extendedAttributes = new CSExtendedAttribute[1];

public MediaSurfaceArticleDownloader() {
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(false);
this.builder = factory.newDocumentBuilder();
System.out.print(“Preparing XPath pattern…”);
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xPath = xPathfactory.newXPath();
this.xPathExpressionDate = xPath.compile(“/article/date”);
this.xPathExpressionTitle = xPath.compile(“/article/title”);
System.out.println(“Done”);
TransformerFactory tFactory = TransformerFactory.newInstance();
this.transformer = tFactory.newTransformer();
System.out.print(“Connecting to localhost…”);
Properties localProperties = new Properties();
localProperties.setProperty(“cssdk.cfg.path”, “/apps/interwoven/teamsite/cssdk/cssdk.cfg”);
CSFactory csFactory = CSLocalFactory.getFactory(localProperties);
this.client = csFactory.getClientForCurrentUser(new Locale(“en”, “uk”), “MediaSurfaceArticleDownloader”, “localhost”);
System.out.println(“Done”);
System.out.print(“Preparing extended attributes…”);
extendedAttributes[0] = new CSExtendedAttribute(“TeamSite/Templating/DCR/Type”, “intranet/article”);
System.out.println(“Done”);
} catch (CSAuthenticationException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (CSRemoteException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (CSException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (ParserConfigurationException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (TransformerConfigurationException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (XPathExpressionException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
}
}

private void download(URL targetURL) {
Document document;
String targetDirectoryName = “/default/main/intranet/WORKAREA/Compliance/templatedata/intranet/article/data/compliance/news”;
byte[] fileBytes = new byte[255];
try {

// read the targetURL and create an XML file with it
InputStream inputStream = targetURL.openStream();
String outputFileName = targetDirectoryName + “/” + targetURL.getFile().replaceAll(“/compliance/whats-new/(.*)\\?.*$”, “$1”);
System.out.println(“output file name: ” + outputFileName);
OutputStream outputStream = new FileOutputStream(outputFileName);
int bytesRead;
while ((bytesRead=inputStream.read(fileBytes))!= -1) {
outputStream.write(fileBytes, 0, bytesRead);
}

// set the metadata on the file
CSSimpleFile inputFile = (CSSimpleFile) client.getFile(new CSVPath(outputFileName));
inputFile.setExtendedAttributes(extendedAttributes);

// rename the file with the article date and the title
// we need to re-open the stream that the document builder has closed for us.
InputSource inputSource = new InputSource(new FileInputStream(inputFile.getVPath().getPathNoServer().toString()));
String dateAsString = xPathExpressionDate.evaluate(inputSource);
String reverseDateString;
Date newsDate = new Date();
try {
newsDate = dateFormat.parse(dateAsString);
} catch (Exception e) {
newsDate = new Date();
}
try {
reverseDateString = reverseDateFormat.format(newsDate);
} catch (Exception e) {
reverseDateString = “19700101”; /* use the epoch */
}

// we need to re-open the stream that the date XPath evaluation has closed for us.
inputSource = new InputSource(new FileInputStream(inputFile.getVPath().getPathNoServer().toString()));
String title = xPathExpressionTitle.evaluate(inputSource);
String titleNoSpace = title.replaceAll(“\\s“, “-“);

// come up with the file name that will contain this DCR
outputFileName = reverseDateString + “-” + titleNoSpace;
outputFileName = outputFileName.replaceAll(“(<B>|</B>|<I>|</I>)”,””);
outputFileName = outputFileName.replaceAll(“[‘\\\\/:*?\”<>|,&;’`]”,””);
System.out.println(“renaming file to ” + outputFileName);
inputFile.rename(outputFileName, true);

} catch (CSAuthorizationException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (CSExpiredSessionException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (CSRemoteException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (CSException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (XPathExpressionException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
//        } catch (TransformerConfigurationException ex) {
//            Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
//        } catch (TransformerException ex) {
//            Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
//        } catch (SAXException ex) {
//            Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
}

}

public void seek(boolean download) {
InputStream inputStream = null;
try {

//URL articlesURL = new URL(“file:///home/piquetl/WIM-1734/mediasurface.source.articles.xml“);
//URL articlesURL = new URL(“http://webcontent/whatsnew/compliance/?view=toTeamSite“);
URL articlesURL = new URL(“http://webcontent/compliance/whats-new/?view=toTeamSite“);
inputStream = articlesURL.openStream();
InputSource inputSource = new InputSource(inputStream);
Document document = builder.parse(inputSource);
NodeList articleNodes = document.getElementsByTagName(“article”);

for (int i = 0; i < articleNodes.getLength(); i++) {
URL articleURL = new URL(articleNodes.item(i).getFirstChild().getNodeValue());
//System.err.println(articleURL.getPath());

if (!articleURL.getFile().contains(“.pdf”) &&
!articleURL.getFile().contains(“.doc”) &&
!articleURL.getFile().contains(“.xls”) &&
!articleURL.getFile().contains(“.ppt”)) {
System.out.println(articleNodes.item(i).getFirstChild().getNodeValue());
if (download) {
download(articleURL);
}
}
}

} catch (SAXException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (MalformedURLException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
} finally {
try {
inputStream.close();
} catch (IOException ex) {
Logger.getLogger(MediaSurfaceArticleDownloader.class.getName()).log(Level.SEVERE, null, ex);
}
}

}

/**
* @param args the command line arguments
*/
public static void main(String[] args) {
MediaSurfaceArticleDownloader urlDownloader = new MediaSurfaceArticleDownloader();
urlDownloader.seek(true); // true for download, false otherwise
}
}

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s