// $Id: nyse.java,v 1.19 2005/07/27 05:37:38 mdean Exp $


/**
 * Scrapes the NYSE "listed company" page for a ticker symbol and turns the
 * fields found there into an RDF model using the nyse_ont vocabulary.
 */
class nyse
{
    // Output format for the listing date.  A fixed locale keeps the calendar
    // and digits independent of the JVM's default locale.
    static final java.text.DateFormat isoDateFormat = new java.text.SimpleDateFormat("yyyy-MM-dd", java.util.Locale.ENGLISH);
    // Format of the listing date as it appears on the page.  Parsed with an
    // English locale so that month abbreviations do not depend on the JVM's
    // default locale (assumes the page uses English month names).
    static final java.text.DateFormat listingDateFormat = new java.text.SimpleDateFormat("dd MMM yyyy", java.util.Locale.ENGLISH);

    /**
     * Fetches the listing page for <code>symbol</code> and builds a model
     * describing it.
     *
     * The symbol, and the name taken from the page title, are always added.
     * Description, listing date, industry, issue type, home page and country
     * are each added only when the corresponding text was found on the page.
     *
     * @param symbol ticker symbol, in any case; must not be null
     * @return a new in-memory model holding one resource (with the empty URI)
     *         that carries the scraped properties
     * @throws NullPointerException if no title could be found on the page
     * @throws java.text.ParseException if the listing date is present but is
     *         not in "dd MMM yyyy" form
     * @throws Exception anything raised while fetching/parsing the page or
     *         while adding statements to the model
     */
    static com.hp.hpl.mesa.rdf.jena.model.Model nyse(String symbol)
	throws Exception
    {
	// Locale-independent case mapping: under e.g. a Turkish default
	// locale "I" would otherwise not map to "i".
	symbol = symbol.toLowerCase(java.util.Locale.ENGLISH);
	org.daml.html.Tree tree = new org.daml.html.Tree("http://www.nyse.com/about/listed/" + symbol + ".html");
	// tree.dump(System.out);	// XXX uncomment to inspect the parsed page

	String prefix = "html/body/table/tr/td[2]/table/";
	// common path to the cell that holds the detail tables
	String details = prefix + "tr[2]/td/table/tr[2]/td/table/tr/td/";

	// getString appears to return null when a path matches nothing (the
	// homePage and description handling already relied on that), so every
	// scraped value is treated as optional below.
	String title = tree.getString(prefix + "tr/td/h1[2]/text()");
	String homePage = tree.getString(details + "div[2]/table/tr/td[2]/a/text()");
	// when a home page link is present it occupies the first table row,
	// pushing the remaining fields down by one
	int index = (homePage == null) ? 1 : 2;
	// NOTE(review): industry is read from div[2] while the following three
	// fields are read from div -- confirm against a live page whether both
	// paths address the same table.
	String industry = trimOrNull(tree.getString(details + "div[2]/table/tr[" + index++ + "]/td[2]/text()"));
	String issueType = trimOrNull(tree.getString(details + "div/table/tr[" + index++ + "]/td[2]/text()"));
	String country = trimOrNull(tree.getString(details + "div/table/tr[" + index++ + "]/td[2]/text()"));
	String listingDate = trimOrNull(tree.getString(details + "div/table/tr[" + index++ + "]/td[2]/text()"));
	String description = tree.getString(details + "div[3]/table/tr[2]/td/text()");
	// some pages (e.g. VZ) list Investor Contact URL

	if (title == null)
	    // Same exception type the unguarded code produced, but with a
	    // message that says what went wrong.
	    throw new NullPointerException("no title found on NYSE listing page for symbol " + symbol);

	com.hp.hpl.mesa.rdf.jena.model.Model model = new com.hp.hpl.mesa.rdf.jena.mem.ModelMem();
	com.hp.hpl.mesa.rdf.jena.model.Resource symbolResource = model.createResource("");

	model.add(symbolResource,
		  com.hp.hpl.mesa.rdf.jena.vocabulary.RDF.type,
		  nyse_ont.Symbol);
	model.add(symbolResource,
		  nyse_ont.symbol,
		  symbol.toUpperCase(java.util.Locale.ENGLISH));
	model.add(symbolResource,
		  nyse_ont.name,
		  stripTitle(title));
	if (description != null) // delisted symbol, e.g. BEL?
	    model.add(symbolResource,
		      nyse_ont.description,
		      description);
	if (listingDate != null)
	    model.add(symbolResource,
		      nyse_ont.listingDate,
		      toIsoDate(listingDate));
	if (industry != null)
	    model.add(symbolResource,
		      nyse_ont.industry,
		      industry);
	if (issueType != null)
	    model.add(symbolResource,
		      nyse_ont.issueType,
		      issueType);
	if (homePage != null)
	    model.add(symbolResource,
		      nyse_ont.homePage,
		      homePage.trim());
	if (country != null)
	    model.add(symbolResource,
		      nyse_ont.country,
		      country);

	return model;
    }

    /** Returns <code>value</code> trimmed, or null when it is null. */
    private static String trimOrNull(String value)
    {
	return (value == null) ? null : value.trim();
    }

    /**
     * Strips the prefix and symbol from the page title: drops everything up
     * to and including the last '&gt;' and everything from the last '('
     * onwards, then trims surrounding whitespace.
     */
    private static String stripTitle(String title)
    {
	int greater = title.lastIndexOf('>');
	if (greater != -1)
	    title = title.substring(greater + 1);
	int paren = title.lastIndexOf('(');
	if (paren != -1)
	    // Cut exactly at the parenthesis and let trim() remove the
	    // separating space.  Cutting at paren - 1 threw when the
	    // parenthesis was the first character and lost a letter when no
	    // space preceded it.
	    title = title.substring(0, paren);
	return title.trim();
    }

    /**
     * Converts a "dd MMM yyyy" listing date to "yyyy-MM-dd".
     *
     * Synchronized because the shared DateFormat instances are not safe for
     * concurrent use.
     */
    private static synchronized String toIsoDate(String listingDate)
	throws java.text.ParseException
    {
	return isoDateFormat.format(listingDateFormat.parse(listingDate));
    }

    /** Prints the command-line usage to stderr and exits with status 1. */
    static void usage()
    {
	System.err.println("Usage:  symbol");
	System.exit(1);
    }

    /**
     * Command-line entry point: expects exactly one argument, the ticker
     * symbol, and writes the resulting model to standard output.
     */
    public static void main(String args[])
	throws Exception
    {
	if (args.length != 1)
	    usage();	// does not return

	com.hp.hpl.mesa.rdf.jena.model.Model model = nyse(args[0]);
	java.io.PrintWriter writer = new java.io.PrintWriter(System.out);
	try {
	    model.write(writer);
	} finally {
	    // close (and thereby flush) even if serialization fails part way
	    writer.close();
	}
    }
}
