// $Id: gendaml.java,v 1.11 2005/01/19 05:04:42 mdean Exp $


class gendaml
{
    /**
     * URL of the FIPS country data: read into countriesModel by main() and
     * used (with "#" + code appended) to build country resource URIs in
     * mapCountry().
     */
    static String fipsBase = "http://www.daml.org/2001/09/countries/fips";
    // Not referenced anywhere in this file.
    // NOTE(review): the name looks like a typo for "chiefsDirectory" -- confirm
    // there are no outside users before renaming.
    static String chiefsirectory = "http://www.daml.org/2002/02/chiefs/";
    // Not referenced anywhere in this file.
    static String chiefsBase = "http://www.cia.gov/cia/publications/factbook/";

    /**
     * suffix appended to the lower-cased FIPS code to form each output file name
     */
    static String FILE_EXTENSION = ".owl";

    // Not referenced anywhere in this file.
    static java.text.DecimalFormat df = new java.text.DecimalFormat("#,##0");

    /**
     * in-memory model that the FIPS country data is read into
     */
    static com.hp.hpl.mesa.rdf.jena.model.Model countriesModel = new com.hp.hpl.mesa.rdf.jena.mem.ModelMem();

    /**
     * map String name to com.hp.hpl.mesa.rdf.jena.model.Resource.
     * Keys added by mapCountry() are upper-cased, and Country.parse() looks
     * names up upper-cased; keys added from the FIPS data in main() are
     * stored exactly as they appear there.
     */
    static java.util.Hashtable countryNames = new java.util.Hashtable();

    /**
     * Strip every comma from the string and trim leading/trailing
     * whitespace from the result.
     *
     * @param string text to tidy; may be null
     * @return the tidied text, or null if string was null
     */
    static String clean(String string)
    {
	if (string == null)
	    return null;

	// Copy everything except commas in one pass, then trim once at the end.
	int length = string.length();
	StringBuffer kept = new StringBuffer(length);
	for (int i = 0; i < length; i++)
	    {
		char c = string.charAt(i);
		if (c != ',')
		    kept.append(c);
	    }
	return kept.toString().trim();
    }

    /**
     * One country's page: parse() reads the page, looks the country up in
     * countryNames, and writes the offices and office holders listed on the
     * page to an RDF file named after the country's FIPS code.
     */
    static class Country
    {
	/**
	 * location of the input page as passed to the constructor; opened as "file:" + uri
	 */
	String uri;
	/**
	 * country name from the page heading, with any NDE suffix removed
	 */
	String name;
	/**
	 * last two characters of the matching FIPS resource's URI
	 */
	String fipsCode;
	/**
	 * No Diplomatic Exchanges
	 */
	boolean nde = false;
	/**
	 * text following " - " in the page heading
	 */
	String lastUpdate;
	// Not used anywhere in this class.
	java.util.Vector lines = new java.util.Vector();
	/**
	 * FIPS country resource found in countryNames, or null if none matched
	 */
	com.hp.hpl.mesa.rdf.jena.model.Resource resource;

	/**
	 * String name -> Country
	 */
	static java.util.TreeMap countries = new java.util.TreeMap();

	Country(String uri)
	{
	    this.uri = uri;
	}

	/**
	 * Parse the page at uri and write its offices to
	 * (lower-cased FIPS code) + FILE_EXTENSION in the current directory.
	 * If no FIPS resource is registered for the country name, a message
	 * is printed to System.err and nothing is written or registered.
	 *
	 * @throws java.io.IOException if the page heading is missing or is
	 *         not of the form "name - last update", or if the output
	 *         file cannot be opened
	 * @throws Exception anything raised by the HTML tree or RDF model
	 */
	void parse()
	    throws Exception
	{
	    org.daml.html.Tree tree = new org.daml.html.Tree("file:" + uri);
	    // tree.dump(System.out);

	    // getString() can return null (the table loop below relies on
	    // that), so fail with a message naming the file instead of an NPE.
	    String heading = tree.getString("html/body/text()");
	    if (heading == null)
		throw new java.io.IOException("no heading text found in " + uri);
	    String pair = heading.trim();
	    int sep = pair.indexOf(" - ");
	    if (sep == (-1))
		throw new java.io.IOException("heading \"" + pair + "\" in " + uri
					      + " is not of the form \"name - last update\"");
	    name = pair.substring(0, sep);
	    if (name.endsWith("---NDE"))
		{
		    nde = true;
		    name = name.substring(0, name.length() - 6);
		}
	    else if (name.endsWith("--NDE"))
		{
		    nde = true;
		    name = name.substring(0, name.length() - 5);
		}
	    lastUpdate = pair.substring(sep + 3);

	    // identify FIPS code
	    resource = (com.hp.hpl.mesa.rdf.jena.model.Resource) countryNames.get(name.toUpperCase());
	    if (resource == null)
		{
		    System.err.println("can't find FIPS code for " + name + " in " + uri);
		    return;
		}

	    // The code is the final two characters of the resource URI.
	    String resourceUri = resource.toString();
	    fipsCode = resourceUri.substring(resourceUri.length() - 2);

	    countries.put(name, this);

	    com.hp.hpl.mesa.rdf.jena.model.Model model = new com.hp.hpl.mesa.rdf.jena.mem.ModelMem();

	    // Table rows are addressed from 1; stop at the first row with no office text.
	    for (int row = 1; true; row++)
		{
		    String prefix = "html/body/center[2]/table/tr[" + row + "]/";
		    String office = tree.getString(prefix + "td[1]/text()");
		    if (office == null)
			break;
		    String surname = clean(tree.getString(prefix + "td[2]/b/text()"));
		    String given = clean(tree.getString(prefix + "td[2]/text()"));
		    String title = clean(tree.getString(prefix + "td[2]/i/text()"));

		    com.hp.hpl.mesa.rdf.jena.model.Resource officeResource = model.createResource();
		    com.hp.hpl.mesa.rdf.jena.model.Resource personResource = model.createResource();

		    model.add(resource,
			      chiefs_ont.office,
			      officeResource);
		    model.add(officeResource,
			      com.hp.hpl.mesa.rdf.jena.vocabulary.RDF.type,
			      chiefs_ont.Office);
		    model.add(officeResource,
			      chiefs_ont.title,
			      office);
		    model.add(personResource,
			      com.hp.hpl.mesa.rdf.jena.vocabulary.RDF.type,
			      foaf_ont.Person);
		    if (surname != null)
			model.add(personResource,
				  foaf_ont.surname,
				  surname);
		    if (given != null)
			model.add(personResource,
				  foaf_ont.givenname,
				  given);
		    if (title != null)
			model.add(personResource,
				  foaf_ont.title,
				  title);
		    model.add(officeResource,
			      chiefs_ont.holder,
			      personResource);
		}

	    // Encode with the charset named in the XML declaration rather than
	    // the platform default, so the declaration matches the bytes written.
	    java.io.PrintWriter stream = new java.io.PrintWriter(new java.io.OutputStreamWriter(new java.io.FileOutputStream(fipsCode.toLowerCase() + FILE_EXTENSION), "ISO-8859-1"));
	    try
		{
		    stream.println("<?xml version='1.0' encoding='ISO-8859-1'?>");
		    model.write(stream);
		}
	    finally
		{
		    // close even when model.write() throws
		    stream.close();
		}
	}
    }

    /**
     * Manually register a country that doesn't use its standard FIPS name.
     * The name is stored upper-cased in countryNames, mapped to the
     * resource fipsBase + "#" + code.
     */
    static void mapCountry(String name, String code)
	throws Exception
    {
	String key = name.toUpperCase();
	com.hp.hpl.mesa.rdf.jena.model.Resource country = countriesModel.createResource(fipsBase + "#" + code);
	countryNames.put(key, country);
    }

    /**
     * Registers the manual name mappings, loads the FIPS country names,
     * parses every input named on the command line, and writes an index
     * page, countries.html, linking to each country that was registered.
     *
     * @param args one input location per country; each is handed to a
     *        Country and parsed
     * @throws Exception if loading the FIPS data, parsing an input, or
     *         opening the index file fails; the first failure aborts the run
     */
    public static void main(String args[])
	throws Exception
    {
	// get Country codes and names (pre-loading to avoid some errors)
	mapCountry("Bahamas, The", "BF");
	mapCountry("Bahrain, Kingdom of", "BA");
	mapCountry("Burkina Faso", "UV");
	mapCountry("Congo, Republic of the", "CF");
	mapCountry("Congo, Democratic Republic of the", "CG");
	mapCountry("Cote d'Ivoire", "IV");
	mapCountry("Gambia, The", "GA");
	mapCountry("Guinea-Bissau", "PU");
	mapCountry("Holy See (Vatican City)", "VT");
	mapCountry("Korea, North", "KN");
	mapCountry("Korea, South", "KS");
	mapCountry("Macedonia", "MK");
	mapCountry("Micronesia, Federated States of", "FM");
	mapCountry("Palau", "PS");
	mapCountry("Saint Kitts and Nevis", "SC");
	mapCountry("Saint Lucia", "ST");
	mapCountry("Saint Vincent and the Grenadines", "VC");
	mapCountry("Samoa", "AQ");
	mapCountry("Yugoslavia", "SR");
	mapCountry("Netherlands Antilles", "NT");
	mapCountry("Taiwan", "TW");
	mapCountry("Bermuda", "BD");
	mapCountry("Cook Islands", "CW");
	mapCountry("Aruba", "AA");
	mapCountry("Romania", "RO");
	mapCountry("Serbia and Montenegro", "YI"); // http://www.cia.gov/cia/publications/factbook/geos/yi.html
	mapCountry("East Timor", "TT");	// http://www.cia.gov/cia/publications/factbook/geos/tt.html

	// Load the FIPS data and index every "name" statement by its object
	// string.  An entry whose key equals a manual mapping above replaces it.
	countriesModel.read(fipsBase);
	com.hp.hpl.mesa.rdf.jena.model.Property name = countriesModel.createProperty("http://www.daml.org/2001/09/countries/fips-10-4-ont#name");
	com.hp.hpl.mesa.rdf.jena.model.StmtIterator stmtIterator = countriesModel.listStatements();
	while (stmtIterator.hasNext())
	    {
		com.hp.hpl.mesa.rdf.jena.model.Statement statement = stmtIterator.next();
		if (statement.getPredicate().equals(name))
		    {
			countryNames.put(statement.getObject().toString(),
					 statement.getSubject());
		    }
	    }

	// process inputs
	for (int i = 0; i < args.length; i++)
	    {
		String arg = args[i];
		Country country = new Country(arg);
		country.parse();
	    }

	// generate index page
	java.io.PrintWriter stream = new java.io.PrintWriter(new java.io.FileOutputStream("countries.html"));
	try
	    {
		String title = "DAML Program CIA Chiefs of State Pages";
		stream.println("<title>" + title + "</title>");
		stream.println("<h1>" + title + "</h1>");
		stream.println("<h2>Countries</h2>");
		stream.println("<ul>");
		// Country.countries is a TreeMap, so entries come out ordered by name.
		java.util.Iterator iterator = Country.countries.values().iterator();
		while (iterator.hasNext())
		    {
			Country country = (Country) iterator.next();
			// NOTE(review): the link omits FILE_EXTENSION although the
			// per-country file is written with it -- confirm this is intended.
			stream.println("  <li><a href=\"" + country.fipsCode.toLowerCase() + "\">" + country.name + "</a></li>");
		    }
		stream.println("</ul>");
		stream.println("<hr>");
		stream.println(new java.util.Date());
	    }
	finally
	    {
		// release the file handle even if writing the index fails part-way
		stream.close();
	    }
    }
}
