Parsing an HTML/XML document

  1. From a String
  2. From a File
  3. From the Internets
  4. Encoding
  5. Parse Options

From a String

let html = "<html><body><h1>Tutorials</h1></body></html>"
if let htmlDoc = HTML(html: html, encoding: NSUTF8StringEncoding) {
}

let xml = "<root><item><name>Tutorials</name></item></root>"
if let xmlDoc = XML(xml: xml, encoding: NSUTF8StringEncoding) {
}

The variables htmlDoc and xmlDoc are Kanna documents, which have interesting properties and methods.

From a File

let data = NSData(contentsOfFile: filePath)!
if let doc = HTML(html: data, encoding: NSUTF8StringEncoding) {
}

or

let html = try! String(contentsOfFile: path, encoding: NSUTF8StringEncoding)
if let doc = HTML(html: html, encoding: NSUTF8StringEncoding) {
}

From the Internets

let url = NSURL(string: "https://en.wikipedia.org/wiki/Cat")
if let doc = HTML(url: url!, encoding: NSUTF8StringEncoding) {
}

Of course, you can use other networking libraries (e.g. Alamofire).

Alamofire.request(.GET, NSURL(string: "https://en.wikipedia.org/wiki/Cat")!)
    .responseString(encoding: NSUTF8StringEncoding) { (request, response, string, error) in
        if let doc = HTML(html: string!, encoding: NSUTF8StringEncoding) {
        }
    }

Encoding

If you want Kanna to handle the document encoding properly, your best bet is to explicitly set the encoding. Here is an example of explicitly setting the encoding to EUC-JP on the parser:

let html = "<html><body><h1>Tutorials</h1></body></html>"
if let htmlDoc = HTML(html: html, encoding: NSJapaneseEUCStringEncoding) {
}

Parse Options

Kanna offers quite a few options that affect how a document is parsed.

Note: The `option` argument is optional and can be omitted, as in the earlier examples.

let html = "<html><body><h1>Tutorials</h1></body></html>"
if let doc = HTML(html: html, encoding: NSUTF8StringEncoding, option: .HtmlParseUseLibxml([.STRICT])) {
}