lib/github/commands/asciidoc2html (view raw)
1#!/usr/bin/env python
2
3"""A small wrapper file for parsing AsciiDoc files at Github."""
4
5__author__ = "Devin Weaver"
6__copyright__ = "Copyright (C) 2009 Devin Weaver"
7__license__ = "Public Domain"
8__version__ = "0.1"
9
10"""
11github_asciidoc.py
12------------------
13
14This is a wrapper file for parsing AsciiDoc files at github. It wraps the
15current AsciiDoc API.
16
AsciiDoc specifications suggest using the file extension `.txt`; however, this
causes a conflict because there is no way to determine whether a text file is
AsciiDoc or not without pre-processing the file. This gives us two simple
options:
21
221. **Parse all text files**. We could have all files ending in `.txt` or
23 ``README.txt`` be parsed through AsciiDoc. It will print pretty text fine
24 even if it isn't formatted as such. However this could be *not what the user
25 expects*.
262. **Pick a unique extension**. We could pick a unique extension (i.e.
27 `.asciidoc`) to prevent clashing. Although not directly suggested by the
28 author of AsciiDoc there is no standard or practice to the contrary.
29
I recommend option two.
31
32Requirements
33~~~~~~~~~~~~
34
35The AsciiDoc API comes in two parts. The first is the system installation of
36AsciiDoc which has a simple install_. The second part is the API script. You
37can either copy this to the current directory or the application's lib folder.
38There is more information on the `API page`_
39
The `re` package is imported here to implement e-mail address cloaking.
AsciiDoc does not offer its own cloaking algorithm like docutils does, so I
made a simple one here to do the same. **If the expense of regexes is too
high it can be easily commented out.**
44
45.. tip::
46 AsciiDoc by default runs in *safe mode* which means it will not include
47 external files that are **not** in the same directory as the `infile`.
48 However since we use a StringIO through the API it should be based on the
49 current working directory.
50
51.. _install: http://www.methods.co.nz/asciidoc/userguide.html
52.. _API page: http://www.methods.co.nz/asciidoc/asciidocapi.html
53"""
54
# Best-effort: adopt the locale configured in the user's environment.
# A missing ``locale`` module or an unsupported locale setting is not fatal
# for rendering, so any ordinary failure is deliberately ignored.
try:
    import locale
    locale.setlocale(locale.LC_ALL, '')
except Exception:  # not a bare ``except``: Ctrl-C / SystemExit still propagate
    pass
60
61import sys
62import cStringIO # faster then StringIO
63from asciidocapi import AsciiDocAPI
64from asciidocapi import AsciiDocError
65import re # only needed to simulate cloak_email_addresses
66
def _escape_html(text):
    """Return *text* with ``&``, ``<`` and ``>`` replaced by HTML entities."""
    # The ampersand must be handled first; otherwise the ampersands that
    # introduce the other two entities would themselves get escaped again.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    return text.replace(">", "&gt;")


def _cloak_email_addresses(html):
    """Rewrite the ``mailto:`` links found in *html* into a cloaked form.

    The AsciiDoc API has no ``cloak_email_addresses`` option, so the same
    effect is produced here with one regular-expression pass. A link such
    as::

        <a href="mailto:abc@mail.example.org">abc@mail.example.org</a>

    becomes::

        <a class="reference" href="mailto:abc%40mail.example.org">
        abc<span>@</span>mail<span>.</span>example<span>.</span>org</a>

    (shown wrapped here; the generated link is a single line). Everything
    outside such links is returned unchanged.
    """
    def mangle(match):
        user, host = match.group(1), match.group(2)
        # ``%%`` yields a literal percent sign, so the href carries the
        # URL-encoded ``@`` (``%40``).
        href = "%s%%40%s" % (user, host)
        label = "%s<span>@</span>%s" % (user, host)
        label = label.replace(".", "<span>.</span>")
        return "<a class=\"reference\" href=\"mailto:%s\">%s</a>" % (href, label)

    # The groups exclude ``"`` (inside the href) and ``<`` (inside the link
    # text) so that a match can never run past the end of its own link and
    # swallow unrelated markup that follows it.
    pattern = r'<a href="mailto:([^@"]+)@([^@"]+)">([^@<]+)@([^@<]+)</a>'
    return re.sub(pattern, mangle, html)


def main():
    """
    Parse the given AsciiDoc file, or the redirected standard input, and
    return the HTML body.

    Usage: asciidoc2html < README.asciidoc
           asciidoc2html README.asciidoc

    Returns the rendered HTML fragment with e-mail links cloaked. An empty
    string is returned when the file named on the command line cannot be
    read. If AsciiDoc reports an error, an HTML-escaped error message wrapped
    in a ``<blockquote>`` is appended to whatever output was produced.
    """
    try:
        # ``with`` guarantees the file handle is closed after reading.
        with open(sys.argv[1], 'r') as source:
            text = source.read()
    except IOError:  # given filename could not be found / read
        return ''
    except IndexError:  # no filename given
        text = sys.stdin.read()

    infile = cStringIO.StringIO(text)
    outfile = cStringIO.StringIO()
    asciidoc = AsciiDocAPI()
    # '-s' is AsciiDoc's --no-header-footer switch: emit the body only.
    asciidoc.options('-s')

    try:
        asciidoc.execute(infile, outfile, 'xhtml11')
    except AsciiDocError as error:
        # The message may quote the user's markup, so escape it before
        # embedding it in the HTML output.
        message = _escape_html("%s" % (error,))
        outfile.write("<blockquote><strong>AsciiDoc ERROR: %s</strong></blockquote>" % (message))

    # To disable cloaking (e.g. if the regex proves too expensive), return
    # ``outfile.getvalue()`` directly instead.
    return _cloak_email_addresses(outfile.getvalue())
117
if __name__ == '__main__':
    # Executed as a script: render the input and emit the resulting HTML
    # fragment on standard output.
    rendered = main()
    print(rendered)