Adding demos

bzr revid: pinky-64aceb047a0e4da538512b7ddf33d458dd6f8eb3
This commit is contained in:
pinky 2007-01-07 23:34:09 +00:00
parent 963212514a
commit 1315ca0b97
13 changed files with 2181 additions and 0 deletions

View File

@ -0,0 +1,105 @@
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/colors/colortest.py
import reportlab.pdfgen.canvas
from reportlab.lib import colors
from reportlab.lib.units import inch
def run():
    """Write ``colortest.pdf``, a small colour demonstration document.

    The first page draws labelled swatches whose fill colours are set
    alternately through the CMYK and the RGB canvas calls.  The pages after
    that show one labelled swatch for every entry returned by
    ``colors.getAllNamedColors()``, ordered alphabetically by name.
    """
    c = reportlab.pdfgen.canvas.Canvas('colortest.pdf')

    # Page 1: CMYK swatches interspersed with RGB ones.
    framePage(c, 'Color Demo - RGB Space and CMYK spaces interspersed' )
    # (label, canvas method that selects the fill colour, its arguments)
    swatches = (
        ('cyan', c.setFillColorCMYK, (1, 0, 0, 0)),
        ('red', c.setFillColorRGB, (1, 0, 0)),
        ('magenta', c.setFillColorCMYK, (0, 1, 0, 0)),
        ('green', c.setFillColorRGB, (0, 1, 0)),
        ('yellow', c.setFillColorCMYK, (0, 0, 1, 0)),
        ('blue', c.setFillColorRGB, (0, 0, 1)),
        ('black', c.setFillColorCMYK, (0, 0, 0, 1)),
    )
    y = 700
    for label, set_fill, components in swatches:
        c.setFillColorRGB(0, 0, 0)  # the label itself is always black
        c.drawString(100, y, label)
        set_fill(*components)
        c.rect(200, y, 300, 30, fill=1)
        y = y - 40
    c.showPage()

    # Remaining pages: every named colour.
    framePage(c, 'Color Demo - RGB Space - page %d' % c.getPageNumber())
    # sorted() instead of items() followed by .sort(): on Python 3 items()
    # returns a view object, which has no sort() method.
    all_colors = sorted(colors.getAllNamedColors().items())  # alpha order by name
    c.setFont('Times-Roman', 12)
    c.drawString(72,730, 'This shows all the named colors in the HTML standard.')
    y = 700
    for (name, color) in all_colors:
        c.setFillColor(colors.black)
        c.drawString(100, y, name)
        c.setFillColor(color)
        c.rect(200, y-10, 300, 30, fill=1)
        y = y - 40
        if y < 100:
            # Out of vertical room: start a fresh, framed page.
            c.showPage()
            framePage(c, 'Color Demo - RGB Space - page %d' % c.getPageNumber())
            y = 700
    c.save()
def framePage(canvas, title):
    """Decorate the current page with a heading, a page number and a red rule.

    canvas -- the canvas to draw on
    title  -- text drawn in bold italic near the top of the page

    The line width and stroke colour are set back to 1 and black before
    returning; the font is left at Times-Roman 10.
    """
    # Heading
    canvas.setFont('Times-BoldItalic', 20)
    canvas.drawString(inch, 10.5 * inch, title)

    # Page number, centred on x = 4.135in
    # NOTE(review): 4.135in is presumably half the width of an A4 page - confirm.
    canvas.setFont('Times-Roman', 10)
    page_label = 'Page %d' % canvas.getPageNumber()
    canvas.drawCentredString(4.135 * inch, 0.75 * inch, page_label)

    # Thick red vertical rule down the left-hand side
    rule_x = 0.8 * inch
    canvas.setStrokeColorRGB(1, 0, 0)
    canvas.setLineWidth(5)
    canvas.line(rule_x, inch, rule_x, 10.75 * inch)

    # Put the stroke settings back so later drawing is unaffected
    canvas.setLineWidth(1)
    canvas.setStrokeColorRGB(0, 0, 0)
# Generate colortest.pdf when this file is executed as a script.
if __name__ == '__main__':
    run()

View File

@ -0,0 +1,3 @@
This is Aaron Watters' first script;
it renders his paper for IPC8 into
PDF. A fascinating read, as well.

View File

@ -0,0 +1,903 @@
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/gadflypaper/gfe.py
__version__=''' $Id: gfe.py 2385 2004-06-17 15:26:05Z rgbecker $ '''
__doc__=''
#REPORTLAB_TEST_SCRIPT
import sys
from reportlab.platypus import *
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.rl_config import defaultPageSize
PAGE_HEIGHT=defaultPageSize[1]
styles = getSampleStyleSheet()
Title = "Integrating Diverse Data Sources with Gadfly 2"
Author = "Aaron Watters"
URL = "http://www.chordate.com/"
email = "arw@ifu.net"
# Abstract of the paper; rendered as a body paragraph through p(Abstract).
Abstract = """This paper describes the primitive methods underlying the implementation
of SQL query evaluation in Gadfly 2, a database management system implemented
in Python [Van Rossum]. The major design goals behind
the architecture described here are to simplify the implementation
and to permit flexible and efficient extensions to the gadfly
engine. Using this architecture and its interfaces programmers
can add functionality to the engine such as alternative disk based
indexed table implementations, dynamic interfaces to remote data
bases or other data sources, and user defined computations."""
from reportlab.lib.units import inch
pageinfo = "%s / %s / %s" % (Author, email, Title)
def myFirstPage(canvas, doc):
    """Draw the fixed decorations of the first page.

    Writes the paper title near the top of the page and a "First Page"
    footer line containing ``pageinfo``.  The canvas state is saved on entry
    and restored on exit.  ``doc`` is accepted but not used.
    """
    footer = "First Page / %s" % pageinfo
    canvas.saveState()
    canvas.setFont('Times-Bold', 16)
    canvas.drawString(108, PAGE_HEIGHT - 108, Title)
    canvas.setFont('Times-Roman', 9)
    canvas.drawString(inch, 0.75 * inch, footer)
    canvas.restoreState()
def myLaterPages(canvas, doc):
    """Draw the footer of every page after the first.

    The footer shows ``doc.page`` followed by ``pageinfo``.  The canvas state
    is saved on entry and restored on exit.
    """
    footer = "Page %d %s" % (doc.page, pageinfo)
    canvas.saveState()
    canvas.setFont('Times-Roman', 9)
    canvas.drawString(inch, 0.75 * inch, footer)
    canvas.restoreState()
def go():
    """Render the collected content to ``gfe.pdf``.

    A one-inch spacer is placed in front of everything gathered in
    ``Elements``, and the result is built with ``myFirstPage`` and
    ``myLaterPages`` as the page decorators.
    """
    # Build from a fresh list rather than inserting into (and handing over)
    # the module-level Elements list, so calling go() again neither stacks up
    # extra spacers nor depends on what a previous build did to the list.
    story = [Spacer(0, inch)] + Elements
    doc = SimpleDocTemplate('gfe.pdf')
    doc.build(story, onFirstPage=myFirstPage, onLaterPages=myLaterPages)
Elements = []
HeaderStyle = styles["Heading1"] # XXXX
def header(txt, style=HeaderStyle, klass=Paragraph, sep=0.3):
    """Append a vertical gap followed by ``klass(txt, style)`` to ``Elements``.

    txt   -- text handed to ``klass``
    style -- style handed to ``klass`` (defaults to the heading style)
    klass -- callable that builds the element from ``(txt, style)``
    sep   -- height of the gap placed before the element, in inches
    """
    gap = Spacer(0.2 * inch, sep * inch)
    Elements.append(gap)
    Elements.append(klass(txt, style))
ParaStyle = styles["Normal"]
def p(txt):
    """Add ``txt`` in ``ParaStyle``, preceded by a 0.1 inch gap.

    Returns whatever ``header`` returns.
    """
    return header(txt, sep=0.1, style=ParaStyle)
#pre = p # XXX
PreStyle = styles["Code"]
def pre(txt):
    """Append a small gap and ``txt`` as a ``Preformatted`` block to ``Elements``.

    The block uses ``PreStyle``.
    """
    Elements.append(Spacer(0.1 * inch, 0.1 * inch))
    Elements.append(Preformatted(txt, PreStyle))
#header(Title, sep=0.1. style=ParaStyle)
header(Author, sep=0.1, style=ParaStyle)
header(URL, sep=0.1, style=ParaStyle)
header(email, sep=0.1, style=ParaStyle)
header("ABSTRACT")
p(Abstract)
header("Backgrounder")
p("""\
The term "database" usually refers to a persistent
collection of data. Data is persistent if it continues
to exist whether or not it is associated with a running
process on the computer, or even if the computer is
shut down and restarted at some future time. Database
management systems provide support for constructing databases,
maintaining databases, and extracting information from databases.""")
p("""\
Relational databases manipulate and store persistent
table structures called relations, such as the following
three tables""")
# Sample table 1 of 3.  Columns are padded so that they line up under the
# 28-character rule when rendered as preformatted text.
pre("""\
-- drinkers who frequent bars (this is a comment)
select * from frequents
DRINKER | PERWEEK | BAR
============================
adam    | 1       | lolas
woody   | 5       | cheers
sam     | 5       | cheers
norm    | 3       | cheers
wilt    | 2       | joes
norm    | 1       | joes
lola    | 6       | lolas
norm    | 2       | lolas
woody   | 1       | lolas
pierre  | 0       | frankies
""")
pre("""\
-- drinkers who like beers
select * from likes
DRINKER | PERDAY | BEER
===============================
adam | 2 | bud
wilt | 1 | rollingrock
sam | 2 | bud
norm | 3 | rollingrock
norm | 2 | bud
nan | 1 | sierranevada
woody | 2 | pabst
lola | 5 | mickies
""")
pre("""\
-- beers served from bars
select * from serves
BAR | QUANTITY | BEER
=================================
cheers | 500 | bud
cheers | 255 | samadams
joes | 217 | bud
joes | 13 | samadams
joes | 2222 | mickies
lolas | 1515 | mickies
lolas | 333 | pabst
winkos | 432 | rollingrock
frankies | 5 | snafu
""")
p("""
The relational model for database structures makes
the simplifying assumption that all data in a database
can be represented in simple table structures
such as these. Although this assumption seems extreme
it provides a good foundation for defining solid and
well defined database management systems and some
of the most successful software companies in the
world, such as Oracle, Sybase, IBM, and Microsoft,
have marketed database management systems based on
the relational model quite successfully.
""")
p("""
SQL stands for Structured Query Language.
The SQL language defines industry standard
mechanisms for creating, querying, and modifying
relational tables. Several years ago SQL was one
of many Relational Database Management System
(RDBMS) query languages in use, and many would
argue not the best one. Now, largely due
to standardization efforts and the
backing of IBM, SQL is THE standard way to talk
to database systems.
""")
p("""
There are many advantages SQL offers over other
database query languages and alternative paradigms
at this time (please see [O'Neill] or [Korth and Silberschatz]
for more extensive discussions and comparisons between the
SQL/relational approach and others.)
""")
p("""
The chief advantage over all contenders at this time
is that SQL and the relational model are now widely
used as interfaces and back end data stores to many
different products with different performance characteristics,
user interfaces, and other qualities: Oracle, Sybase,
Ingres, SQL Server, Access, Outlook,
Excel, IBM DB2, Paradox, MySQL, MSQL, POSTgres, and many
others. For this reason a program designed to use
an SQL database as its data storage mechanism can
easily be ported from one SQL data manager to another,
possibly on different platforms. In fact the same
program can seamlessly use several backends and/or
import/export data between different data base platforms
with trivial ease.
No other paradigm offers such flexibility at the moment.
""")
p("""
Another advantage which is not as immediately
obvious is that the relational model and the SQL
query language are easily understood by semi-technical
and non-technical professionals, such as business
people and accountants. Human resources managers
who would be terrified by an object model diagram
or a snippet of code that resembles a conventional
programming language will frequently feel quite at
ease with a relational model which resembles the
sort of tabular data they deal with on paper in
reports and forms on a daily basis. With a little training the
same HR managers may be able to translate the request
"Who are the drinkers who like bud and frequent cheers?"
into the SQL query
""")
pre("""
select drinker
from frequents
where bar='cheers'
and drinker in (
select drinker
from likes
where beer='bud')
""")
p("""
(or at least they have some hope of understanding
the query once it is written by a technical person
or generated by a GUI interface tool). Thus the use
of SQL and the relational model enables communication
between different communities which must understand
and interact with stored information. In contrast
many other approaches cannot be understood easily
by people without extensive programming experience.
""")
p("""
Furthermore the declarative nature of SQL
lends itself to automatic query optimization,
and engines such as Gadfly can automatically translate a user query
into an optimized query plan which takes
advantage of available indices and other data characteristics.
In contrast more navigational techniques require the application
program itself to optimize the accesses to the database and
explicitly make use of indices.
""")
# HACK
Elements.append(PageBreak())
p("""
While it must be admitted that there are application
domains such as computer aided engineering design where
the relational model is unnatural, it is also important
to recognize that for many application domains (such
as scheduling, accounting, inventory, finance, personal
information management, electronic mail) the relational
model is a very natural fit and the SQL query language
makes most accesses to the underlying data (even sophisticated
ones) straightforward. """)
p("""For an example of a moderately
sophisticated query using the tables given above,
the following query lists the drinkers who frequent lolas bar
and like at least two beers not served by lolas
""")
# Debugging aid, switched off: when the condition is made true, only the
# content collected up to this point is rendered and the script then exits.
if False:
    go()
    sys.exit(1)
pre("""
select f.drinker
from frequents f, likes l
where f.drinker=l.drinker and f.bar='lolas'
and l.beer not in
(select beer from serves where bar='lolas')
group by f.drinker
having count(distinct beer)>=2
""")
p("""
yielding the result
""")
pre("""
DRINKER
=======
norm
""")
p("""
Experience shows that queries of this sort are actually
quite common in many applications, and are often much more
difficult to formulate using some navigational database
organizations, such as some "object oriented" database
paradigms.
""")
p("""
Certainly,
SQL does not provide all you need to interact with
databases -- in order to do "real work" with SQL you
need to use SQL and at least one other language
(such as C, Pascal, C++, Perl, Python, TCL, Visual Basic
or others) to do work (such as readable formatting a report
from raw data) that SQL was not designed to do.
""")
header("Why Gadfly 1?")
p("""Gadfly 1.0 is an SQL based relational database implementation
implemented entirely in the Python programming language, with
optional fast data structure accelerators implemented in the
C programming language. Gadfly is relatively small, highly portable,
very easy to use (especially for programmers with previous experience
with SQL databases such as MS Access or Oracle), and reasonably
fast (especially when the kjbuckets C accelerators are used).
For moderate sized problems Gadfly offers a fairly complete
set of features such as transaction semantics, failure recovery,
and a TCP/IP based client/server mode (Please see [Gadfly] for
detailed discussion).""")
header("Why Gadfly 2?")
p("""Gadfly 1.0 also has significant limitations. An active Gadfly
1.0 database keeps all data in (virtual) memory, and hence a Gadfly
1.0 database is limited in size to available virtual memory. Important
features such as date/time/interval operations, regular expression
matching and other standard SQL features are not implemented in
Gadfly 1.0. The optimizer and the query evaluator perform optimizations
using properties of the equality predicate but do not optimize
using properties of inequalities such as BETWEEN or less-than.
It is possible to add "extension views" to a Gadfly
1.0 database, but the mechanism is somewhat clumsy and indices
over extension views are not well supported. The features of Gadfly
2.0 discussed here attempt to address these deficiencies by providing
a uniform extension model that permits addition of alternate table,
function, and predicate implementations.""")
p("""Other deficiencies, such as missing constructs like "ALTER
TABLE" and the lack of outer joins and NULL values are not
addressed here, although they may be addressed in Gadfly 2.0 or
a later release. This paper also does not intend to explain
the complete operations of the internals; it is intended to provide
at least enough information to understand the basic mechanisms
for extending gadfly.""")
p("""Some concepts and definitions provided next help with the description
of the gadfly interfaces. [Note: due to the terseness of this
format the ensuing is not a highly formal presentation, but attempts
to approach precision where precision is important.]""")
header("The semilattice of substitutions")
p("""Underlying the gadfly implementation are the basic concepts
associated with substitutions. A substitution is a mapping
of attribute names to values (implemented in gadfly using kjbuckets.kjDict
objects). Here an attribute refers to some sort of "descriptive
variable", such as NAME and a value is an assignment for that variable,
like "Dave Ascher". In Gadfly a table is implemented as a sequence
of substitutions, and substitutions are used in many other ways as well.
""")
p("""
For example consider the substitutions""")
pre("""
A = [DRINKER=>'sam']
B = [DRINKER=>'sam', BAR=>'cheers']
C = [DRINKER=>'woody', BEER=>'bud']
D = [DRINKER=>'sam', BEER=>'mickies']
E = [DRINKER=>'sam', BAR=>'cheers', BEER=>'mickies']
F = [DRINKER=>'sam', BEER=>'mickies']
G = [BEER=>'bud', BAR=>'lolas']
H = [] # the empty substitution
I = [BAR=>'cheers', CAPACITY=>300]""")
p("""A trivial but important observation is that since substitutions
are mappings, no attribute can assume more than one value in a
substitution. In the operations described below whenever an operator
"tries" to assign more than one value to an attribute
the operator yields an "overdefined" or "inconsistent"
result.""")
header("Information Semi-order:")
p("""Substitution B is said to be
more informative than A because B agrees with all assignments
in A (in addition to providing more information as well). Similarly
we say that E is more informative than A, B, D, F, and H but E
is not more informative than the others since, for example G disagrees
with E on the value assigned to the BEER attribute and I provides
additional CAPACITY information not provided in E.""")
header("Joins and Inconsistency:")
p("""A join of two substitutions
X and Y is the least informative substitution Z such that Z is
more informative (or equally informative) than both X and Y. For
example B is the join of B with A, E is the join of B with D and""")
pre("""
E join I =
[DRINKER=>'sam', BAR=>'cheers', BEER=>'mickies', CAPACITY=>300]""")
p("""For any two substitutions either (1) they disagree on the value
assigned to some attribute and have no join or (2) they agree
on all common attributes (if there are any) and their join is
the union of all (name, value) assignments in both substitutions.
Written in terms of kjbucket.kjDict operations two kjDicts X and
Y have a join Z = (X+Y) if and only if Z.Clean() is not None.
Two substitutions that have no join are said to be inconsistent.
For example I and G are inconsistent since they disagree on
the value assigned to the BAR attribute and therefore have no
join. The algebra of substitutions with joins technically defines
an abstract algebraic structure called a semilattice.""")
header("Name space remapping")
p("""Another primitive operation over substitutions is the remap
operation S2 = S.remap(R) where S is a substitution and R is a
graph of attribute names and S2 is a substitution. This operation
is defined to produce the substitution S2 such that""")
pre("""
Name=>Value in S2 if and only if
Name1=>Value in S and Name<=Name1 in R
""")
p("""or if there is no such substitution S2 the remap value is said
to be overdefined.""")
p("""For example the remap operation may be used to eliminate attributes
from a substitution. For example""")
pre("""
E.remap([DRINKER<=DRINKER, BAR<=BAR])
= [DRINKER=>'sam', BAR=>'cheers']
""")
p("""Illustrating that remapping using the [DRINKER&lt;=DRINKER,
BAR&lt;=BAR] graph eliminates all attributes except DRINKER and
BAR, such as BEER. More generally remap can be used in this way
to implement the classical relational projection operation. (See [Korth and Silberschatz]
for a detailed discussion of the projection operator and other relational
algebra operators such as selection, rename, difference and joins.)""")
p("""The remap operation can also be used to implement "selection
on attribute equality". For example if we are interested
in the employee names of employees who are their own bosses we
can use the remapping graph""")
pre("""
R1 = [NAME<=NAME, NAME<=BOSS]
""")
p("""and reject substitutions where remapping using R1 is overdefined.
For example""")
pre("""
S1 = [NAME=>'joe', BOSS=>'joe']
S1.remap(R1) = [NAME=>'joe']
S2 = [NAME=>'fred', BOSS=>'joe']
S2.remap(R1) is overdefined.
""")
p("""The last remap is overdefined because the NAME attribute cannot
assume both the values 'fred' and 'joe' in a substitution.""")
p("""Furthermore, of course, the remap operation can be used to
"rename attributes" or "copy attribute values"
in substitutions. Note below that the missing attribute CAPACITY
in B is effectively ignored in the remapping operation.""")
pre("""
B.remap([D<=DRINKER, B<=BAR, B2<=BAR, C<=CAPACITY])
= [D=>'sam', B=>'cheers', B2=>'cheers']
""")
p("""More interestingly, a single remap operation can be used to
perform a combination of renaming, projection, value copying,
and attribute equality selection as one operation. In kjbuckets the remapper
graph is implemented using a kjbuckets.kjGraph and the remap operation
is an intrinsic method of kjbuckets.kjDict objects.""")
header("Generalized Table Joins and the Evaluator Mainloop")
p("""Strictly speaking the Gadfly 2.0 query evaluator only uses
the join and remap operations as its "basic assembly language"
-- all other computations, including inequality comparisons and
arithmetic, are implemented externally to the evaluator as "generalized
table joins." """)
p("""A table is a sequence of substitutions (which in keeping with
SQL semantics may contain redundant entries). The join between
two tables T1 and T2 is the sequence of all possible defined joins
between pairs of elements from the two tables. Procedurally we
might compute the join as""")
# Pseudo-code for the naive nested-loop table join; the indentation inside
# the string carries the loop nesting in the rendered output.
pre("""
T1JoinT2 = empty
for t1 in T1:
    for t2 in T2:
        if t1 join t2 is defined:
            add t1 join t2 to T1JoinT2""")
p("""In general circumstances this intuitive implementation is a
very inefficient way to compute the join, and Gadfly almost always
uses other methods, particularly since, as described below, a
"generalized table" can have an "infinite"
number of entries.""")
p("""For an example of a table join consider the EMPLOYEES table
containing""")
pre("""
[NAME=>'john', JOB=>'executive']
[NAME=>'sue', JOB=>'programmer']
[NAME=>'eric', JOB=>'peon']
[NAME=>'bill', JOB=>'peon']
""")
p("""and the ACTIVITIES table containing""")
pre("""
[JOB=>'peon', DOES=>'windows']
[JOB=>'peon', DOES=>'floors']
[JOB=>'programmer', DOES=>'coding']
[JOB=>'secretary', DOES=>'phone']""")
p("""then the join between EMPLOYEES and ACTIVITIES must contain""")
pre("""
[NAME=>'sue', JOB=>'programmer', DOES=>'coding']
[NAME=>'eric', JOB=>'peon', DOES=>'windows']
[NAME=>'bill', JOB=>'peon', DOES=>'windows']
[NAME=>'eric', JOB=>'peon', DOES=>'floors']
[NAME=>'bill', JOB=>'peon', DOES=>'floors']""")
p("""A compiled gadfly subquery ultimately appears to the evaluator
as a sequence of generalized tables that must be joined (in combination
with certain remapping operations that are beyond the scope of
this discussion). The Gadfly mainloop proceeds following the very
loose pseudocode:""")
pre("""
Subs = [ [] ] # the unary sequence containing "true"
While some table hasn't been chosen yet:
Choose an unchosen table with the least cost join estimate.
Subs = Subs joined with the chosen table
return Subs""")
p("""[Note that it is a property of the join operation that the
order in which the joins are carried out will not affect the result,
so the greedy strategy of evaluating the "cheapest join next"
will not affect the result. Also note that the treatment of logical
OR and NOT as well as EXIST, IN, UNION, and aggregation and so
forth are not discussed here, even though they do fit into this
approach.]""")
p("""The actual implementation is a bit more complex than this,
but the above outline may provide some useful intuition. The "cost
estimation" step and the implementation of the join operation
itself are left up to the generalized table object implementation.
A table implementation has the ability to give an "infinite"
cost estimate, which essentially means "don't join me in
yet under any circumstances." """)
header("Implementing Functions")
p("""As mentioned above operations such as arithmetic are implemented
using generalized tables. For example the arithmetic Add operation
is implemented in Gadfly internally as an "infinite generalized
table" containing all possible substitutions""")
pre("""
[ARG0=>a, ARG1=>b, RESULT=>a+b]
""")
p("""Where a and b are all possible values which can be summed.
Clearly, it is not possible to enumerate this table, but given
a sequence of substitutions with defined values for ARG0 and ARG1
such as""")
pre("""
[ARG0=>1, ARG1=>-4]
[ARG0=>2.6, ARG1=>50]
[ARG0=>99, ARG1=>1]
""")
p("""it is possible to implement a "join operation" against
this sequence that performs the same augmentation as a join with
the infinite table defined above:""")
pre("""
[ARG0=>1, ARG1=>-4, RESULT=>-3]
[ARG0=>2.6, ARG1=>50, RESULT=>52.6]
[ARG0=>99, ARG1=>1, RESULT=>100]
""")
p("""Furthermore by giving an "infinite estimate" for
all attempts to evaluate the join where ARG0 and ARG1 are not
available the generalized table implementation for the addition
operation can refuse to compute an "infinite join." """)
p("""More generally all functions f(a,b,c,d) are represented in
gadfly as generalized tables containing all possible relevant
entries""")
pre("""
[ARG0=>a, ARG1=>b, ARG2=>c, ARG3=>d, RESULT=>f(a,b,c,d)]""")
p("""and the join estimation function refuses all attempts to perform
a join unless all the arguments are provided by the input substitution
sequence.""")
header("Implementing Predicates")
p("""Similarly to functions, predicates such as less-than and BETWEEN
and LIKE are implemented using the generalized table mechanism.
For example the "x BETWEEN y AND z" predicate is implemented
as a generalized table "containing" all possible""")
pre("""
[ARG0=>a, ARG1=>b, ARG2=>c]""")
p("""where b&lt;a&lt;c. Furthermore joins with this table are not
permitted unless all three arguments are available in the sequence
of input substitutions.""")
header("Some Gadfly extension interfaces")
p("""A gadfly database engine may be extended with user defined
functions, predicates, and alternative table and index implementations.
This section snapshots several Gadfly 2.0 interfaces, currently under
development and likely to change before the package is released.""")
p("""The basic interface for adding functions and predicates (logical tests)
to a gadfly engine is relatively straightforward. For example to add the
ability to match a regular expression within a gadfly query use the
following implementation.""")
pre("""
from re import match
def addrematch(gadflyinstance):
gadflyinstance.add_predicate("rematch", match)
""")
p("""
Then upon connecting to the database execute
""")
pre("""
g = gadfly(...)
...
addrematch(g)
""")
p("""
In this case the "semijoin operation" associated with the new predicate
"rematch" is automatically generated, and after the add_predicate
binding operation the gadfly instance supports queries such as""")
pre("""
select drinker, beer
from likes
where rematch('b*', beer) and drinker not in
(select drinker from frequents where rematch('c*', bar))
""")
p("""
By embedding the "rematch" operation within the query the SQL
engine can do "more work" for the programmer and reduce or eliminate the
need to process the query result externally to the engine.
""")
p("""
In a similar manner functions may be added to a gadfly instance,""")
pre("""
def modulo(x,y):
return x % y
def addmodulo(gadflyinstance):
gadflyinstance.add_function("modulo", modulo)
...
g = gadfly(...)
...
addmodulo(g)
""")
p("""
Then after the binding the modulo function can be used wherever
an SQL expression can occur.
""")
p("""
Adding alternative table implementations to a Gadfly instance
is more interesting and more difficult. An "extension table" implementation
must conform to the following interface:""")
pre("""
# get the kjbuckets.kjSet set of attribute names for this table
names = table.attributes()
# estimate the difficulty of evaluating a join given known attributes
# return None for "impossible" or n>=0 otherwise with larger values
# indicating greater difficulty or expense
estimate = table.estimate(known_attributes)
# return the join of the rows of the table with
# the list of kjbuckets.kjDict mappings as a list of mappings.
resultmappings = table.join(listofmappings)
""")
p("""
In this case add the table to a gadfly instance using""")
pre("""
gadflyinstance.add_table("table_name", table)
""")
p("""
For example to add a table which automatically queries filenames
in the filesystems of the host computer a gadfly instance could
be augmented with a GLOB table implemented using the standard
library function glob.glob as follows:""")
pre("""
import kjbuckets
class GlobTable:
def __init__(self): pass
def attributes(self):
return kjbuckets.kjSet("PATTERN", "NAME")
def estimate(self, known_attributes):
if known_attributes.member("PATTERN"):
return 66 # join not too difficult
else:
return None # join is impossible (must have PATTERN)
def join(self, listofmappings):
from glob import glob
result = []
for m in listofmappings:
pattern = m["PATTERN"]
for name in glob(pattern):
newmapping = kjbuckets.kjDict(m)
newmapping["NAME"] = name
if newmapping.Clean():
result.append(newmapping)
return result
...
gadfly_instance.add_table("GLOB", GlobTable())
""")
p("""
Then one could formulate queries such as "list the files in directories
associated with packages installed by guido"
""")
pre("""
select g.name as filename
from packages p, glob g
where p.installer = 'guido' and g.pattern=p.root_directory
""")
p("""
Note that conceptually the GLOB table is an infinite table including
all filenames on the current computer in the "NAME" column, paired with
a potentially infinite number of patterns.
""")
p("""
More interesting examples would allow queries to remotely access
data served by an HTTP server, or from any other resource.
""")
p("""
Furthermore an extension table can be augmented with update methods
""")
pre("""
table.insert_rows(listofmappings)
table.update_rows(oldlist, newlist)
table.delete_rows(oldlist)
""")
p("""
Note: at present the implementation does not enforce recovery or
transaction semantics for updates to extension tables, although this
may change in the final release.
""")
p("""
The table implementation is free to provide its own implementations of
indices which take advantage of data provided by the join argument.
""")
header("Efficiency Notes")
p("""The following thought experiment attempts to explain why the
Gadfly implementation is surprisingly fast considering that it
is almost entirely implemented in Python (an interpreted programming
language which is not especially fast when compared to alternatives).
Although Gadfly is quite complex, at an abstract level the process
of query evaluation boils down to a series of embedded loops.
Consider the following nested loops:""")
pre("""
iterate 1000:
f(...) # fixed cost of outer loop
iterate 10:
g(...) # fixed cost of middle loop
iterate 10:
# the real work (string parse, matrix mul, query eval...)
h(...)""")
p("""In my experience many computations follow this pattern where
f, g, are complex, dynamic, special purpose and h is simple, general
purpose, static. Some example computations that follow this pattern
include: file massaging (perl), matrix manipulation (python, tcl),
database/cgi page generation, and vector graphics/imaging.""")
p("""Suppose implementing f, g, h in python is easy but results in
execution times 10 times slower than a much harder implementation
in C, choosing arbitrary and debatable numbers assume each function
call consumes 1 tick in C, 5 ticks in java, 10 ticks in python
for a straightforward implementation of each function f, g, and
h. Under these conditions we get the following cost analysis,
eliminating some uninteresting combinations, of implementing the
function f, g, and h in combinations of Python, C and java:""")
pre("""
COST | FLANG | GLANG | HLANG
==================================
111000 | C | C | C
115000 | java | C | C
120000 | python | C | C
155000 | java | java | C
210000 | python | python | C
555000 | java | java | java
560000 | python | java | java
610000 | python | python | java
1110000 | python | python | python
""")
p("""Note that moving only the innermost loop to C (python/python/C)
speeds up the calculation by half an order of magnitude compared
to the python-only implementation and brings the speed to within
a factor of 2 of an implementation done entirely in C.""")
p("""Although this artificial and contrived thought experiment is
far from conclusive, we may be tempted to draw the conclusion
that generally programmers should focus first on obtaining a working
implementation (because as John Ousterhout is reported to have
said "the biggest performance improvement is the transition
from non-working to working") using the methodology that
is most likely to obtain a working solution the quickest (Python). Only then if the performance
is inadequate should the programmer focus on optimizing
the inner most loops, perhaps moving them to a very efficient
implementation (C). Optimizing the outer loops will buy little
improvement, and should be done later, if ever.""")
p("""This was precisely the strategy behind the gadfly implementations,
where most of the inner loops are implemented in the kjbuckets
C extension module and the higher level logic is all in Python.
This also explains why gadfly appears to be "slower"
for simple queries over small data sets, but seems to be relatively
"faster" for more complex queries over larger data sets,
since larger queries and data sets take better advantage of the
optimized inner loops.""")
header("A Gadfly variant for OLAP?")
p("""In private correspondence Andy Robinson points out that the
basic logical design underlying Gadfly could be adapted to provide
Online Analytical Processing (OLAP) and other forms of data warehousing
and data mining. Since SQL is not particularly well suited for
the kinds of requests common in these domains the higher level
interfaces would require modification, but the underlying logic
of substitutions and name mappings seems to be appropriate.""")
header("Conclusion")
p("""The revamped query engine design in Gadfly 2 supports
a flexible and general extension methodology that permits programmers
to extend the gadfly engine to include additional computations
and access to remote data sources. Among other possibilities this
will permit the gadfly engine to make use of disk based indexed
tables and to dynamically retrieve information from remote data
sources (such as an Excel spreadsheet or an Oracle database).
These features will make gadfly a very useful tool for data manipulation
and integration.""")
header("References")
p("""[Van Rossum] Van Rossum, Python Reference Manual, Tutorial, and Library Manuals,
please look to http://www.python.org
for the latest versions, downloads and links to printed versions.""")
p("""[O'Neill] O'Neill, P., Data Base Principles, Programming, Performance,
Morgan Kaufmann Publishers, San Francisco, 1994.""")
p("""[Korth and Silberschatz] Korth, H. and Silberschatz, A. and Sudarshan, S.
Data Base System Concepts, McGraw-Hill Series in Computer Science, Boston,
1997""")
p("""[Gadfly]Gadfly: SQL Relational Database in Python,
http://www.chordate.com/kwParsing/gadfly.html""")
go()

View File

@ -0,0 +1,56 @@
This contains a number of benchmarks and demos
based on Homer's Odyssey (which is widely available
in plain, line-oriented text format). There is a large
selection of online books at:
http://classics.mit.edu/
Our distribution ships with just the first chapter
in odyssey.txt. For a more meaningful speed test,
download the full copy from
http://www.reportlab.com/ftp/odyssey.full.zip
or
ftp://ftp.reportlab.com/odyssey.full.zip
and unzip to extract odyssey.full.txt (608kb).
Benchmark speed depends quite critically
on the presence of our accelerator module,
_rl_accel, which is a C (or Java) extension.
Serious users ought to compile or download this!
The times quoted are from one machine (Andy Robinson's
home PC, approx 1.2Ghz 128Mb Ram, Win2k in Sep 2003)
in order to give a rough idea of what features cost
what performance.
The tests are as follows:
(1) odyssey.py (produces odyssey.pdf)
This demo takes a large volume of text and prints it
in the simplest way possible. It is a demo of the
basic technique of looping down a page manually and
breaking at the bottom. On my 1.2 Ghz machine this takes
1.91 seconds (124 pages per second)
(2) fodyssey.py (produces fodyssey.pdf)
This is a 'flowing document': we parse the file and
throw away line breaks to make proper paragraphs.
The Platypus framework renders these. This necessitates
measuring the width of every word in every paragraph
for wrapping purposes.
This takes 3.27 seconds on the same machine. Paragraph
wrapping basically doubles the work. The text is more
compact with about 50% more words per page. Very roughly,
we can wrap 40 pages of ten-point text per second and save
to PDF.
(3) dodyssey.py (produces dodyssey.pdf)
This is a slightly fancier version which uses different
page templates (one column for first page in a chapter,
two column for body pages). The additional layout logic
adds about 15%, going up to 3.8 seconds. This is probably
a realistic benchmark for a simple long text document
with a single pass. Documents doing cross-references
and a table of contents might need twice as long.

View File

@ -0,0 +1,254 @@
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/odyssey/dodyssey.py
__version__=''' $Id: dodyssey.py 2856 2006-05-11 09:48:13Z rgbecker $ '''
__doc__=''
#REPORTLAB_TEST_SCRIPT
import sys, copy, string, os
from reportlab.platypus import *
def _envFlag(name):
    #True when environment variable `name` starts with y, Y or 1.
    #Slicing with [:1] instead of indexing with [0] means a variable that is
    #set but empty (e.g. NEW_PARA=) reads as false rather than raising IndexError.
    return os.environ.get(name,'0')[:1] in ('y','Y','1')

#NEW_PARA : when true, Paragraph is taken from rlextra.radxml.para (see below)
#REDCAP   : integer level of coloured-letter decoration applied by p()
#CALLBACK : when true, chapter() embeds an <onDraw> tag in chapter headings
_NEW_PARA=_envFlag('NEW_PARA')
_REDCAP=int(os.environ.get('REDCAP','0'))
_CALLBACK=_envFlag('CALLBACK')
if _NEW_PARA:
    def Paragraph(s,style):
        #imported lazily, so rlextra is only needed once a paragraph is actually created
        from rlextra.radxml.para import Paragraph as PPPP
        return PPPP(s,style)
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
import reportlab.rl_config
reportlab.rl_config.invariant = 1
styles = getSampleStyleSheet()
Title = "The Odyssey"
Author = "Homer"
def myTitlePage(canvas, doc):
    """Page callback for the title page.

    Draws nothing: it only brackets an empty section with a save and
    restore of the canvas graphics state.  `doc` is not used.
    """
    canvas.saveState()
    #nothing is drawn on the title page
    canvas.restoreState()
def myLaterPages(canvas, doc):
    """Page callback that writes a "Page N" footer.

    The footer is drawn in 9 point Times-Roman at (1 inch, 0.75 inch), with
    N taken from doc.page.  The canvas state is saved before and restored
    after, so the font change does not leak into the page content.
    """
    canvas.saveState()
    canvas.setFont('Times-Roman',9)
    x, y = inch, 0.75 * inch
    footer = "Page %d" % doc.page
    canvas.drawString(x, y, footer)
    canvas.restoreState()
def go():
    """Lay out the flowables collected in Elements and write dodyssey.pdf.

    Three page templates are registered on a BaseDocTemplate: 'First' and
    'OneCol' use a single full-size frame, 'TwoCol' uses two half-width
    frames separated by a 12 point gutter.  The document is built with a
    canvas factory that attaches myOnDrawCB to every canvas it creates.
    """
    def myCanvasMaker(fn,**kw):
        #canvas factory passed to doc.build(); keyword arguments are forwarded unchanged
        from reportlab.pdfgen.canvas import Canvas
        #direct call replaces the deprecated apply(Canvas,(fn,),kw)
        canv = Canvas(fn,**kw)
        # attach our callback to the canvas
        canv.myOnDrawCB = myOnDrawCB
        return canv

    doc = BaseDocTemplate('dodyssey.pdf',showBoundary=0)

    #normal frame as for SimpleFlowDocument
    frameT = Frame(doc.leftMargin, doc.bottomMargin, doc.width, doc.height, id='normal')

    #Two Columns
    frame1 = Frame(doc.leftMargin, doc.bottomMargin, doc.width/2-6, doc.height, id='col1')
    frame2 = Frame(doc.leftMargin+doc.width/2+6, doc.bottomMargin, doc.width/2-6,
                   doc.height, id='col2')
    doc.addPageTemplates([PageTemplate(id='First',frames=frameT, onPage=myTitlePage),
                          PageTemplate(id='OneCol',frames=frameT, onPage=myLaterPages),
                          PageTemplate(id='TwoCol',frames=[frame1,frame2], onPage=myLaterPages),
                          ])
    doc.build(Elements,canvasmaker=myCanvasMaker)
#story: the flowables appended by the helper functions below and consumed by go()
Elements = []
#chapter headings: a centred copy of the sample Heading1 style
ChapterStyle = copy.deepcopy(styles["Heading1"])
ChapterStyle.alignment = TA_CENTER
#NOTE(review): the attribute is spelled `fontsize` here; ReportLab paragraph
#styles appear to use `fontSize`, so this assignment may have no effect on
#rendering - confirm before relying on it
ChapterStyle.fontsize = 14
#title-page text: copy of the chapter style with its own leading
InitialStyle = copy.deepcopy(ChapterStyle)
InitialStyle.fontsize = 16
InitialStyle.leading = 20
#default style for pre()
PreStyle = styles["Code"]
def newPage():
    """Append a page break to the story held in Elements."""
    pageBreak = PageBreak()
    Elements.append(pageBreak)
chNum = 0
def myOnDrawCB(canv,kind,label):
    """Print one trace line naming the callback kind, the canvas page number and the label.

    go() attaches this function to every canvas it creates.
    """
    header = 'myOnDrawCB(%s)' % kind
    pageNo = canv.getPageNumber()
    #a single pre-formatted argument prints the same text as the comma separated form
    print('%s %s %s %s %s' % (header, 'Page number=', pageNo, 'label value=', label))
def chapter(txt, style=ChapterStyle):
    """Start a new chapter on a fresh one-column page.

    Queues a switch to the 'OneCol' template, a page break, the numbered
    heading and a spacer; when useTwoCol is set the following pages go
    back to the 'TwoCol' template.  The heading carries an <onDraw> tag
    only when _CALLBACK is on and _NEW_PARA is off.
    """
    global chNum
    Elements.append(NextPageTemplate('OneCol'))
    newPage()
    chNum += 1
    if _CALLBACK and not _NEW_PARA:
        heading = ('foo<onDraw name="myOnDrawCB" label="chap %d"/> '%chNum)+txt
    else:
        heading = ('chap %d'%chNum)+txt
    Elements.append(Paragraph(heading, style))
    Elements.append(Spacer(0.2*inch, 0.3*inch))
    if useTwoCol:
        Elements.append(NextPageTemplate('TwoCol'))
def fTitle(txt,style=InitialStyle):
    """Append `txt` to the story as a paragraph in the title style."""
    titlePara = Paragraph(txt, style)
    Elements.append(titlePara)
#body text: the sample Normal style with a little space above each paragraph
ParaStyle = copy.deepcopy(styles["Normal"])
ParaStyle.spaceBefore = 0.1*inch

#an alignment keyword may be given on the command line; keywords are tried
#in this order, the first one present wins and justified text is the default
ParaStyle.alignment = TA_JUSTIFY
for _word, _align in (('right',TA_RIGHT), ('left',TA_LEFT), ('justify',TA_JUSTIFY),
                      ('center',TA_CENTER), ('centre',TA_CENTER)):
    if _word in sys.argv:
        ParaStyle.alignment = _align
        break
del _word, _align

#'notwocol' on the command line keeps the body text in a single column
useTwoCol = 'notwocol' not in sys.argv
def spacer(inches):
    """Append a vertical gap `inches` inches high (0.1 inch wide) to the story."""
    width = 0.1*inch
    height = inches*inch
    Elements.append(Spacer(width, height))
def p(txt, style=ParaStyle):
    """Append `txt` to the story as a paragraph, optionally decorating letters.

    Decoration depends on the module level _REDCAP value:
      any true value : the first ASCII letter is wrapped in a red, enlarged font
      2 or more      : for text longer than 20 characters, a letter around the
                       middle is additionally made bold, italic and blue
      exactly 3      : for text longer than 20 characters, the last ASCII
                       letter is additionally wrapped in a green font
    """
    def isLetter(ch):
        return 'a'<=ch<='z' or 'A'<=ch<='Z'

    if _REDCAP:
        redOpen, fontClose = '<font color="red" size="+2">', '</font>'
        n = len(txt)
        for i in range(n):
            if isLetter(txt[i]):
                txt = txt[:i] + redOpen + txt[i] + fontClose + txt[i+1:]
                break
        if _REDCAP>=2 and n>20:
            #start from the original midpoint, shifted by the markup inserted above
            j = i+len(redOpen)+len(fontClose)+1+int((n-1)/2)
            while not isLetter(txt[j]):
                j += 1
            txt = txt[:j] + '<b><i><font size="+2" color="blue">' + txt[j] + '</font></i></b>' + txt[j+1:]
        if _REDCAP==3 and n>20:
            greenOpen = '<font color="green" size="+1">'
            #scan backwards over the decorated text for its last letter
            for k in range(len(txt)-1,-1,-1):
                if isLetter(txt[k]):
                    txt = txt[:k] + greenOpen + txt[k] + fontClose + txt[k+1:]
                    break
    Elements.append(Paragraph(txt, style))
#true until pre() has been called once
firstPre = 1
def pre(txt, style=PreStyle):
    """Append `txt` to the story as a preformatted block preceded by a small spacer.

    The very first call also switches to the 'OneCol' page template and
    starts a new page.
    """
    global firstPre
    if firstPre:
        Elements.append(NextPageTemplate('OneCol'))
        newPage()
        firstPre = 0
    spacer(0.1)
    Elements.append(Preformatted(txt, style))
def parseOdyssey(fn):
    """Parse the text file `fn`, build the story and render dodyssey.pdf.

    Phases, each timed and reported on stdout:
      1. read all lines of `fn`
      2. strip the trailing linefeed from every line
      3. split the lines into blank-line separated paragraphs, queuing one
         [function, args...] action per paragraph in E; lines starting with
         'BOOK ' open a chapter, and a line that begins and ends with '-'
         separates sections
      4. replay the queued actions, which fills Elements
      5. call go() to write the PDF
    Finally the md5 digest of the generated PDF is printed.

    Both files are closed explicitly instead of being left to the garbage
    collector, and string methods / direct calls are used in place of the
    deprecated string-module functions and apply().
    """
    from time import time
    try:
        from hashlib import md5     #Python 2.5 and later
    except ImportError:
        from md5 import md5         #older interpreters

    E = []
    t0=time()
    src = open(fn,'r')
    try:
        L = src.readlines()
    finally:
        src.close()
    t1 = time()
    print("open(%s,'r').readlines() took %.4f seconds" %(fn,t1-t0))
    for i, line in enumerate(L):
        if line[-1:]=='\012':
            L[i] = line[:-1]
    t2 = time()
    print("Removing all linefeeds took %.4f seconds" %(t2-t1))
    #sentinel: guarantees the scanner below always meets a blank line and a separator
    L.append('')
    L.append('-----')

    def findNext(L, i):
        #Scan forward from i to the next blank line and delete the run of blank
        #lines there.  Returns (i, kind): i indexes the first line after the
        #paragraph just scanned; kind is true when a separator line followed
        #(it is deleted too, with any blank lines after it) or when the end
        #of the list was reached.
        while 1:
            if L[i].strip()=='':
                del L[i]
                kind = 1
                if i<len(L):
                    while L[i].strip()=='':
                        del L[i]

                if i<len(L):
                    kind = L[i][-1]=='-' and L[i][0]=='-'
                    if kind:
                        del L[i]
                        if i<len(L):
                            while L[i].strip()=='':
                                del L[i]
                break
            else:
                i = i + 1

        return i, kind

    f = s = 0
    #skip the file header: everything up to and including the first separator
    while 1:
        f, k = findNext(L,0)
        if k: break

    E.append([spacer,2])
    E.append([fTitle,'<font color="red">%s</font>' % Title, InitialStyle])
    E.append([fTitle,'<font size="-4">by</font> <font color="green">%s</font>' % Author, InitialStyle])

    while 1:
        if f>=len(L): break

        if L[f][0:5].upper()=='BOOK ':
            E.append([chapter,L[f]])
            f=f+1
            while L[f].strip()=='': del L[f]
            style = ParaStyle
            func = p
        else:
            style = PreStyle
            func = pre

        #one action per paragraph until the next separator
        while 1:
            s=f
            f, k=findNext(L,s)
            #preformatted text keeps its line structure, body text is joined into one line
            if func is pre:
                sep = '\012'
            else:
                sep = ' '
            E.append([func,sep.join(L[s:f]),style])
            if k: break
    t3 = time()
    print("Parsing into memory took %.4f seconds" %(t3-t2))
    del L
    t4 = time()
    print("Deleting list of lines took %.4f seconds" %(t4-t3))
    for action in E:
        action[0](*action[1:])
    t5 = time()
    print("Moving into platypus took %.4f seconds" %(t5-t4))
    del E
    t6 = time()
    print("Deleting list of actions took %.4f seconds" %(t6-t5))
    go()
    t7 = time()
    print("saving to PDF took %.4f seconds" %(t7-t6))
    print("Total run took %.4f seconds"%(t7-t0))

    pdf = open('dodyssey.pdf','rb')
    try:
        digest = md5(pdf.read()).hexdigest()
    finally:
        pdf.close()
    print('file digest: %s' % digest)
def run():
    """Process the first available input file.

    'odyssey.full.txt' is preferred over 'odyssey.txt'.  When neither
    exists in the current directory nothing is done.
    """
    candidates = ('odyssey.full.txt','odyssey.txt')
    found = [name for name in candidates if os.path.isfile(name)]
    if found:
        parseOdyssey(found[0])
def doProf(profname,func,*args,**kwd):
    """Run func(*args, **kwd) under the hotshot profiler and print the 20 costliest entries.

    profname -- file the profile data is written to and then loaded from
    func     -- callable to profile
    args/kwd -- forwarded to func (they used to be accepted but silently dropped)

    The profile is closed even when func raises, so the data file is not
    left open.
    """
    import hotshot, hotshot.stats
    prof = hotshot.Profile(profname)
    try:
        prof.runcall(func,*args,**kwd)
    finally:
        prof.close()
    stats = hotshot.stats.load(profname)
    stats.strip_dirs()
    stats.sort_stats('time', 'calls')
    stats.print_stats(20)
if __name__=='__main__':
    #--prof on the command line runs the benchmark under the profiler
    if '--prof' not in sys.argv:
        run()
    else:
        doProf('dodyssey.prof',run)

View File

@ -0,0 +1,165 @@
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/odyssey/fodyssey.py
__version__=''' $Id: fodyssey.py 2385 2004-06-17 15:26:05Z rgbecker $ '''
__doc__=''
#REPORTLAB_TEST_SCRIPT
import sys, copy, string, os
from reportlab.platypus import *
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
styles = getSampleStyleSheet()
Title = "The Odyssey"
Author = "Homer"
def myFirstPage(canvas, doc):
    """Page callback for the first page.

    Draws nothing: it only brackets an empty section with a save and
    restore of the canvas graphics state.  `doc` is not used.
    """
    canvas.saveState()
    #nothing is drawn on the first page
    canvas.restoreState()
def myLaterPages(canvas, doc):
    """Page callback that writes a "Page N" footer.

    The footer is drawn in 9 point Times-Roman at (1 inch, 0.75 inch), with
    N taken from doc.page.  The canvas state is saved before and restored
    after, so the font change does not leak into the page content.
    """
    canvas.saveState()
    canvas.setFont('Times-Roman',9)
    x, y = inch, 0.75 * inch
    footer = "Page %d" % doc.page
    canvas.drawString(x, y, footer)
    canvas.restoreState()
def go():
    """Lay out the flowables collected in Elements and write fodyssey.pdf.

    Command line switches read from sys.argv:
      showboundary -- passed as the showBoundary option of the document
      nosplitting  -- sets doc.allowSplitting to false
    """
    argv = sys.argv
    showFrames = 'showboundary' in argv
    doc = SimpleDocTemplate('fodyssey.pdf',showBoundary=showFrames)
    doc.allowSplitting = 'nosplitting' not in argv
    doc.build(Elements,myFirstPage,myLaterPages)
#story: the flowables appended by the helper functions below and consumed by go()
Elements = []
#chapter headings: a centred (shallow) copy of the sample Heading1 style
ChapterStyle = copy.copy(styles["Heading1"])
ChapterStyle.alignment = TA_CENTER
#NOTE(review): the attribute is spelled `fontsize` here; ReportLab paragraph
#styles appear to use `fontSize`, so this assignment may have no effect on
#rendering - confirm before relying on it
ChapterStyle.fontsize = 16
#title-page text: copy of the chapter style with its own leading
InitialStyle = copy.deepcopy(ChapterStyle)
InitialStyle.fontsize = 16
InitialStyle.leading = 20
#default style for pre()
PreStyle = styles["Code"]
def newPage():
    """Append a page break to the story held in Elements."""
    pageBreak = PageBreak()
    Elements.append(pageBreak)
def chapter(txt, style=ChapterStyle):
    """Start a chapter: page break, heading paragraph, then a 0.3 inch spacer."""
    newPage()
    heading = Paragraph(txt, style)
    Elements.append(heading)
    gap = Spacer(0.2*inch, 0.3*inch)
    Elements.append(gap)
def fTitle(txt,style=InitialStyle):
    """Append `txt` to the story as a paragraph in the title style."""
    titlePara = Paragraph(txt, style)
    Elements.append(titlePara)
#body text: the sample Normal style with a little space above each paragraph
ParaStyle = copy.deepcopy(styles["Normal"])
ParaStyle.spaceBefore = 0.1*inch

#an alignment keyword may be given on the command line; keywords are tried
#in this order, the first one present wins and justified text is the default
ParaStyle.alignment = TA_JUSTIFY
for _word, _align in (('right',TA_RIGHT), ('left',TA_LEFT), ('justify',TA_JUSTIFY),
                      ('center',TA_CENTER), ('centre',TA_CENTER)):
    if _word in sys.argv:
        ParaStyle.alignment = _align
        break
del _word, _align
def spacer(inches):
    """Append a vertical gap `inches` inches high (0.1 inch wide) to the story."""
    width = 0.1*inch
    height = inches*inch
    Elements.append(Spacer(width, height))
def p(txt, style=ParaStyle):
    """Append `txt` to the story as a body-text paragraph."""
    para = Paragraph(txt, style)
    Elements.append(para)
def pre(txt, style=PreStyle):
    """Append a 0.1 inch spacer followed by `txt` as a preformatted block."""
    spacer(0.1)
    Elements.append(Preformatted(txt, style))
def parseOdyssey(fn):
    """Parse the text file `fn`, build the story and render fodyssey.pdf.

    Phases, each timed and reported on stdout:
      1. read all lines of `fn`
      2. strip the trailing linefeed from every line
      3. split the lines into blank-line separated paragraphs, queuing one
         [function, args...] action per paragraph in E; lines starting with
         'BOOK ' open a chapter, and a line that begins and ends with '-'
         separates sections
      4. replay the queued actions, which fills Elements
      5. call go() to write the PDF

    The input file is closed explicitly instead of being left to the
    garbage collector, and string methods / direct calls are used in place
    of the deprecated string-module functions and apply().
    """
    from time import time
    E = []
    t0=time()
    src = open(fn,'r')
    try:
        L = src.readlines()
    finally:
        src.close()
    t1 = time()
    print("open(%s,'r').readlines() took %.4f seconds" %(fn,t1-t0))
    for i, line in enumerate(L):
        if line[-1:]=='\012':
            L[i] = line[:-1]
    t2 = time()
    print("Removing all linefeeds took %.4f seconds" %(t2-t1))
    #sentinel: guarantees the scanner below always meets a blank line and a separator
    L.append('')
    L.append('-----')

    def findNext(L, i):
        #Scan forward from i to the next blank line and delete the run of blank
        #lines there.  Returns (i, kind): i indexes the first line after the
        #paragraph just scanned; kind is true when a separator line followed
        #(it is deleted too, with any blank lines after it) or when the end
        #of the list was reached.
        while 1:
            if L[i].strip()=='':
                del L[i]
                kind = 1
                if i<len(L):
                    while L[i].strip()=='':
                        del L[i]

                if i<len(L):
                    kind = L[i][-1]=='-' and L[i][0]=='-'
                    if kind:
                        del L[i]
                        if i<len(L):
                            while L[i].strip()=='':
                                del L[i]
                break
            else:
                i = i + 1

        return i, kind

    f = s = 0
    #skip the file header: everything up to and including the first separator
    while 1:
        f, k = findNext(L,0)
        if k: break

    E.append([spacer,2])
    E.append([fTitle,'<font color=red>%s</font>' % Title, InitialStyle])
    E.append([fTitle,'<font size=-4>by</font> <font color=green>%s</font>' % Author, InitialStyle])

    while 1:
        if f>=len(L): break

        if L[f][0:5].upper()=='BOOK ':
            E.append([chapter,L[f]])
            f=f+1
            while L[f].strip()=='': del L[f]
            style = ParaStyle
            func = p
        else:
            style = PreStyle
            func = pre

        #one action per paragraph until the next separator
        while 1:
            s=f
            f, k=findNext(L,s)
            #preformatted text keeps its line structure, body text is joined into one line
            if func is pre:
                sep = '\012'
            else:
                sep = ' '
            E.append([func,sep.join(L[s:f]),style])
            if k: break
    t3 = time()
    print("Parsing into memory took %.4f seconds" %(t3-t2))
    del L
    t4 = time()
    print("Deleting list of lines took %.4f seconds" %(t4-t3))
    for action in E:
        action[0](*action[1:])
    t5 = time()
    print("Moving into platypus took %.4f seconds" %(t5-t4))
    del E
    t6 = time()
    print("Deleting list of actions took %.4f seconds" %(t6-t5))
    go()
    t7 = time()
    print("saving to PDF took %.4f seconds" %(t7-t6))
    print("Total run took %.4f seconds"%(t7-t0))
#pick the input at import time: the full text when present, otherwise the
#short sample (which is also the name left in fn when neither file exists)
fn = 'odyssey.full.txt'
if not os.path.isfile(fn):
    fn = 'odyssey.txt'

if __name__=='__main__':
    parseOdyssey(fn)

View File

@ -0,0 +1,151 @@
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/odyssey/odyssey.py
__version__=''' $Id: odyssey.py 2385 2004-06-17 15:26:05Z rgbecker $ '''
#module docstring placeholder; spelled with two leading underscores like
#the other odyssey demo scripts (it previously had three, which only
#created an unrelated global)
__doc__=''
#odyssey.py
#
#Demo/benchmark of PDFgen rendering Homer's Odyssey.
#results on my humble P266 with 64MB:
# Without page compression:
# 239 pages in 3.76 seconds = 77 pages per second
# With textOut rather than textLine, i.e. computing width
# of every word as we would for wrapping:
# 239 pages in 10.83 seconds = 22 pages per second
# With page compression and textLine():
# 239 pages in 39.39 seconds = 6 pages per second
from reportlab.pdfgen import canvas
import time, os, sys
#find out what platform we are on and whether accelerator is
#present, in order to print this as part of benchmark info.
#ACCEL is 1 when the _rl_accel extension module can be imported, else 0
try:
    import _rl_accel
except ImportError:
    ACCEL = 0
else:
    ACCEL = 1
from reportlab.lib.units import inch, cm
from reportlab.lib.pagesizes import A4
#precalculate some basics: page geometry derived from the A4 page size,
#leaving a one inch margin on every side
#y coordinate of the rule drawn at the top of each page
top_margin = A4[1] - inch
#y coordinate of the rule drawn at the bottom of each page
bottom_margin = inch
#x coordinates where the rules and the text start and end
left_margin = inch
right_margin = A4[0] - inch
#horizontal space between the two margins
frame_width = right_margin - left_margin
def drawPageFrame(canv):
    """Draw the page furniture on the current page of `canv`.

    That is a rule along the top and bottom margins, the running title
    "Homer's Odyssey" just above the top rule, and a centred "Page N"
    label half an inch from the bottom edge.  The font is left set to
    12 point Times-Italic.
    """
    def rule(y):
        canv.line(left_margin, y, right_margin, y)

    rule(top_margin)
    canv.setFont('Times-Italic',12)
    canv.drawString(left_margin, top_margin + 2, "Homer's Odyssey")
    #the top rule is stroked a second time; redundant, but kept so the output is unchanged
    rule(top_margin)
    rule(bottom_margin)
    pageLabel = "Page %d" % canv.getPageNumber()
    canv.drawCentredString(0.5*A4[0], 0.5 * inch, pageLabel)
def run(verbose=1):
    """Render the Odyssey text to odyssey.pdf and print benchmark figures.

    verbose -- when true, a progress line is printed for every tenth page
               and before the file is written.

    The input is 'odyssey.full.txt' when it exists, otherwise
    'odyssey.txt'.  Each input line becomes one text line, and a new page
    is started whenever the text cursor gets within half an inch of the
    bottom margin.  After saving, the page count, elapsed time, speed,
    file size and md5 digest of the PDF are printed.

    Both files are closed explicitly rather than left to the garbage
    collector.  The strings drawn into the PDF are deliberately left
    untouched so the digest of the invariant output stays comparable.
    """
    if sys.platform[0:4] == 'java':
        impl = 'Jython'
    else:
        impl = 'Python'
    verStr = '%d.%d' % (sys.version_info[0:2])
    if ACCEL:
        accelStr = 'with _rl_accel'
    else:
        accelStr = 'without _rl_accel'
    print('Benchmark of %s %s %s' % (impl, verStr, accelStr))

    started = time.time()
    canv = canvas.Canvas('odyssey.pdf', invariant=1)
    canv.setPageCompression(1)
    drawPageFrame(canv)

    #do some title page stuff
    canv.setFont("Times-Bold", 36)
    canv.drawCentredString(0.5 * A4[0], 7 * inch, "Homer's Odyssey")
    canv.setFont("Times-Bold", 18)
    #NOTE(review): the bundled odyssey.txt names Samuel Butler as translator - confirm the name below
    canv.drawCentredString(0.5 * A4[0], 5 * inch, "Translated by Samuel Burton")
    canv.setFont("Times-Bold", 12)
    tx = canv.beginText(left_margin, 3 * inch)
    tx.textLine("This is a demo-cum-benchmark for PDFgen. It renders the complete text of Homer's Odyssey")
    tx.textLine("from a text file. On my humble P266, it does 77 pages per secondwhile creating a 238 page")
    tx.textLine("document. If it is asked to computer text metrics, measuring the width of each word as ")
    tx.textLine("one would for paragraph wrapping, it still manages 22 pages per second.")
    tx.textLine("")
    tx.textLine("Andy Robinson, Robinson Analytics Ltd.")
    canv.drawText(tx)
    canv.showPage()

    #on with the text...
    drawPageFrame(canv)
    canv.setFont('Times-Roman', 12)
    tx = canv.beginText(left_margin, top_margin - 0.5*inch)

    #prefer the full text; fn is left as 'odyssey.txt' when neither file exists
    for fn in ('odyssey.full.txt','odyssey.txt'):
        if os.path.isfile(fn):
            break
    src = open(fn,'r')
    try:
        data = src.readlines()
    finally:
        src.close()

    for line in data:
        #this just does it the fast way...
        tx.textLine(line)
        #this forces it to do text metrics, which would be the slow
        #part if we were wrappng paragraphs.
        #canv.textOut(line)
        #canv.textLine('')

        #page breaking
        y = tx.getY() #get y coordinate
        if y < bottom_margin + 0.5*inch:
            canv.drawText(tx)
            canv.showPage()
            drawPageFrame(canv)
            canv.setFont('Times-Roman', 12)
            tx = canv.beginText(left_margin, top_margin - 0.5*inch)

            #page
            pg = canv.getPageNumber()
            if verbose and pg % 10 == 0:
                print('formatted page %d' % pg)

    if tx:
        canv.drawText(tx)
        canv.showPage()
        drawPageFrame(canv)

    if verbose:
        print('about to write to disk...')

    canv.save()

    finished = time.time()
    elapsed = finished - started
    pages = canv.getPageNumber()-1
    speed = pages / elapsed
    #index 6 of the stat result is the size in bytes; report whole kilobytes
    fileSize = os.stat('odyssey.pdf')[6] // 1024
    print('%d pages in %0.2f seconds = %0.2f pages per second, file size %d kb' % (
        pages, elapsed, speed, fileSize))

    try:
        from hashlib import md5     #Python 2.5 and later
    except ImportError:
        from md5 import md5         #older interpreters
    pdf = open('odyssey.pdf','rb')
    try:
        digest = md5(pdf.read()).hexdigest()
    finally:
        pdf.close()
    print('file digest: %s' % digest)
if __name__=='__main__':
    #-q on the command line suppresses the progress messages
    quiet = '-q' in sys.argv
    if quiet:
        run(verbose=False)
    else:
        run(verbose=True)

View File

@ -0,0 +1,207 @@
Provided by The Internet Classics Archive.
See bottom for copyright. Available online at
http://classics.mit.edu//Homer/odyssey.html
The Odyssey
By Homer
Translated by Samuel Butler
----------------------------------------------------------------------
BOOK I
<bullet indent="-18"><font name="courier" size="13" color="blue">I</font></bullet><font color="green"><b><i>Tell</i></b></font> me, O muse, of that ingenious hero who travelled far and wide
a b c &amp;| &amp; | <b>A</b>' <b>A</b> ' after he had sacked the famous town of <font color="red" size="12"><b>Troy</b></font>. Many cities did he visit,
and many were the nations with whose manners and customs he was acquainted;
moreover he suffered much by sea while trying to save his own life
and bring his men safely home; but do what he might he could not
save<super><font color="red">1</font></super>
his men, for they perished through their own sheer folly in eating
the cattle of the Sun-god Hyperion; so the god prevented them from
ever reaching home. Tell me, too, about all these things, O daughter
of Jove, from whatsoever source you may know them.
So now all who escaped death in battle or by shipwreck had got safely
home except Ulysses, and he, though he was longing to return to his
wife and country, was detained by the goddess Calypso, who had got
him into a large cave and wanted to marry him. But as years went by,
there came a time when the gods settled that he should go back to
Ithaca; even then, however, when he was among his own people, his
troubles were not yet over; nevertheless all the gods had now begun
to pity him except Neptune, who still persecuted him without ceasing
and would not let him get home.
<font color="green">Now Neptune had gone off to the Ethiopians, who are at the world's
end, and lie in two halves, the one looking West and the other East.
He had gone there to accept a hecatomb of sheep and oxen, and was
enjoying himself at his festival; but the other gods met in the house
of Olympian Jove, and the sire of gods and men spoke first. At that
moment he was thinking of Aegisthus, who had been killed by Agamemnon's
son Orestes; so he said to the other gods:</font>
"See now, how men lay blame upon us gods for what is after all nothing
but their own folly. Look at Aegisthus; he must needs make love to
Agamemnon's wife unrighteously and then kill Agamemnon, though he
knew it would be the death of him; for I sent Mercury to warn him
not to do either of these things, inasmuch as Orestes would be sure
to take his revenge when he grew up and wanted to return home. Mercury
told him this in all good will but he would not listen, and now he
has paid for everything in full."
Then Minerva said, "Father, son of Saturn, King of kings, it served
Aegisthus right, and so it would any one else who does as he did;
but Aegisthus is neither here nor there; it is for Ulysses that my
heart bleeds, when I think of his sufferings in that lonely sea-girt
island, far away, poor man, from all his friends. It is an island
covered with forest, in the very middle of the sea, and a goddess
lives there, daughter of the magician Atlas, who looks after the bottom
of the ocean, and carries the great columns that keep heaven and earth
asunder. This daughter of Atlas has got hold of poor unhappy Ulysses,
and keeps trying by every kind of blandishment to make him forget
his home, so that he is tired of life, and thinks of nothing but how
he may once more see the smoke of his own chimneys. You, sir, take
no heed of this, and yet when Ulysses was before Troy did he not propitiate
you with many a burnt sacrifice? Why then should you keep on being
so angry with him?"
And Jove said, "My child, what are you talking about? How can I forget
Ulysses than whom there is no more capable man on earth, nor more
liberal in his offerings to the immortal gods that live in heaven?
Bear in mind, however, that Neptune is still furious with Ulysses
for having blinded an eye of Polyphemus king of the Cyclopes. Polyphemus
is son to Neptune by the nymph Thoosa, daughter to the sea-king Phorcys;
therefore though he will not kill Ulysses outright, he torments him
by preventing him from getting home. Still, let us lay our heads together
and see how we can help him to return; Neptune will then be pacified,
for if we are all of a mind he can hardly stand out against us."
And Minerva said, "Father, son of Saturn, King of kings, if, then,
the gods now mean that Ulysses should get home, we should first send
Mercury to the Ogygian island to tell Calypso that we have made up
our minds and that he is to return. In the meantime I will go to Ithaca,
to put heart into Ulysses' son Telemachus; I will embolden him to
call the Achaeans in assembly, and speak out to the suitors of his
mother Penelope, who persist in eating up any number of his sheep
and oxen; I will also conduct him to Sparta and to Pylos, to see if
he can hear anything about the return of his dear father- for this
will make people speak well of him."
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
Ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis ellipsis.
"Men of Ithaca, it is all your own fault that things have turned out
as they have; you would not listen to me, nor yet to Mentor, when
we bade you check the folly of your sons who were doing much wrong
in the wantonness of their hearts- wasting the substance and dishonouring
the wife of a chieftain who they thought would not return. Now, however,
let it be as I say, and do as I tell you. Do not go out against Ulysses,
or you may find that you have been drawing down evil on your own heads."
This was what he said, and more than half raised a loud shout, and
at once left the assembly. But the rest stayed where they were, for
the speech of Halitherses displeased them, and they sided with Eupeithes;
they therefore hurried off for their armour, and when they had armed
themselves, they met together in front of the city, and Eupeithes
led them on in their folly. He thought he was going to avenge the
murder of his son, whereas in truth he was never to return, but was
himself to perish in his attempt.
Then Minerva said to Jove, "Father, son of Saturn, king of kings,
answer me this question- What do you propose to do? Will you set them
fighting still further, or will you make peace between them?"
And Jove answered, "My child, why should you ask me? Was it not by
your own arrangement that Ulysses came home and took his revenge upon
the suitors? Do whatever you like, but I will tell you what I think
will be most reasonable arrangement. Now that Ulysses is revenged,
let them swear to a solemn covenant, in virtue of which he shall continue
to rule, while we cause the others to forgive and forget the massacre
of their sons and brothers. Let them then all become friends as heretofore,
and let peace and plenty reign."
This was what Minerva was already eager to bring about, so down she
darted from off the topmost summits of Olympus.
Now when Laertes and the others had done dinner, Ulysses began by
saying, "Some of you go out and see if they are not getting close
up to us." So one of Dolius's sons went as he was bid. Standing on
the threshold he could see them all quite near, and said to Ulysses,
"Here they are, let us put on our armour at once."
They put on their armour as fast as they could- that is to say Ulysses,
his three men, and the six sons of Dolius. Laertes also and Dolius
did the same- warriors by necessity in spite of their grey hair. When
they had all put on their armour, they opened the gate and sallied
forth, Ulysses leading the way.
Then Jove's daughter Minerva came up to them, having assumed the form
and voice of Mentor. Ulysses was glad when he saw her, and said to
his son Telemachus, "Telemachus, now that are about to fight in an
engagement, which will show every man's mettle, be sure not to disgrace
your ancestors, who were eminent for their strength and courage all
the world over."
"You say truly, my dear father," answered Telemachus, "and you shall
see, if you will, that I am in no mind to disgrace your family."
Laertes was delighted when he heard this. "Good heavens, he exclaimed,
"what a day I am enjoying: I do indeed rejoice at it. My son and grandson
are vying with one another in the matter of valour."
On this Minerva came close up to him and said, "Son of Arceisius-
best friend I have in the world- pray to the blue-eyed damsel, and
to Jove her father; then poise your spear and hurl it."
As she spoke she infused fresh vigour into him, and when he had prayed
to her he poised his spear and hurled it. He hit Eupeithes' helmet,
and the spear went right through it, for the helmet stayed it not,
and his armour rang rattling round him as he fell heavily to the ground.
Meantime Ulysses and his son fell the front line of the foe and smote
them with their swords and spears; indeed, they would have killed
every one of them, and prevented them from ever getting home again,
only Minerva raised her voice aloud, and made every one pause. "Men
of Ithaca," she cried, cease this dreadful war, and settle the matter
at once without further bloodshed."
On this pale fear seized every one; they were so frightened that their
arms dropped from their hands and fell upon the ground at the sound
of the goddess's voice, and they fled back to the city for their lives.
But Ulysses gave a great cry, and gathering himself together swooped
down like a soaring eagle. Then the son of Saturn sent a thunderbolt
of fire that fell just in front of Minerva, so she said to Ulysses,
"Ulysses, noble son of Laertes, stop this warful strife, or Jove will
be angry with you."
Thus spoke Minerva, and Ulysses obeyed her gladly. Then Minerva assumed
the form and voice of Mentor, and presently made a covenant of peace
between the two contending parties.
THE END
----------------------------------------------------------------------
Copyright statement:
The Internet Classics Archive by Daniel C. Stevenson, Web Atomics.
World Wide Web presentation is copyright (C) 1994-1998, Daniel
C. Stevenson, Web Atomics.
All rights reserved under international and pan-American copyright
conventions, including the right of reproduction in whole or in part
in any form. Direct permission requests to classics@classics.mit.edu.
Translation of "The Deeds of the Divine Augustus" by Augustus is
copyright (C) Thomas Bushnell, BSG.
To really test that reportlab can produce pages quickly download the
complete version of the test from http://classics.mit.edu//Homer/odyssey.html
and copy it to this directory as odyssey.full.txt.
A zipped version of the full text is available for download at
ftp://ftp.reportlab.com/odyssey.full.zip

View File

@ -0,0 +1,73 @@
# rlzope : an external Zope method to show people how to use
# the ReportLab toolkit from within Zope.
#
# this method searches for an image named 'logo' in the
# ZODB then prints it at the top of a simple PDF
# document made with ReportLab
#
# the resulting PDF document is returned to the
# user's web browser and, if possible, it is
# simultaneously saved into the ZODB.
#
# this method illustrates how to use both the platypus
# and canvas frameworks.
#
# License : The ReportLab Toolkit's license (similar to BSD)
#
# Author : Jerome Alet - alet@unice.fr
#
Installation instructions :
===========================
0 - If not installed then install Zope.
1 - Install reportlab in the Zope/lib/python/Shared directory by unpacking
the tarball and putting a reportlabs.pth file in site-packages for the Zope
used with Python. The path value in the reportlabs.pth file must be
relative. For a typical Zope installation, the path is "../../python/Shared".
Remember to restart Zope so the new path is instantiated.
2 - Install PIL in the Zope/lib/python/Shared directory. You need to
ensure that the _imaging.so or .pyd is also installed appropriately.
It should be compatible with the python running the zope site.
3 - Copy rlzope.py to your Zope installation's "Extensions"
subdirectory, e.g. /var/lib/zope/Extensions/ under Debian GNU/Linux.
4 - From within Zope's management interface, add an External Method with
these parameters :
Id : rlzope
Title : rlzope
Module Name : rlzope
Function Name : rlzope
5 - From within Zope's management interface, add an image called "logo"
in the same Folder as rlzope, or somewhere above in the Folder
hierarchy. For example you can use ReportLab's logo which you
can find in reportlab/docs/images/replogo.gif
6 - Point your web browser to rlzope, e.g. on my laptop under
Debian GNU/Linux :
http://localhost:9673/rlzope
This will send a simple PDF document named 'dummy.pdf' to your
web browser, and if possible save it as a File object in the
Zope Object DataBase, with this name. Note, however, that if
an object with the same name already exists then it won't
be replaced for security reasons.
You can optionally add a parameter called 'name' with
a filename as the value, to specify another filename,
e.g. :
logo
http://localhost:9673/rlzope?name=sample.pdf
7 - Adapt it to your own needs.
8 - Enjoy !
Send comments or bug reports to : alet@unice.fr

View File

@ -0,0 +1,169 @@
#
# Using the ReportLab toolkit from within Zope
#
# WARNING : The MyPDFDoc class deals with ReportLab's platypus framework,
# while the MyPageTemplate class deals directly with ReportLab's
# canvas, so this example shows how to work with both.
#
# License : the ReportLab Toolkit's one
# see : http://www.reportlab.com
#
# Author : Jerome Alet - alet@unice.fr
#
#
import string, cStringIO
try :
from Shared.reportlab.platypus.paragraph import Paragraph
from Shared.reportlab.platypus.doctemplate import *
from Shared.reportlab.lib.units import inch
from Shared.reportlab.lib import styles
from Shared.reportlab.lib.utils import ImageReader
except ImportError :
from reportlab.platypus.paragraph import Paragraph
from reportlab.platypus.doctemplate import *
from reportlab.lib.units import inch
from reportlab.lib import styles
from reportlab.lib.utils import ImageReader
class MyPDFDoc :
    """A small in-memory PDF report built with ReportLab's platypus.

    The document is generated entirely inside __init__ and kept in a
    cStringIO buffer; str(instance) gives back the PDF data.
    """

    class MyPageTemplate(PageTemplate) :
        """Our own page template: a single frame, plus a logo and a
        footer line drawn directly on the canvas of every page."""

        def __init__(self, parent) :
            """Initialise our page template.

            parent is the MyPDFDoc instance owning this template; its
            'document' and 'context' attributes are read here.
            """
            # we must save a pointer to our parent somewhere
            self.parent = parent

            # Our doc is made of a single frame
            pagesize = parent.document.pagesize
            content = Frame(0.75 * inch, 0.5 * inch, pagesize[0] - 1.25 * inch, pagesize[1] - (1.5 * inch))
            PageTemplate.__init__(self, "MyTemplate", [content])

            # get all the images we need now, in case we've got
            # several pages this will save some CPU
            self.logo = self.getImageFromZODB("logo")

        def getImageFromZODB(self, name) :
            """Retrieves the object called name from the parent's context,
            wraps its data in an ImageReader and computes a size that
            makes it 0.75 inch high.

            Returns ((width, height), image), or None when the context
            has no attribute with that name.
            """
            try :
                # try to get it from ZODB
                logo = getattr(self.parent.context, name)
            except AttributeError :
                # not found !
                return None

            # Wrap the raw image data so that ReportLab can read it
            image = ImageReader(cStringIO.StringIO(str(logo.data)))
            (width, height) = image.getSize()

            # scale it to be 0.75 inch high, keeping the aspect ratio
            # (the "+ 0.0" forces a floating point division)
            multi = ((height + 0.0) / (0.75 * inch))
            width = int(width / multi)
            height = int(height / multi)
            return ((width, height), image)

        def beforeDrawPage(self, canvas, doc) :
            """Draws a logo and a contribution message on each page."""
            canvas.saveState()
            if self.logo is not None :
                # draws the logo if it exists
                ((width, height), image) = self.logo
                canvas.drawImage(image, inch, doc.pagesize[1] - inch, width, height)
            canvas.setFont('Times-Roman', 10)
            canvas.drawCentredString(inch + (doc.pagesize[0] - (1.5 * inch)) / 2, 0.25 * inch, "Contributed by Jerome Alet - alet@unice.fr")
            canvas.restoreState()

    def __init__(self, context, filename) :
        """Builds the whole sample document.

        context  -- the object the report is produced for; its
                    absolute_url() is printed in the report and it is
                    searched for the "logo" image.
        filename -- name under which the caller intends to save the
                    report; it is only mentioned in the report's text.
        """
        # save some data
        self.context = context
        self.built = 0
        self.objects = []

        # we will build an in-memory document
        # instead of creating an on-disk file.
        self.report = cStringIO.StringIO()

        # initialise a PDF document using ReportLab's platypus
        self.document = BaseDocTemplate(self.report)

        # add our page template
        # (we could add more than one, but I prefer to keep it simple)
        self.document.addPageTemplates(self.MyPageTemplate(self))

        # get the default style sheets
        self.StyleSheet = styles.getSampleStyleSheet()

        # then build a simple doc with ReportLab's platypus
        # (the urls are escaped because they are embedded in
        # Paragraph text)
        url = self.escapexml(context.absolute_url())
        urlfilename = self.escapexml(context.absolute_url() + '/%s' % filename)
        self.append(Paragraph("Using ReportLab from within Zope", self.StyleSheet["Heading3"]))
        self.append(Spacer(0, 10))
        self.append(Paragraph("You launched it from : %s" % url, self.StyleSheet['Normal']))
        self.append(Spacer(0, 40))
        self.append(Paragraph("If possible, this report will be automatically saved as : %s" % urlfilename, self.StyleSheet['Normal']))

        # generate the PDF document
        self.document.build(self.objects)
        self.built = 1

    def __str__(self) :
        """Returns the PDF document as a string, or an empty string if
        it's not ready yet.

        __str__ has to return a string: returning None here would make
        str(instance) raise a TypeError. Check the 'built' attribute to
        know whether the document is ready.
        """
        if self.built :
            return self.report.getvalue()
        return ""

    def append(self, object) :
        """Appends an object to our platypus "story" (using ReportLab's terminology)."""
        self.objects.append(object)

    def escapexml(self, s) :
        """Strips surrounding whitespace from s and escapes the
        characters &, < and > as xml entities."""
        # "&" must be replaced first, otherwise the entities produced
        # for "<" and ">" would be escaped a second time.
        s = s.strip()
        s = s.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        return s.replace(">", "&gt;")
def rlzope(self) :
    """A sample external method to show people how to use ReportLab from within Zope.

    Reads the optional 'name' request parameter (default "dummy.pdf"),
    builds the sample PDF, tries to store it with manage_addFile and
    returns the PDF data with PDF response headers set.

    If anything goes wrong, an HTML page containing the escaped
    traceback is returned instead, with a text/html content type.
    """
    try:
        #
        # which file/object name to use ?
        # append ?name=xxxxx to rlzope's url to
        # choose another name
        filename = self.REQUEST.get("name", "dummy.pdf")
        if filename[-4:] != '.pdf' :
            filename = filename + '.pdf'

        # get the document's content itself as a string of text
        content = str(MyPDFDoc(self, filename))

        # we will return it to the browser, but before that we also want to
        # save it into the ZODB into the current folder
        try :
            self.manage_addFile(id = filename, file = content, title = "A sample PDF document produced with ReportLab", precondition = '', content_type = "application/pdf")
        except :
            # it seems an object with this name already exists in the ZODB:
            # it's more secure to not replace it, since we could possibly
            # destroy an important PDF document of this name.
            # NOTE(review): this best-effort save swallows every error,
            # not only name clashes -- narrow it once the exception
            # raised by manage_addFile is confirmed.
            pass

        # tell the browser we send some PDF document
        # with the requested filename
        self.REQUEST.RESPONSE.setHeader('Content-Type', 'application/pdf')
        self.REQUEST.RESPONSE.setHeader('Content-Disposition', 'attachment; filename=%s' % filename)
    except:
        # Top-level boundary: report any failure to the browser.
        import traceback, cgi

        # Write the traceback into a private buffer. Redirecting
        # sys.stdout / sys.stderr instead would touch process-wide
        # state, and resetting them to sys.__stdout__ / sys.__stderr__
        # would discard whatever they pointed to before.
        report = cStringIO.StringIO()
        traceback.print_exc(file = report)
        self.REQUEST.RESPONSE.setHeader('Content-Type', 'text/html')
        content = '<html><head></head><body><pre>%s</pre></body></html>' % cgi.escape(report.getvalue())

    # then we return the PDF content (or the error page) to the browser
    return content

View File

@ -0,0 +1,7 @@
This lists out the standard 14 fonts
in a very plain and simple fashion.
Notably, the output is huge - it makes
two separate text objects for each glyph.
Smarter programming would make tighter
PDF, but more lines of Python!

View File

@ -0,0 +1,74 @@
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/stdfonts/stdfonts.py
__version__=''' $Id: stdfonts.py 2830 2006-04-05 15:18:32Z rgbecker $ '''
__doc__="""
This generates tables showing the 14 standard fonts in both
WinAnsi and MacRoman encodings, and their character codes.
Supply an argument of 'hex' or 'oct' to get code charts
in those encodings; octal is what you need for \\n escape
sequences in Python literals.
usage: standardfonts.py [dec|hex|oct]
"""
import sys
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfgen import canvas
import string
# Maps each numbering mode accepted on the command line to a pair:
# (format string applied to the character code, caption for the page).
label_formats = {
    'dec': ('%d=', 'Decimal'),
    'oct': ('%o=', 'Octal'),
    'hex': ('0x%x=', 'Hexadecimal'),
}
def run(mode):
    """Write the code charts of the standard fonts.

    One PDF is saved per encoding, 'StandardFonts_MacRoman.pdf' and
    'StandardFonts_WinAnsi.pdf', each holding one page per face listed
    in pdfmetrics.standardFonts. A page shows character codes 32 to 255
    in columns of 32, each code labelled according to mode.

    mode is a key of label_formats ('dec', 'oct' or 'hex'); any other
    value raises KeyError.
    """
    label_formatter, caption = label_formats[mode]
    ownEncodingFaces = ['Symbol', 'ZapfDingbats']

    for enc in ['MacRoman', 'WinAnsi']:
        canv = canvas.Canvas('StandardFonts_%s.pdf' % enc)
        canv.setPageCompression(0)

        for faceName in pdfmetrics.standardFonts:
            # Symbol and ZapfDingbats are paired with an encoding named
            # after the face itself rather than with enc.
            if faceName in ownEncodingFaces:
                prefix = faceName
            else:
                prefix = enc
            encLabel = prefix + 'Encoding'
            fontName = '%s-%s' % (faceName, encLabel)
            font = pdfmetrics.Font(fontName, faceName, encLabel)
            pdfmetrics.registerFont(font)

            # page header: font name on the left, label style on the right
            canv.setFont('Times-Bold', 18)
            canv.drawString(80, 744, fontName)
            canv.setFont('Times-BoldItalic', 12)
            canv.drawRightString(515, 744, 'Labels in ' + caption)

            # for dingbats, we need to use another font for the numbers,
            # so the label and the glyph are drawn as two separate strings.
            for byt in range(32, 256):
                offset = byt - 32
                col = offset // 32
                row = offset % 32
                x = 72 + (66*col)
                y = 720 - (18*row)
                canv.setFont('Helvetica', 14)
                canv.drawString(x, y, label_formatter % byt)
                glyph = chr(byt).decode(encLabel,'ignore').encode('utf8')
                canv.setFont(fontName, 14)
                canv.drawString(x+44, y, glyph)
            canv.showPage()
        canv.save()
if __name__ == '__main__':
    # Command-line entry point: an optional single argument selects how
    # the character codes are labelled; without it decimal is used.
    if len(sys.argv) == 1:
        mode = 'dec'
    elif len(sys.argv) == 2:
        mode = sys.argv[1].lower()
    else:
        mode = None     # too many arguments

    # Previously run() was only reached when no argument was given, so
    # an explicit dec/hex/oct argument produced nothing at all.
    if mode in label_formats:
        run(mode)
    else:
        print(__doc__)

View File

@ -0,0 +1,14 @@
#!/bin/env python
#Copyright ReportLab Europe Ltd. 2000-2004
#see license.txt for license details
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/demos/tests/testdemos.py
__version__=''' $Id: testdemos.py 2385 2004-06-17 15:26:05Z rgbecker $ '''
__doc__='Test all demos'
# Snapshot of this module's globals taken before anything else is
# imported; each demo is executed in a fresh copy of it.
_globals=globals().copy()
import os, sys
from reportlab import pdfgen

# Resolve the demos directory once, as an absolute path, before the
# first chdir below: pdfgen.__file__ may be relative, in which case
# recomputing the path after changing directory would point nowhere.
_demos_dir = os.path.abspath(os.path.join(os.path.dirname(pdfgen.__file__),'..','demos'))

for p in ('pythonpoint/pythonpoint.py','stdfonts/stdfonts.py','odyssey/odyssey.py', 'gadflypaper/gfe.py'):
    fn = os.path.normcase(os.path.normpath(os.path.join(_demos_dir,p)))
    # run each demo from its own directory
    os.chdir(os.path.dirname(fn))
    execfile(fn,_globals.copy())