[Scribus] Python plugin for html import
Henning Schröder
scribus
Thu Jul 31 05:02:36 CEST 2003
Hi!
To test the scripting facilities I wrote a rudimentary html import filter.
Working with the plugin is really fun - the API is well documented and
really easy to use :-)
Unfortunately, for a complete filter I would need extended bindings.
For example, I can only set the font size for a complete text box (selecting
text regions would be nice!), and I don't have access to styles.
There is great potential for extensions, I think.
Even some larger (less time-consuming) core parts could be done in Python in a
much faster and easier way than in C++.
Besides better access to the document and its objects, it would be cool if I
could connect my script to other menus or toolbar buttons.
The attachment contains the html import filter.
Henning
-------------- next part --------------
from scribus import *
from sgmllib import SGMLParser
from htmlentitydefs import entitydefs
import os
# Size handed to the text box for ordinary (non-heading) text.
DEFAULT_SIZE = 10

# Size handed to the text box while inside each HTML heading element.
HEADERS = {
    'h1': 48,
    'h2': 36,
    'h3': 24,
    'h4': 18,
    'h5': 14,
    'h6': 12,
}

# Elements whose start tag and end tag each append a line break.
NEWLINE = [
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'br', 'p', 'li', 'div', 'tr',
]
class HTMLParser(SGMLParser):
    """SGML-based reader that copies the text of an HTML body into a text box.

    Character data is forwarded to ``textbox.append`` only while the parser
    is between the ``<body>`` start and end tags.  Tags listed in NEWLINE
    append a line break on both their start and end tag; tags listed in
    HEADERS switch the text box size for their duration.
    """

    def __init__(self, textbox):
        """Initialise the parser and set *textbox* to DEFAULT_SIZE.

        textbox -- object providing ``setSize(size)`` and ``append(text)``.
        """
        SGMLParser.__init__(self)
        self.textbox = textbox
        # Flag (0/1): are we currently inside <body> ... </body>?
        self.in_body = 0
        self.textbox.setSize(DEFAULT_SIZE)

    def start_body(self, attrs):
        """Begin accepting character data when <body> opens."""
        self.in_body = 1

    def end_body(self):
        """Stop accepting character data when </body> is seen."""
        self.in_body = 0

    def unknown_starttag(self, name, attrs):
        """Handle a start tag that has no dedicated start_* method.

        The line break is appended before the size change.
        """
        if name in NEWLINE:
            self.textbox.append('\n')
        heading_size = HEADERS.get(name)
        if heading_size is not None:
            self.textbox.setSize(heading_size)

    def unknown_endtag(self, name):
        """Handle an end tag that has no dedicated end_* method.

        The size is restored to DEFAULT_SIZE before the line break is
        appended.
        """
        if name in HEADERS:
            self.textbox.setSize(DEFAULT_SIZE)
        if name in NEWLINE:
            self.textbox.append('\n')

    def handle_data(self, raw_data):
        """Append *raw_data* to the text box with whitespace normalised.

        Runs of whitespace (including newlines) collapse to single spaces.
        A leading space of the raw text is kept; a trailing space is kept
        only when the raw text is longer than one character, so that a
        lone ' ' yields exactly one space.  Data outside the body is
        ignored.
        """
        if not self.in_body:
            return
        # split() without arguments already treats newlines as separators.
        text = ' '.join(raw_data.split())
        if raw_data[:1] == ' ':
            text = ' ' + text
        if len(raw_data) > 1 and raw_data[-1:] == ' ':
            text = text + ' '
        self.textbox.append(text)

    def unknown_entityref(self, entity):
        """Emit the replacement text for a named entity.

        Entities missing from ``entitydefs`` are emitted as an empty string.
        """
        replacement = entitydefs.get(entity, '')
        self.handle_data(replacement)
def openFileDialog():
    """Ask the user to pick an HTML file through ``kdialog``.

    Runs ``kdialog --getopenfilename`` starting in the current directory
    and returns whatever it printed on standard output, minus the trailing
    newline.  An empty string is returned when kdialog printed nothing
    (presumably because the dialog was cancelled).
    """
    # KDE file filters use "pattern pattern|description": patterns are
    # separated by spaces and '|' introduces the label.  The previous
    # "*.html|*.htm" therefore matched only *.html and showed "*.htm" as
    # the label.
    cmd = 'kdialog --getopenfilename "." "*.html *.htm|HTML files"'
    pipe = os.popen(cmd)
    try:
        output = pipe.read()
    finally:
        # Close the pipe even if reading fails.
        pipe.close()
    # Remove only the line terminator; slicing off the last character
    # unconditionally would truncate a name printed without a newline.
    return output.rstrip('\n')
def messageBox(msg):
    """Show *msg* in a ``kdialog --msgbox`` dialog and wait until it closes.

    msg -- the message; it is formatted with ``%s`` so non-string objects
           are accepted as before.

    kdialog is started directly with an argument vector instead of through
    a shell command line, so quotes, ``$`` or backticks inside the message
    are displayed literally rather than breaking or altering the command.
    """
    text = '%s' % msg
    # P_WAIT blocks until the dialog is dismissed, matching os.system().
    os.spawnlp(os.P_WAIT, 'kdialog', 'kdialog', '--msgbox', text)
class TextBox:
    """Small wrapper around the object returned by ``CreateText``.

    The identifier returned by ``CreateText`` is stored in ``boxid`` and
    passed to the other scripting calls.
    """

    def __init__(self, x, y, width, height):
        """Create the text object with the given position and extent."""
        self.boxid = CreateText(x, y, width, height)

    def setSize(self, size):
        """Intended to change the font size; currently does nothing."""
        # XXX: disabled, doesn't work like expected
        # (the intended call was SetFontSize(size, self.boxid))
        return None

    def append(self, text):
        """Insert *text* at the current end of the box's contents."""
        end_position = GetTextLength(self.boxid)
        InsertText(text, end_position, self.boxid)
def main():
    """Import an HTML file chosen by the user into a new text box.

    Shows a message box and does nothing else when ``HaveDoc()`` is false.
    Returns silently when the file dialog yields no file name.

    NOTE(review): the archived listing lost its indentation; the ``else``
    branch is assumed to belong to the ``HaveDoc()`` check -- confirm.
    """
    if not HaveDoc():
        messageBox("No document selected")
        return

    filename = openFileDialog()
    if not filename:
        return

    SetUnit(1)
    textbox = TextBox(20, 20, 70, 250)
    parser = HTMLParser(textbox)

    # Close the file explicitly instead of relying on garbage collection.
    source = open(filename)
    try:
        html = source.read()
    finally:
        source.close()

    parser.feed(html)
    # close() makes the parser process any input it is still buffering.
    parser.close()


# Run the import as soon as the script is executed.
main()
More information about the scribus
mailing list