biopdf logo

Auto Keywords Using OnPreprocessText Event Handler

This example shows you how to parse a text representation of the current print job. The parsed values can be used to modify the configuration. An OnPreprocessText event handler is implemented in the example code below. It counts the occurrences of every word (longer than three characters) in the document and sorts the counts to find the 5 most frequently used words. A list of keywords is built from these words, and the configuration is changed to set the keywords document property of the created PDF document.

Please note that the OnConfigLoaded event is used to change the ExtractText configuration to yes. This makes the PDF writer create the text file representation of the current print job. The ExtractText configuration can also be specified in the runonce.ini or any other settings file.

Rem -- This script will illustrate how to extract and process the text 
Rem -- of the printed output.

Sub OnConfigLoaded()
    Rem -- Turn on text extraction so that a text version of the
    Rem -- printer output is produced for later processing.
    Dim settings
    Set settings = Context("Config")
    settings("extracttext") = "yes"
End Sub

Rem -- Event handler: derive the "keywords" configuration value from the
Rem -- most frequently used words in the extracted text of the print job.
Rem --
Rem -- Reads the file named by Context("TextFileName"), counts every
Rem -- space-separated word longer than 3 characters (case-insensitively),
Rem -- and stores the top 5 words, separated by single spaces, in
Rem -- Context("Config")("keywords").
Sub OnPreprocessText()
    Const ForReading = 1
    Const MinWordLength = 4
    Const MaxKeywords = 5
    Dim fn, f, fso, cnt
    Dim d, i, word, a
    Dim l
    Dim keywords
    
    Rem -- Get the name of the text file from the context object
    fn = Context("TextFileName")
    
    Rem -- Count how many times each word occurs in the text file.
    Rem -- The dictionary maps a lower-cased word to its count.
    Set d = CreateObject("Scripting.Dictionary")
    Set fso = CreateObject("Scripting.FilesystemObject")
    Set f = fso.OpenTextFile(fn, ForReading)
    While Not f.AtEndOfStream
        l = f.ReadLine()

        Rem -- Words are split on single spaces only, so punctuation
        Rem -- stays attached to the word it touches.
        a = Split(l, " ")
        For i = LBound(a) To UBound(a)
            word = LCase(a(i))
            Rem -- Reading a missing key yields Empty, and Empty + 1 = 1,
            Rem -- so the first occurrence initializes the counter.
            If Len(word) >= MinWordLength Then d(word) = d(word) + 1
        Next
    Wend
    f.Close

    Rem -- Sort the words by descending count
    SortDictionary d, "desc"

    Rem -- Pick the first MaxKeywords words (fewer if the document
    Rem -- does not contain that many distinct words)
    keywords = ""
    cnt = 0
    For Each word In d.keys
        cnt = cnt + 1
        If keywords <> "" Then keywords = keywords & " "
        keywords = keywords & word
        If cnt = MaxKeywords Then Exit For
    Next
    
    Rem -- Set the keywords value in the configuration
    Context("Config")("keywords") = keywords
End Sub

Rem -- Sort a dictionary by its values.
Rem --
Rem -- d         : dictionary whose values can be compared with < and >
Rem --             (for example numeric counts). On return, d refers to a
Rem --             NEW dictionary holding the same key/value pairs in
Rem --             sorted order. The dictionary object originally passed
Rem --             in is emptied in the process.
Rem -- direction : must be either "asc" or "desc" (case-insensitive).
Rem --
Rem -- Raises error 1000 if direction is neither "asc" nor "desc".
Rem -- Entries with equal values keep their original relative order.
Sub SortDictionary(ByRef d, ByVal direction)
    Dim retv
    Dim best, bestKey, k
    
    direction = LCase(direction)
    If direction <> "asc" And direction <> "desc" Then 
        Err.Raise 1000, , "Direction parameter must be " & _
            "either asc or desc in call to SortDictionary."
    End If
    
    Set retv = CreateObject("Scripting.Dictionary")
    Rem -- Keep the key comparison rules of the source dictionary.
    Rem -- CompareMode can only be set while the dictionary is empty.
    retv.CompareMode = d.CompareMode

    Rem -- Selection sort: repeatedly move the largest (desc) or
    Rem -- smallest (asc) remaining entry into the result dictionary.
    While d.Count > 0
        Rem -- Empty marks "no candidate chosen yet" for this pass
        best = Empty
        bestKey = ""
        For Each k In d.keys
            If IsEmpty(best) Or _
                (d(k) > best And direction = "desc") Or _
                (d(k) < best And direction = "asc") Then

                best = d(k)
                bestKey = k
            End If
        Next
        retv(bestKey) = d(bestKey)
        d.Remove bestKey
    Wend
    Set d = retv
End Sub

Download Example Files

You can download and run the example yourself. The files needed are available here. The VBS file must be placed in the macros sub folder of the PDF writer installation. You can use the MacroDir setting to change the location of the VBS files if needed.

Example files (zip archive)

 
McAfee SiteAdvisor Norton Safe Web