a b/src/internfile/mh_xslt.cpp
1
/* Copyright (C) 2005 J.F.Dockes 
2
 *   This program is free software; you can redistribute it and/or modify
3
 *   it under the terms of the GNU General Public License as published by
4
 *   the Free Software Foundation; either version 2 of the License, or
5
 *   (at your option) any later version.
6
 *
7
 *   This program is distributed in the hope that it will be useful,
8
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
 *   GNU General Public License for more details.
11
 *
12
 *   You should have received a copy of the GNU General Public License
13
 *   along with this program; if not, write to the
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
17
#include "autoconfig.h"
18
19
#include <libxml/parser.h>
20
#include <libxml/tree.h>
21
#include <libxslt/transform.h>
22
#include <libxslt/xsltInternals.h>
23
#include <libxslt/xsltutils.h>
24
25
#include "cstr.h"
26
#include "mh_xslt.h"
27
#include "log.h"
28
#include "smallut.h"
29
#include "md5ut.h"
30
#include "rclconfig.h"
31
#include "readfile.h"
32
33
using namespace std;
34
35
36
class FileScanXML : public FileScanDo {
37
public:
38
    FileScanXML(const string& fn) : m_fn(fn) {}
39
    virtual ~FileScanXML() {
40
        if (ctxt) {
41
            xmlFreeParserCtxt(ctxt);
42
        }
43
    }
44
45
    xmlDocPtr getDoc() {
46
        int ret;
47
        if ((ret = xmlParseChunk(ctxt, nullptr, 0, 1))) {
48
            xmlError *error = xmlGetLastError();
49
            LOGERR("FileScanXML: final xmlParseChunk failed with error " <<
50
                   ret << " error: " <<
51
                   (error ? error->message :
52
                    " null return from xmlGetLastError()") << "\n");
53
            return nullptr;
54
        }
55
        return ctxt->myDoc;
56
    }
57
58
    virtual bool init(int64_t size, string *) {
59
        LOGDEB1("FileScanXML: init: size " << size << endl);
60
        ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, m_fn.c_str());
61
        if (ctxt == nullptr) {
62
            LOGERR("FileScanXML: xmlCreatePushParserCtxt failed\n");
63
            return false;
64
        } else {
65
            return true;
66
        }
67
    }
68
    
69
    virtual bool data(const char *buf, int cnt, string*) {
70
        if (0) {
71
            string dt(buf, cnt);
72
            LOGDEB1("FileScanXML: data: cnt " << cnt << " data " << dt << endl);
73
        } else {
74
            LOGDEB1("FileScanXML: data: cnt " << cnt << endl);
75
        }            
76
        int ret;
77
        if ((ret = xmlParseChunk(ctxt, buf, cnt, 0))) {
78
            xmlError *error = xmlGetLastError();
79
            LOGERR("FileScanXML: xmlParseChunk failed with error " <<
80
                   ret << " for [" << buf << "] error " <<
81
                   (error ? error->message :
82
                    " null return from xmlGetLastError()") << "\n");
83
            return false;
84
        } else {
85
            LOGDEB1("xmlParseChunk ok (sent " << cnt << " bytes)\n");
86
            return true;
87
        }
88
    }
89
90
private:
91
    xmlParserCtxtPtr ctxt{nullptr};
92
    string m_fn;
93
};
94
95
/**
 * Private data for MimeHandlerXslt: the compiled style sheets and the
 * text produced by the last transformation.
 */
class MimeHandlerXslt::Internal {
public:
    // Set by the MimeHandlerXslt constructor once a style sheet was
    // successfully parsed. The handler methods refuse to work while false.
    bool ok{false};
    // Style sheet applied to the input document by set_document_file_impl().
    xsltStylesheet *metaOrAllSS{nullptr};
    // Second style sheet for the "metamember/datamember" parameter form.
    // Nothing in this file assigns it yet.
    xsltStylesheet *dataSS{nullptr};
    // Serialized output of the transformation, moved into the document
    // metadata by next_document().
    string result;

    // Both style sheets are owned by this object.
    ~Internal() {
        if (metaOrAllSS != nullptr)
            xsltFreeStylesheet(metaOrAllSS);
        if (dataSS != nullptr)
            xsltFreeStylesheet(dataSS);
    }
};
110
111
// Release the private implementation object allocated by the constructor
// (its own destructor frees the style sheets).
MimeHandlerXslt::~MimeHandlerXslt()
{
    delete m;
}
115
116
/**
 * Build an XSLT-based handler.
 *
 * @param cnf configuration, used to locate the "filters" directory under
 *   the data directory, where the style sheets are looked up.
 * @param id handler identifier, passed on to RecollFilter.
 * @param params either { "xslt", stylesheetall } or
 *   { "xslt", metamember, stylesheetmeta, datamember, stylesheetdata }.
 *   Only the first form is implemented.
 *
 * The constructor does not report errors directly: on any failure the
 * internal "ok" flag stays false and the document methods return false.
 */
MimeHandlerXslt::MimeHandlerXslt(RclConfig *cnf, const std::string& id,
                                 const std::vector<std::string>& params)
    : RecollFilter(cnf, id), m(new Internal)
{
    LOGDEB("MimeHandlerXslt: params: " << stringsToString(params) << endl);
    string filtersdir = path_cat(cnf->getDatadir(), "filters");

    // libxml2 global parser defaults: no entity substitution, no loading
    // of external DTDs.
    xmlSubstituteEntitiesDefault(0);
    xmlLoadExtDtdDefaultValue = 0;

    // params can be "xslt stylesheetall" or
    // "xslt metamember stylesheetmeta datamember stylesheetdata"
    if (params.size() == 2) {
        string ssfn = path_cat(filtersdir, params[1]);
        FileScanXML XMLstyle(ssfn);
        string reason;
        if (!file_scan(ssfn, &XMLstyle, &reason)) {
            LOGERR("MimeHandlerXslt: file_scan failed for style sheet " <<
                   ssfn << " : " << reason << endl);
            return;
        }
        xmlDoc *stl = XMLstyle.getDoc();
        if (stl == nullptr) {
            LOGERR("MimeHandlerXslt: getDoc failed for style sheet " <<
                   ssfn << endl);
            return;
        }
        m->metaOrAllSS = xsltParseStylesheetDoc(stl);
        if (nullptr == m->metaOrAllSS) {
            LOGERR("MimeHandlerXslt: xsltParseStylesheetDoc failed for "
                   "style sheet " << ssfn << endl);
            // The style sheet only takes ownership of the document when
            // parsing succeeds: on failure it is still ours to free.
            xmlFreeDoc(stl);
            return;
        }
        m->ok = true;
    } else if (params.size() == 4) {
        // Separate metadata/data style sheets: not implemented yet. The
        // handler is left unusable (ok stays false).
        LOGERR("MimeHandlerXslt: separate meta/data style sheets are not "
               "supported yet: " << stringsToString(params) << endl);
    } else {
        LOGERR("MimeHandlerXslt: constructor with wrong param vector: " <<
               stringsToString(params) << endl);
    }
}
153
154
bool MimeHandlerXslt::set_document_file_impl(const std::string& mt, 
155
                                             const std::string &file_path)
156
{
157
    LOGDEB0("MimeHandlerXslt::set_document_file_: fn: " << file_path << endl);
158
    if (!m || !m->ok) {
159
        return false;
160
    }
161
    if (nullptr == m->dataSS) {
162
        if (nullptr == m->metaOrAllSS) {
163
            LOGERR("MimeHandlerXslt::set_document_file_impl: both ss empty??\n");
164
            return false;
165
        }
166
        FileScanXML XMLdoc(file_path);
167
        string md5, reason;
168
        if (!file_scan(file_path, &XMLdoc, 0, -1, &reason,
169
                       m_forPreview ? nullptr : &md5)) {
170
            LOGERR("MimeHandlerXslt::set_document_file_impl: file_scan failed "
171
                   "for " << file_path << " : " << reason << endl);
172
            return false;
173
        }
174
        if (!m_forPreview) {
175
            m_metaData[cstr_dj_keymd5] = md5;
176
        }
177
        xmlDocPtr doc = XMLdoc.getDoc();
178
        if (nullptr == doc) {
179
            LOGERR("MimeHandlerXslt::set_doc_file_impl: no parsed doc\n");
180
            return false;
181
        }
182
        xmlDocPtr transformed = xsltApplyStylesheet(m->metaOrAllSS, doc, NULL);
183
        if (nullptr == transformed) {
184
            LOGERR("MimeHandlerXslt::set_doc_file_: xslt transform failed\n");
185
            xmlFreeDoc(doc);
186
            return false;
187
        }
188
        xmlChar *outstr;
189
        int outlen;
190
        xsltSaveResultToString(&outstr, &outlen, transformed, m->metaOrAllSS);
191
        m->result = string((const char*)outstr, outlen);
192
        xmlFree(outstr);
193
        xmlFreeDoc(transformed);
194
        xmlFreeDoc(doc);
195
    } else {
196
        LOGERR("Not ready for multipart yet\n");
197
        abort();
198
    }
199
            
200
    m_havedoc = true;
201
    return true;
202
}
203
204
bool MimeHandlerXslt::set_document_string_impl(const string& mt, 
205
                                               const string& msgtxt)
206
{
207
    if (!m || !m->ok) {
208
        return false;
209
    }
210
    return true;
211
}
212
213
bool MimeHandlerXslt::next_document()
214
{
215
    if (!m || !m->ok) {
216
        return false;
217
    }
218
    if (m_havedoc == false)
219
  return false;
220
    m_havedoc = false;
221
    m_metaData[cstr_dj_keymt] = cstr_texthtml;
222
    m_metaData[cstr_dj_keycontent].swap(m->result);
223
    LOGDEB1("MimeHandlerXslt::next_document: result: [" <<
224
            m_metaData[cstr_dj_keycontent] << "]\n");
225
    return true;
226
}
227
228
void MimeHandlerXslt::clear_impl()
229
{
230
    m_havedoc = false;
231
    m->result.clear();
232
}