|
a/src/filters/rclpdf |
|
b/src/filters/rclpdf |
|
... |
|
... |
130 |
gsub(/&/, "\\&", mid)
|
130 |
gsub(/&/, "\\&", mid)
|
131 |
gsub(/</, "\\<", mid)
|
131 |
gsub(/</, "\\<", mid)
|
132 |
gsub(/>/, "\\>", mid)
|
132 |
gsub(/>/, "\\>", mid)
|
133 |
mid = "<title>" mid "</title>"
|
133 |
mid = "<title>" mid "</title>"
|
134 |
$0 = part1 mid part2
|
134 |
$0 = part1 mid part2
|
135 |
}
|
135 |
}
|
|
|
136 |
|
|
|
137 |
# Recoll treats "Subject" as a "title" element (based on emails). The PDF
|

|
138 |
# "Subject" metadata field is more like an HTML "description", so the
# meta element's name is rewritten from "Subject" to "Description".
# NOTE(review): the doescape == 0 guard presumably restricts this to lines
# seen before the "<pre>" body marker increments doescape - confirm that
# doescape is not modified elsewhere.
|

|
139 |
if(doescape == 0 && $0 ~ /<meta ?name="Subject"/){
|

|
140 |
# gsub rewrites every ="Subject" occurrence on the line, not only the
# one belonging to the name attribute.
gsub(/="Subject"/, "=\"Description\"", $0)
|

|
141 |
}
|
136 |
|
142 |
|
137 |
if ($0 == "<pre>"){
|
143 |
if ($0 == "<pre>"){
|
138 |
# Beginning of body text. We need to escape some chars from now on, as
|
144 |
# Beginning of body text. We need to escape some chars from now on, as
|
139 |
# pdftotext sometimes doesn't do it
|
145 |
# pdftotext sometimes doesn't do it
|
140 |
doescape++
|
146 |
doescape++
|