pdftohtml.sh 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. #!/bin/bash
  2. #
  3. # Convert PDF to hybrid HTML output with images and line art rendered to a
  4. # background image, and text overlaid on top as absolutely positioned HTML
  5. # text.
  6. input=$1
  7. out=${2:-out}
  8. fmt=${3:-png}
  9. dpi=${4:-96}
  10. scale=$(expr 72 '*' $dpi / 96)
  11. if test -f "$1"
  12. then
  13. echo Processing "$input" out=$out fmt=$fmt dpi=$dpi
  14. else
  15. echo "usage: pdftohtml.sh input.pdf output-stem image-format dpi"
  16. echo " example: pdftohtml.sh input.pdf output png 96"
  17. exit
  18. fi
  19. title=$(basename "$input" | sed 's/.pdf$//')
  20. mutool convert -Oresolution=$dpi -o $out.html "$input"
  21. sed -i -e "/<head>/a<title>$title</title>" $out.html
  22. sed -i -e "/^<div/s/page\([0-9]*\)\" style=\"/page\1\" style=\"background-image:url('$out\1.$fmt');/" $out.html
  23. mutool draw -K -r$dpi -o$out%d.png "$input"
  24. echo Converting to $fmt
  25. for png in $out*.png
  26. do
  27. xxx=$(basename $png .png).$fmt
  28. case $fmt in
  29. png)
  30. if command -v optipng >/dev/null
  31. then
  32. optipng -silent -strip all $png
  33. fi
  34. ;;
  35. jpg)
  36. if command -v mozjpeg >/dev/null
  37. then
  38. mozjpeg -outfile $xxx $png
  39. else
  40. convert -format $fmt $png $xxx
  41. fi
  42. ;;
  43. webp)
  44. if command -v cwebp >/dev/null
  45. then
  46. cwebp -quiet -o $xxx $png
  47. else
  48. convert -format $fmt $png $xxx
  49. fi
  50. ;;
  51. *)
  52. convert -format $fmt $png $xxx
  53. ;;
  54. esac
  55. done