<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>OCR on Saleem Ansari</title>
    <link>/tags/ocr/</link>
    <description>Recent content in OCR on Saleem Ansari</description>
    <generator>Hugo -- gohugo.io</generator>
    <language>en</language>
    <copyright>(c) 2024 Saleem Ansari</copyright>
    <lastBuildDate>Fri, 19 Jul 2013 00:00:00 +0000</lastBuildDate>
    <atom:link href="/tags/ocr/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Extract Text from multi-page PDF with only Images</title>
      <link>/2013/07/19/extract-text-from-from-multi-page-pdf-with-only-images/</link>
      <pubDate>Fri, 19 Jul 2013 00:00:00 +0000</pubDate>
      <guid>/2013/07/19/extract-text-from-from-multi-page-pdf-with-only-images/</guid>
      <description>Sometimes there are only images in a PDF. In such cases you cannot select text to copy and paste or just for reference.&#xA;To extract text from an image or a PDF containing only images, I used the Tesseract OCR engine and Ghostscript. I am running Fedora 19 at the moment; however, these steps should apply to an older version of Fedora or Ubuntu. (I believe this can be done on Windows as well.)</description>
    </item>
  </channel>
</rss>
