程式人生 > 在C#程式碼中提取PDF中的註釋文字

在C#程式碼中提取PDF中的註釋文字

// Requires: using iTextSharp.text; using iTextSharp.text.pdf;
// Opens the PDF, looks at the annotations on page 1 and shows the contents of
// every FreeText annotation in a message box.
PdfReader myPdfReader = new PdfReader(@"C:\Users\Administrator\Desktop\加快QTP執行速度的建議.pdf");
try
{
    // Only page 1 is inspected.
    PdfDictionary pageDict = myPdfReader.GetPageN(1);
    if (pageDict == null)
    {
        return;
    }

    PdfArray annotArray = pageDict.GetAsArray(PdfName.ANNOTS);
    if (annotArray == null)
    {
        // The page has no annotation array; the finally block still closes the reader.
        return;
    }

    for (int i = 0; i < annotArray.Size; ++i)
    {
        // GetAsDict returns null when the entry is not a dictionary; skip such entries
        // instead of dereferencing null.
        PdfDictionary curAnnot = annotArray.GetAsDict(i);
        if (curAnnot == null)
        {
            continue;
        }

        // GetAsName returns null when /Subtype is missing or is not a name, and
        // PdfName.Equals(null) is false, so malformed annotations are skipped safely.
        PdfName subType = curAnnot.GetAsName(PdfName.SUBTYPE);
        if (!PdfName.FREETEXT.Equals(subType))
        {
            continue;
        }

        PdfString pdfString = curAnnot.GetAsString(PdfName.CONTENTS);
        if (pdfString != null)
        {
            string value = pdfString.ToUnicodeString();
            System.Windows.Forms.MessageBox.Show(value);
        }
    }
}
finally
{
    // Always release the underlying file, including on the early returns above.
    myPdfReader.Close();
}