LoginSignup
2
4

More than 1 year has passed since last update.

ライブラリを使わずにPDFを分析してみた(自分用)

Last updated at Posted at 2019-11-10

Passなし Encryptionありの Rev=3. RC4(40bit or 128bit) × /FlateDecode に対応。
(平文未動確)

画面キャプチャ

https://www.adobe.com/content/dam/acom/en/devnet/pdf/PDF32000_2008.pdf
を読み込ませた結果

image.png

参考

その他

下記を使えばブラウザ上でpdf構造を見れる。(JavaScript)

ブラウザ上でpdfを表示するライブラリ(JavaScript)

ソースコード

処理がかなり雑です。というか正常な手順を踏まずにデータ解析処理してますので真似はしないでください。

RC4は別途: https://qiita.com/kob58im/items/e20e35d9b11f39ce5da5

バグあり:オブジェクトの外まで検索してしまっている箇所が残っていそう。
誤った子ノードを探してしまい、無限に再帰するケースがある。
暫定処置で再帰の深さを制限している。

PdfStructViewer.cs

using System;
using System.Collections;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.IO.Compression;
//using System.Reflection;
//using System.Runtime.InteropServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
using System.Security.Cryptography; // MD5CryptoServiceProvider

using MyUtil;


class PdfStructViewer : Form
{
    ListView lsv;
    TreeView trv;
    SplitContainer spl;
    SplitContainer splL;
    SplitContainer splR;
    RichTextBox rtxt;   // object view
    RichTextBox rtxtSt; // stream...endstream

    FileStream _pdfFs;
    MD5CryptoServiceProvider _md5sp;
    byte[] _baseKey;
    int[] _xref;
    int _rootObjRef;
    bool _lsvEventByProgram;

    class PageObjId
    {
        public int idContents;
        public int idResources;
        public PageObjId()
        {
            idContents = -1;
            idResources = -1;
        }
    }

    PdfStructViewer(string path)
    {
        Text = "PDF Struct viewer";
        ClientSize = new Size(700,500);

        spl = new SplitContainer();
        spl.Dock = DockStyle.Fill;
        spl.Orientation = Orientation.Vertical;
        Controls.Add(spl);

        
        splL = new SplitContainer();
        splL.Dock = DockStyle.Fill;
        splL.Orientation = Orientation.Horizontal;
        spl.Panel1.Controls.Add(splL);

        splR = new SplitContainer();
        splR.Dock = DockStyle.Fill;
        splR.Orientation = Orientation.Horizontal;
        spl.Panel2.Controls.Add(splR);


        lsv = new ListView();
        lsv.View = View.Details;
        lsv.FullRowSelect = true;
        lsv.HideSelection = false;
        lsv.MultiSelect = false;
        lsv.GridLines = true;
        lsv.Dock = DockStyle.Fill;
        lsv.Columns.Add("No.",     100, HorizontalAlignment.Left);
        lsv.Columns.Add("Address", 400, HorizontalAlignment.Left);
        splL.Panel1.Controls.Add(lsv);

        trv = new TreeView();
        trv.HideSelection = false;
        trv.Dock = DockStyle.Fill;
        splL.Panel2.Controls.Add(trv);


		rtxt = new RichTextBox();
        rtxt.ReadOnly = true;
		rtxt.Multiline = true;
		rtxt.WordWrap = false;
		rtxt.ScrollBars = RichTextBoxScrollBars.Both;
		rtxt.Dock = DockStyle.Fill;
		splR.Panel1.Controls.Add(rtxt);

        
		rtxtSt = new RichTextBox();
        rtxtSt.ReadOnly = true;
		rtxtSt.Multiline = true;
		rtxtSt.WordWrap = false;
		rtxtSt.ScrollBars = RichTextBoxScrollBars.Both;
		rtxtSt.Dock = DockStyle.Fill;
		splR.Panel2.Controls.Add(rtxtSt);


        // -----------------
        ParsePdfStruct(path);
        ParseTreeFromRoot();

        lsv.SelectedIndexChanged += Lsv_SelectedIndexChanged;
        trv.AfterSelect += Trv_AfterSelect;
    }

    void Trv_AfterSelect(object sender, EventArgs e)
    {
        TreeNode node = trv.SelectedNode;
        if (node==null){return;}
        int id = (int)node.Tag;

        _lsvEventByProgram = true;
        try{
            lsv.Items[id].Selected = true;
            lsv.EnsureVisible(id);
        }
        finally{
            _lsvEventByProgram = false;
        }
    }

    void Lsv_SelectedIndexChanged(object sender, EventArgs e)
    {
        var items = lsv.SelectedItems;
        if ( !_lsvEventByProgram ) {trv.SelectedNode = null;}
        if ( items.Count != 1 ) {
            return;
        }
        ListViewItem item = items[0];
        Console.WriteLine(item.SubItems[0].Text);
        
        int addr = Convert.ToInt32(item.SubItems[1].Text.Substring(2),16);

        _pdfFs.Position = addr;
        string objData = GetPreviewData(_pdfFs);
        rtxt.Text = objData;

        
        _pdfFs.Position = addr;
        byte[] streamData = ParseStreamBlock(_pdfFs, _baseKey, _md5sp);
        rtxtSt.Text = ConvertBytesToString(streamData);
    }


    TreeNode MakeTreeNode(string name, int id)
    {
        return MakeTreeNode(name, id, 0);
    }
    TreeNode MakeTreeNode(string name, int id, int pageOffset)
    {
        string pageInfo = "";
        if(pageOffset>0){pageInfo="("+pageOffset.ToString()+")";}
        TreeNode node = new TreeNode("["+id.ToString()+"]"+name + pageInfo);
        node.Tag = id;
        return node;
    }

    int _tmpCallDepth;
    void ParseTreeFromRoot()
    {
        TreeNode rootNode = MakeTreeNode("Root", _rootObjRef);
        trv.Nodes.Add(rootNode);

        var rootDict = ParseRootObj(_pdfFs, _xref[_rootObjRef]);

        int objRef = (int)rootDict["/Pages"];
        TreeNode pagesNode = MakeTreeNode("Pages", objRef);
        rootNode.Nodes.Add(pagesNode);

        _tmpCallDepth = 0;
        ParseKidsRecursive(pagesNode, objRef, 1);
    }


    int ParseKidsRecursive(TreeNode parentNode, int parentId, int pageOffset)
    {
        int pageCount = 0;
        _tmpCallDepth++;

        if ( _tmpCallDepth <= 7 ) {
            PageObjId contentId;
            int[] pagesKids = ParsePagesObj(_pdfFs, _xref[parentId], out pageCount, out contentId);
            if (contentId.idContents>=0) {
                TreeNode pageNode = MakeTreeNode("Contents", contentId.idContents);
                parentNode.Nodes.Add(pageNode);
            }
            if (contentId.idResources>=0) {
                TreeNode pageNode = MakeTreeNode("Resources", contentId.idResources);
                parentNode.Nodes.Add(pageNode);
            }
            foreach(int subObjRef in pagesKids ) {
                TreeNode pageNode = MakeTreeNode("Kids", subObjRef, pageOffset);
                parentNode.Nodes.Add(pageNode);
                pageOffset += ParseKidsRecursive(pageNode, subObjRef, pageOffset);
            }
        }
        else{
            Console.WriteLine("Skipped:"+parentId.ToString());
        }

        _tmpCallDepth--;
        return pageCount;
    }

    void ParsePdfStruct(string path)
    {
        _pdfFs = new FileStream(path, FileMode.Open, FileAccess.Read);

        int trailerAddr;
        long pos = GetXRefAddress(_pdfFs, out trailerAddr, 500);
        Console.WriteLine("xref address: 0x" + pos.ToString("X"));
        
        _xref = ParseXRefTable(_pdfFs, (int)pos);
        var trailerDict = ParseTrailer(_pdfFs, trailerAddr);
        
        lsv.BeginUpdate();
        try{
            lsv.Items.Clear();
            for(int i=0;i<_xref.Length;i++){
                lsv.Items.Add(new ListViewItem(new string[]{i.ToString(),"0x"+_xref[i].ToString("X")}));
            }
        }
        finally{
            lsv.EndUpdate();
        }

        _baseKey = null;
        _md5sp = new MD5CryptoServiceProvider();


        if (trailerDict.ContainsKey("/Encrypt")) {
            byte[] id = (byte[])trailerDict["/ID"];
            int encObjNum = (int)trailerDict["/Encrypt"];
            Console.Write("Encrypt obj num:");
            Console.WriteLine(encObjNum.ToString());
            _pdfFs.Position = _xref[encObjNum];
            Dictionary<string, object> encryptionDict;
            ParseEncryptObj(_pdfFs, out encryptionDict);
            
            _baseKey = CalcBaseKey(encryptionDict, id, _md5sp);
        }

        _rootObjRef = (int)trailerDict["/Root"];

        //DumpIntArrayWithIndex(pagesKids,false);
    }
 

    // read /-?[0-9]+/ in binary
    // てきとう
    static bool ReadIntegerDigits(FileStream fs, out long digits)
    {
        var sb = new StringBuilder();
        int b = fs.ReadByte();
        bool retCode = false;
        if ( b == '-' ) {
            sb.Append((char)b);
            b = fs.ReadByte();
        }

        if ( IsDigit(b) ) {
            retCode = true;
        }
        
        do {
            sb.Append((char)b);
            b = fs.ReadByte();
        }
        while( IsDigit(b) );

        if ( b >= 0 ) { // EOFでなければ1文字戻す
            fs.Position--;
        }

        string s = sb.ToString();
        if (retCode) {
            digits = Convert.ToInt64(s);
        }
        else {
            digits = 0;
        }
        return retCode;
    }

    // read /\/[A-Za-z_][A-Za-z0-9]*/ in binary
    // てきとう
    static bool ReadKeyword(FileStream fs, out string ret)
    {
        var sb = new StringBuilder();
        int b = fs.ReadByte();

        if ( b == '/' ) {
            sb.Append((char)b);
        }
        else {
            if ( b >= 0 ) { // EOFでなければ1文字戻す
                fs.Position--;
            }
            ret = null;
            return false;
        }

        b = fs.ReadByte();
        if ( IsAlphabet(b) ) {
            sb.Append((char)b);
        }
        else {
            ret = null;
            return false;
        }

        b = fs.ReadByte();
        while ( IsAlphabet(b) || IsDigit(b) ) {
            sb.Append((char)b);
            b = fs.ReadByte();
        }

        if ( b >= 0 ) { // EOFでなければ1文字戻す
            fs.Position--;
        }

        ret = sb.ToString();

        return true;
    }

    // read /([ \t\r\n]+|\b)/
    // 空白文字を読み捨てる
    static bool ReadSpaces(FileStream fs)
    {
        int b;
        do {
            b = fs.ReadByte();
        } while ( b ==' ' || b =='\t' || b=='\r' || b=='\n' );

        if ( b>=0 ) { // EOFでなければ1文字戻す
            fs.Position--;
        }
        return true;
    }

    static bool ReadFixedSequence(FileStream fs, string s)
    {
        byte[] a = Encoding.ASCII.GetBytes(s); // expected data

        for(int i=0;i<a.Length;i++) {
            int b = fs.ReadByte();
            if ( b != a[i] ) {
                return false;
            }
        }
        return true;
    }

    // read postscript string object (...) in binary
    // e.g.:
    //  (xxx)
    //  (xxx(xxx)xxx)
    //  (xxx\)xxx)
    static bool ReadPsString(FileStream fs, out byte[] ret)
    {
        int nCount = 0;// Count of open '('
        bool escape = false;
        var t = new List<byte>();
        
        int b = fs.ReadByte();
        if ( b != '(' ) {
            if ( b>=0 ) { // EOFでなければ1文字戻す
                fs.Position--;
            }
            ret = null;
            return false;
        }

        nCount++;
        while ( nCount > 0 ) {
            b = fs.ReadByte();
            if ( b < 0 ) { // EOF
                ret = null;
                return false;
            }
            if (!escape) {
                switch(b) {
                    case '(':
                        nCount++;
                        t.Add((byte)b);
                        break;
                    case ')':
                        nCount--;
                        if ( nCount > 0 ) { t.Add((byte)b); }
                        break;
                    case '\\':
                        escape = true;
                        break;
                    default:
                        t.Add((byte)b);
                        break;
                }
            }
            else {
                escape = false;
                // \000 - \377 がケアできていない
                switch(b) {
                    case 't': t.Add((byte)'\t'); break;
                    case 'r': t.Add((byte)'\r'); break;
                    case 'n': t.Add((byte)'\n'); break;
                    case 'b': t.Add((byte)'\b'); break;
                    case 'f': t.Add((byte)'\f'); break;
                    default:  t.Add((byte)b);    break;
                }
            }
        }
        ret = t.ToArray();
        return true;
    }

    // 未実装: read /<([ \r\n\t]*[0-9A-Fa-f]{2}[ \r\n\t]*)*>/ in binary
    // read /<([0-9A-Fa-f]{2})+>/ in binary
    static bool ReadHexString(FileStream fs, out byte[] ret)
    {
        int b1 = fs.ReadByte();
        if ( b1 != '<' ) {
            if ( b1>=0 ) { // EOFでなければ1文字戻す
                fs.Position--;
            }
            ret = null;
            return false;
        }

        var t = new List<byte>();

        while ( true ) {
            b1 = fs.ReadByte();
            if ( b1 == '>' ) { // end mark (正常終了)
                break;
            }
            if ( b1 < 0 ) { ret = null; return false; } // EOF

            int b2 = fs.ReadByte();
            
            if ( b2 < 0 ) { ret = null; return false; } // EOF

            int x = DecodeHexChar(b1, b2);
            if (x < 0) { // invalid format
                ret = null;
                return false; 
            }

            t.Add((byte)x);
        }
        ret = t.ToArray();
        return true;
    }

    static bool ReadPsOrHexString(FileStream fs, out byte[] ret)
    {
        ret = null;
        int b = fs.ReadByte();

        if ( b >= 0 ) {fs.Position--;}
        if ( b == '(') {
            return ReadPsString(fs, out ret);
        }
        else if (b=='<') {
            return ReadHexString(fs, out ret);
        }
        else {
            return false;
        }
    }
    
    static bool IsSpacerChar(int x) { return (x==' ')||(x=='\t')||(x=='\r')||(x=='\n'); }
    static bool IsDigit(int x) { return  ('0'<= x && x <='9'); }
    static bool IsAlphabet(int x) { return  ('A'<= x && x <='Z') || ('a'<= x && x <='z'); }

    static bool IsPrintableChar(byte b)
    {
        if (b=='\t'||b=='\n'||b=='\n'){return true;}
        if (0x20<=b && b<=0x7E){return true;}
        return false;
    }

    static int DecodeHexChar(int x)
    {
        if ('0'<= x && x <='9') {return  x-'0';}
        if ('A'<= x && x <='F') {return (x-'A')+10;}
        if ('a'<= x && x <='f') {return (x-'a')+10;}
        return -1;
    }
    static int DecodeHexChar(int x1, int x2)
    {
        x1 = DecodeHexChar(x1);
        if ( x1 < 0 ) {return -1;}
        x2 = DecodeHexChar(x2);
        if ( x2 < 0 ) {return -1;}
        return (x1<<4) | x2;
    }

    static string ConvertBytesToString(byte[] data)
    {
        if (data==null){return "";}

        var sb = new StringBuilder(data.Length);
        foreach(byte b in data){
            if (IsPrintableChar(b)){
                sb.Append((char)b);
            }
            else {
                sb.Append('?');
            }
        }
        return sb.ToString();
    }


    static bool ParseEncryptObj(FileStream fs, out Dictionary<string, object> dict)
    {
        dict = null;

        long objId;
        long genId;
        bool retCode;

        if (!ReadIntegerDigits(fs, out objId)){return false;}
        ReadSpaces(fs);
        if (!ReadIntegerDigits(fs, out genId)){return false;}
        ReadSpaces(fs);
        if (!ReadFixedSequence(fs, "obj")){return false;}
        ReadSpaces(fs);
        if (!ReadFixedSequence(fs, "<<")){return false;}
        ReadSpaces(fs);

        dict = new Dictionary<string, object>();
        dict["/Length"] = 40;
        while (true) {
            string kwd;
            retCode = ReadKeyword(fs, out kwd);
            if ( !retCode ) {
                break;
            }
            Console.Write(kwd??"null");
            Console.Write(" ");

            ReadSpaces(fs);
            switch (kwd) {
                case "/Filter":
                    {
                        string kwd2;
                        if (!ReadKeyword(fs, out kwd2)){
                            return false;
                        }
                        Console.WriteLine(kwd2??"null");
                        dict[kwd] = kwd2;
                        ReadSpaces(fs);
                    }
                    break;
                case "/V":
                case "/R":
                case "/P":
                case "/Length":
                    {
                        long numValue;
                        if(!ReadIntegerDigits(fs, out numValue)){
                            return false;
                        }
                        if (kwd == "/Length" && (numValue<40 || numValue%8!=0) ) {
                            return false;
                        }
                        Console.WriteLine(numValue);
                        dict[kwd] = numValue;
                        ReadSpaces(fs);
                    }
                    break;
                case "/O":
                case "/U":
                    {
                        byte[] content;
                        if(!ReadPsOrHexString(fs, out content)){
                            return false;
                        }
                        dict[kwd] = content;
                        ReadSpaces(fs);
                        Console.Write("#");
                        Console.WriteLine(content.Length);
                        if (content.Length != 32) {
                            return false;
                        }
                    }
                    break;
                default:
                    break;
            }
        }

        if (!dict.ContainsKey("/O")) { return false; }
        if (!dict.ContainsKey("/P")) { return false; }
        if (!dict.ContainsKey("/R")) { return false; }

        return true;
    }

    static byte[] Int32ToBytesLE(uint x)
    {
        byte[] a = new byte[4];
        a[0] = (byte)( x      & 0xFF);
        a[1] = (byte)((x>>8)  & 0xFF);
        a[2] = (byte)((x>>16) & 0xFF);
        a[3] = (byte)((x>>24) & 0xFF);
        return a;
    }


    static byte[] CalcBaseKey(Dictionary<string, object> encryptDict, byte[] id, MD5CryptoServiceProvider md5sp)
    {
        if (id.Length != 16) {return null;}

        byte[] pswdPadding = new byte[]{
            0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41,  0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
            0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80,  0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
        };

        // 32 + O 32 + P 4 + ID 16
        byte[] data = new byte[200];
        int pos = 0;
        int n = 0;

        n = pswdPadding.Length;
        Array.Copy(pswdPadding, 0, data, pos, n);
        pos += n;
        
        n = 32;
        Array.Copy((byte[])encryptDict["/O"], 0, data, pos, n);
        pos += n;

        n = 4;
        Array.Copy(Int32ToBytesLE((uint)(long)encryptDict["/P"]), 0, data, pos, n);
        pos += n;

        n = 16;//id.Length;
        Array.Copy(id, 0, data, pos, n);
        pos += n;

        Array.Resize(ref data, pos);

        byte[] baseKey = md5sp.ComputeHash(data);

        for (int i=0;i<50;i++) {
            Array.Resize(ref baseKey, (int)(long)encryptDict["/Length"]/8);
            baseKey = md5sp.ComputeHash(baseKey);
        }

        Array.Resize(ref baseKey, (int)(long)encryptDict["/Length"]/8);

        return baseKey;
    }

    
    static byte[] CalcObjKey(byte[] baseKey, uint objId, uint genId, MD5CryptoServiceProvider md5sp)
    {
        //byte[] sAlT = Encoding.ASCII.GetBytes("sAlT"); // AES使用時
        byte[] data = new byte[baseKey.Length+5];//+sAlT.Length];
        int pos=0;
        int n;

        n = baseKey.Length;
        Array.Copy(baseKey,0,data,pos,n);
        pos += n;

        n = 3;
        Array.Copy(Int32ToBytesLE(objId),0,data,pos,n);
        pos += n;

        n = 2;
        Array.Copy(Int32ToBytesLE(genId),0,data,pos,n);
        pos += n;

        //n = sAlT.Length;
        //Array.Copy(sAlT,0,data,pos,n);
        //pos += n;

        //Console.Write("objkey before hash:");
        //DumpByteArray(data,data.Length);

        byte[] objKey = md5sp.ComputeHash(data); // 16byte

        if ( objKey.Length > baseKey.Length + 5 ) {
            Array.Resize(ref objKey, baseKey.Length + 5);
        }

        Console.Write("ObjKeyLen:");
        Console.WriteLine(objKey.Length);

        return objKey;
    }

    
    static int GetXRefAddress(FileStream fs, out int trailerAddress, int searchBytesLen)
    {
        byte[] buf = new byte[searchBytesLen];
        
        fs.Seek(-searchBytesLen, SeekOrigin.End);
        long tmpPos = fs.Position;
        int n = fs.Read(buf, 0, buf.Length);

        int iEof = SearchBackward(buf, "%%EOF");
        int iStartXref = SearchBackward(buf, "startxref");
        trailerAddress = SearchBackward(buf, "trailer");
        if (trailerAddress>=0) {trailerAddress += (int)tmpPos;}

        if ( iEof >= 0 && iStartXref >= 0 && iEof >= iStartXref ) {
            iStartXref += "startxref".Length;
            if (buf[iStartXref]=='\r') {
                iStartXref++;
            }
            if (buf[iStartXref]=='\n') {
                iStartXref++;
            }
        }
        else {
            return -1;
        }

        return ParseBufferInt32(buf, iStartXref);
    }

    // 負数非対応
    static int ParseBufferInt32(byte[] buf, int index)
    {
        var sb = new StringBuilder();
        while( index<buf.Length && IsDigit(buf[index]) ) {
            sb.Append((char)buf[index]);
            index++;
        }
        string s = sb.ToString();
        if (s.Length==0){return -1;}

        return Convert.ToInt32(s);
    }

    
    static int[] ParseXRefTable(FileStream fs, int headPos)
    {
        int[] offsetTable;
        fs.Position = headPos + 4; // +4 : length of "xref"

        bool retCode;
        long n;

        {
            long startIndex;
            ReadSpaces(fs);
            retCode = ReadIntegerDigits(fs, out startIndex);
            if ( !retCode || startIndex != 0 ) {
                Console.WriteLine("not supported xref data.");
                return null;
            }
            ReadSpaces(fs);
            retCode = ReadIntegerDigits(fs, out n);
            if (n <= 0) {
                Console.WriteLine("not supported xref data.");
                return null;
            }
        }

        offsetTable = new int[n];

        ReadSpaces(fs);
        for (int i=0;i<n;i++) {
            long pos;
            long genId;
            retCode = ReadIntegerDigits(fs, out pos);
            if ( !retCode || pos<0 ) { return null; }
            offsetTable[i] = (int)pos;
            ReadSpaces(fs);

            retCode = ReadIntegerDigits(fs, out genId);
            if ( !retCode || genId<0 ) { return null; }
            ReadSpaces(fs);

            int b = fs.ReadByte();
            if ( b!='n' && b!='f' ) {
                return null;
            }
            ReadSpaces(fs);
        }

        return offsetTable;
    }


    static int[] ParsePagesObj(FileStream fs, int pagesPos, out int pageCount, out PageObjId subObjIds)
    {
        //var dict = new Dictionary<string,object>();
        pageCount = 1;
        subObjIds = new PageObjId();
        const int BufSize = 1000;
        byte[] buf = new byte[BufSize];

        fs.Position = pagesPos;
        int n = fs.Read(buf, 0, buf.Length);
        if (n<=0) {return null;}
        Array.Resize(ref buf, n);

        int endobjPos = Search(buf, 0, "endobj");
        if (endobjPos >= 0){
            Array.Resize(ref buf, endobjPos+"endobj".Length);
        }

        //  /Kids[nnnnn 0 R nnnnn 0 R ...]

        int[] ret = new int[0];
        {
            const string KeyName = "/Kids";
            int iKids = Search(buf, 0, KeyName);
            if (iKids>=0) {
                iKids += KeyName.Length;
                while ( IsSpacerChar(buf[iKids]) ) {
                    iKids++;
                }
                if (buf[iKids]=='[') {
                    iKids++;
                    
                    int iRight = Search(buf, iKids, "]");
                    if (iRight >= iKids+1) {
                        string s = Encoding.ASCII.GetString(buf,iKids,iRight-iKids);
                        //Console.WriteLine(s);
                        Regex r = new Regex(@"([0-9]+)[ \r\n\t]+([0-9]+)[ \r\n\t]+R\b");
                        MatchCollection mc = r.Matches(s);
                        ret = new int[mc.Count];
                        for(int i=0;i<mc.Count;i++) {
                            ret[i] = Convert.ToInt32(mc[i].Groups[1].Value);
                        }
                    }
                }
            }
        }

        int tmp;
        if ( FindUIntOfKeyInBuffer(buf, "/Count", out tmp) ) {
            pageCount = tmp;
        }
        if ( FindUIntOfKeyInBuffer(buf, "/Contents", out tmp) ) {
            subObjIds.idContents = tmp;
        }
        if ( FindUIntOfKeyInBuffer(buf, "/Resources", out tmp) ) {
            subObjIds.idResources = tmp;
        }
        
        return ret;
    }


    static Dictionary<string,object> ParseRootObj(FileStream fs, int pagesPos)
    {
        var dict = new Dictionary<string,object>();
        const int BufSize = 1000;
        byte[] buf = new byte[BufSize];
        
        fs.Position = pagesPos;
        int n = fs.Read(buf, 0, buf.Length);
        if (n<=0) {return null;}
        Array.Resize(ref buf, n);

        int tmp;
        if ( FindUIntOfKeyInBuffer(buf, "/Pages", out tmp) ) {
            dict["/Pages"] = tmp;
        }

        return dict;
    }

    static Dictionary<string,object> ParseTrailer(FileStream fs, int trailerPos)
    {
        if (trailerPos<0){return null;}
        var dict = new Dictionary<string,object>();

        const int BufSize = 1000;
        byte[] buf = new byte[BufSize];
        //Console.WriteLine(trailerPos.ToString("X"));
        fs.Position = trailerPos;
        
        int n = fs.Read(buf, 0, buf.Length);
        if (n<=0) {return null;}
        Array.Resize(ref buf, n);

        int tmp;
        if ( FindUIntOfKeyInBuffer(buf, "/Encrypt", out tmp) ) {
            dict["/Encrypt"] = tmp;
        } else {
            Console.WriteLine("enc not found");
        }
        if ( FindUIntOfKeyInBuffer(buf, "/Root", out tmp) ) {
            dict["/Root"] = tmp;
        }
        if ( FindUIntOfKeyInBuffer(buf, "/Info", out tmp) ) {
            dict["/Info"] = tmp;
        }

        {
            const string KeyName = "/ID";
            int iID = Search(buf, 0, KeyName);
            if ( iID >= 0 ) { // /ID [<HexStr>
                iID += KeyName.Length;
                while ( IsSpacerChar(buf[iID]) ) {
                    iID++;
                }
                if (buf[iID]=='['){iID++;}
                
                byte[] idData1;
                fs.Position = trailerPos + iID;
                bool retCode = ReadHexString(fs, out idData1);
                if (retCode) {
                    dict[KeyName] = idData1;
                }
                else{
                    Console.WriteLine("id parse failed");
                }
            }
            else{
                Console.WriteLine("id not found");
            }
        }

        return dict;
    }

    static bool FindUIntOfKeyInBuffer(byte[] buf, string keyName, out int ret)
    {
        int index = Search(buf, 0, keyName);
        if (index>=0) {
            index += keyName.Length;
            while ( IsSpacerChar(buf[index]) ) {
                index++;
            }
            ret = ParseBufferInt32(buf, index);
            if (ret>=0) {
                return true;
            }
        }
        ret = -1;
        return false;
    }

    static int Search(byte[] data, int startI, string s)
    {
        if (s == "") {return -1;}
        byte[] w = Encoding.ASCII.GetBytes(s);
        int pos = 0;

        int i = startI;
        while ( i < data.Length ) {
            if ( data[i] == w[pos] ) {
                pos++;
                i++;
                if ( pos == w.Length) {
                    return i-w.Length;
                }
            }
            else {
                i -= pos;
                i++;
                pos = 0;
            }
        }
        return -1;
    }

    static int SearchBackward(byte[] data, string s)
    {
        if (s == "") {return -1;}
        byte[] w = Encoding.ASCII.GetBytes(s);
        int pos = 0;

        int i = data.Length-1;
        while ( i >= 0 ) {
            if ( data[i] == w[(w.Length-1)-pos] ) {
                pos++;
                if ( pos == w.Length) {
                    return i;
                }
                i--;
            }
            else {
                i += pos-1;
                pos = 0;
            }
        }
        return -1;
    }

    static byte[] ParseStreamBlock(FileStream fs, byte[] baseKey, MD5CryptoServiceProvider md5sp)
    {
        //nnnn 0 obj <</Filter/FlateDecode/Length
        var dict = new Dictionary<string,object>();

        long objId;
        long genId;
        bool retCode;
        if ( !ReadIntegerDigits(fs, out objId) ) {return null;}
        ReadSpaces(fs);
        if ( !ReadIntegerDigits(fs, out genId) ) {return null;}
        ReadSpaces(fs);
        if ( !ReadFixedSequence(fs, "obj") ) {return null;}
        ReadSpaces(fs);
        if ( !ReadFixedSequence(fs, "<<") ) {return null;}
        ReadSpaces(fs);

        while (true) {
            string kwd;
            retCode = ReadKeyword(fs, out kwd);
            if ( !retCode ) {
                break;
            }
            //Console.Write(kwd??"null");
            //Console.Write(" ");

            ReadSpaces(fs);
            switch (kwd) {
                case "/Filter":
                    {
                        string kwd2;
                        if (!ReadKeyword(fs, out kwd2)) {
                            return null;
                        }
                        //Console.WriteLine(kwd2??"null");
                        dict[kwd] = kwd2;
                        ReadSpaces(fs);
                    }
                    break;
                case "/Length":
                case "/Length1":
                    {
                        long numValue;
                        if (!ReadIntegerDigits(fs, out numValue)) {
                            return null;
                        }
                        dict[kwd] = numValue;
                        //Console.WriteLine(numValue);
                        ReadSpaces(fs);
                    }
                    break;
                default:
                    {
                        // 未知のパラメータ
                        return null;
                    }
                    //break;
            }
        }
        if (!ReadFixedSequence(fs, ">>")){return null;}
        ReadSpaces(fs);
        if (!ReadFixedSequence(fs, "stream")){return null;}
        ReadSpaces(fs); // <-- 読み捨てるべきではないデータも読み捨ててしまいそう

        if ( !dict.ContainsKey("/Length") ){return null;}
        int length = (int)(long)dict["/Length"];

        byte[] decrypted;
        if ( baseKey == null ) { // no encryption
            decrypted = new Byte[length];
            fs.Read(decrypted,0,length);
        }
        else {
            decrypted = DecryptStream(fs, baseKey, (int)objId, (int)genId, length, md5sp);
        }

        byte[] ret;
        if ( dict.ContainsKey("/Filter") && (string)dict["/Filter"] == "/FlateDecode" ) {
            ret = new byte[0];
            using (var ms = new MemoryStream(decrypted)) {
                int header1 = ms.ReadByte();
                int header2 = ms.ReadByte();
                if ( header1 < 0 || header2 < 0 ) { return null; }
                if ( (header1 & 0x0F) != 0x08 ) { return null; }
                if ( ((header1<<8) + header2) % 31 != 0 ) { return null; }
                
                byte[] buffer = new byte[1000];
                using (DeflateStream ds = new DeflateStream(ms, CompressionMode.Decompress)) {
                    while (true) {
                        int nRead;
                        try{
                            nRead = ds.Read(buffer, 0, buffer.Length);
                        }
                        catch(Exception e) {
                            Console.WriteLine(e);
                            return null;
                        }

                        if (nRead <= 0) {
                            break;
                        }
                        int nSize = ret.Length;
                        Array.Resize(ref ret, nSize + nRead);
                        Array.Copy(buffer, 0, ret, nSize, nRead);
                    }
                }
            }
        }
        else {
            ret = decrypted;
        }
        return ret;
    }


    static byte[] DecryptStream(FileStream fs, byte[] baseKey, int objId, int genId, int length, MD5CryptoServiceProvider md5sp)
    {
        byte[] objKey = CalcObjKey(baseKey, (uint)objId, (uint)genId, md5sp);
        //Console.WriteLine(objKey.Length);

        byte[] buf = new byte[length];
        fs.Read(buf,0,length);
        
        byte[] plain = new byte[length];

        using (MemoryStream ms = new MemoryStream(buf))
        using (MemoryStream msDest = new MemoryStream(plain))
        {
            Arcfour arcfour = Arcfour.CreateArcfour(objKey);

            arcfour.Encrypt(ms, msDest, buf.Length);//decrypt
        }

        //Console.Write("mod31: ");
        //Console.WriteLine((plain[0]*256 + plain[1])%31);
        //DumpByteArray(plain,50);

        return plain;
    }

    // endobjが見つかるまでのデータをASCIIにして出力。(単語単位では検索してないのでよくない)
    // 可読でない文字は ? に置き換える。
    // stream...endstreamはそのまま出力。
    static string GetPreviewData(FileStream fs)
    {
        const int MaxBufSize = 1000;
        byte[] buf = new byte[MaxBufSize];
        byte[] endobj = Encoding.ASCII.GetBytes("endobj");
        int n = fs.Read(buf,0,buf.Length);
        
        if ( n < endobj.Length ) {
            return "";
        }

        var sb = new StringBuilder(n);
        int pos = 0;
        for ( int i=0 ; i<n ; i++ ) {
            if ( IsPrintableChar(buf[i]) ) {
                sb.Append((char)buf[i]);
            }
            else {
                sb.Append('?');
            }

            if ( buf[i] == endobj[pos] ) {
                pos++;
                if ( pos == endobj.Length ) {
                    break;
                }
            }
            else {
                // たまたま "endobj" を構成する文字がすべて別の文字なので、検索アンマッチ時は単純に pos=0に戻しているだけでよいが、
                // "aab"とかだと "aaab"から検索していると"aa"のあと"ab"から検索が始まってとりこぼすおそれがある。
                pos = 0;
            }
        }
        return sb.ToString();
    }


    [STAThread]
    static void Main(string[] args)
    {
        if ( args.Length != 1 ) {
            Console.WriteLine("Argument error.");
            return;
        }

        Application.Run(new PdfStructViewer(args[0]));
        
    }

    static void DumpIntArrayWithIndex(int[] a, bool hex)
    {
        DumpIntArrayWithIndex(a,0,a.Length-1,hex);
    }
    
    static void DumpIntArrayWithIndex(int[] a, int fromI, int endI, bool hex)
    {
        if (fromI<0){fromI=0;}
        for(int i=fromI;i<a.Length&&i<=endI;i++) {
            Console.Write(i.ToString());
            if (hex) {
                Console.Write(": 0x");
                Console.WriteLine(a[i].ToString("X"));
            }
            else {
                Console.Write(": ");
                Console.WriteLine(a[i].ToString(""));
            }
        }
    }
    

    static void DumpByteArray(byte[] a, int n)
    {
        int cnt=0;
        foreach(byte b in a) {
            cnt++;
            Console.Write(b.ToString("X2"));
            if(cnt>=n)break;
        }
        Console.WriteLine();
    }
}

2
4
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
4