1 /* This is the MMURTL Debugger disassembler.
\r
3 MMURTL Operating System Source Code
\r
4 Copyright 1991,1992,1993,1994 Richard A. Burgess
\r
9 #define U32 unsigned long
\r
10 #define U16 unsigned short
\r
11 #define U8 unsigned char
\r
12 #define S32 signed long
\r
13 #define S16 signed short
\r
14 #define S8 signed char
\r
16 static U8 getbyte(void);
\r
17 static U8 modrm(void);
\r
18 static U8 sib(void);
\r
19 static int bytes(char c);
\r
20 static void ohex(char c, int extend, int optional, int defsize);
\r
21 static void reg_name(U8 which, char size);
\r
22 static void escape(char c, char t);
\r
23 static void decode(char *s);
\r
24 static void do_sib(int m);
\r
25 static void do_modrm(char t);
\r
26 U32 disassemble(U32 Addr);
\r
28 extern long xprintf(char *fmt, ...); /* From Monitor.c */
\r
33 /* The Intel 386 DX Programmer's Reference Manual provides a table that
\r
34 uses the following codes to assist in disassembly of 386 code (page A-3).
\r
35 The letters below are the same as the codes in the manual. The ~ (tilde)
\r
36 is an escape character to signify expansion of the codes is
\r
37 required when the string is being output to the screen.
\r
39 Tilde tokens in strings:
\r
40 First char after '~':
\r
42 C - Reg of R/M picks control register
\r
43 D - Reg of R/M picks debug register
\r
44 E - R/M picks operand
\r
46 G - Reg of R/M selects a general register
\r
48 J - Relative IP offset
\r
49 M - R/M picks memory
\r
50 O - No R/M, offset only
\r
51 R - Mod of R/M picks register only
\r
52 S - Reg of R/M picks segment register
\r
53 T - reg of R/M picks test register
\r
56 2 - prefix of two-byte opcode
\r
57 e - put in 'e' if use32 (second char is part of reg name)
\r
58 put in 'w' for use16 or 'd' for use32 (second char is 'w')
\r
59 f - Floating point (second char is esc value)
\r
60 g - do R/M group 'n'
\r
62 s - Size override (second char is a,o)
\r
64 Second char after '~':
\r
65 a - Two words in memory (BOUND)
\r
69 p - 32 or 48 bit pointer
\r
70 s - Six byte pseudo-descriptor
\r
73 1-8 - group number, esc value, etc
\r
/*
 * opmap1 - one-byte opcode map.
 *
 * Indexed directly by the first opcode byte (callers use
 * opmap1[getbyte()] and getbyte() returns a U8), so the table must hold
 * exactly 256 entries, 16 per row below.  Each entry is a template that
 * decode() expands; '~' introduces a two-character escape handled by
 * escape().  A 0 entry is an undefined opcode and is printed as
 * "<invalid>" by decode().
 *
 * Fixes relative to the previous table:
 *   - 0x72/0x73 were swapped: 0x72 is JC (JB), 0x73 is JNC (JNB),
 *     matching the order already used for the near forms in SecOp80.
 *   - 0x99 is CWD (was misspelled "CDW").
 *   - AAM (0xD4) and AAD (0xD5) are two-byte instructions; the second
 *     byte is now consumed as an immediate so the next instruction is
 *     decoded from the correct address.
 *   - 0xD8-0xDF had two alternative rows both compiled in, shifting
 *     every entry from 0xE0 upward by eight slots.  Only one row is
 *     active now.
 *
 * NOTE(review): 0x82 is kept as "MOV AL,~Ib" exactly as before; confirm
 * against the processor documentation whether it should mirror 0x80.
 */
static char *opmap1[] = {
/* 0x00 */
  "ADD ~Eb,~Gb", "ADD ~Ev,~Gv", "ADD ~Gb,~Eb", "ADD ~Gv,~Ev",
  "ADD AL,~Ib", "ADD ~eAX,~Iv", "PUSH ES", "POP ES",
  "OR ~Eb,~Gb", "OR ~Ev,~Gv", "OR ~Gb,~Eb", "OR ~Gv,~Ev",
  "OR AL,~Ib", "OR ~eAX,~Iv", "PUSH CS", "~2 ",
/* 0x10 */
  "ADC ~Eb,~Gb", "ADC ~Ev,~Gv", "ADC ~Gb,~Eb", "ADC ~Gv,~Ev",
  "ADC AL,~Ib", "ADC ~eAX,~Iv", "PUSH SS", "POP SS",
  "SBB ~Eb,~Gb", "SBB ~Ev,~Gv", "SBB ~Gb,~Eb", "SBB ~Gv,~Ev",
  "SBB AL,~Ib", "SBB ~eAX,~Iv", "PUSH DS", "POP DS",
/* 0x20 */
  "AND ~Eb,~Gb", "AND ~Ev,~Gv", "AND ~Gb,~Eb", "AND ~Gv,~Ev",
  "AND AL,~Ib", "AND ~eAX,~Iv", "~pE", "DAA",
  "SUB ~Eb,~Gb", "SUB ~Ev,~Gv", "SUB ~Gb,~Eb", "SUB ~Gv,~Ev",
  "SUB AL,~Ib", "SUB ~eAX,~Iv", "~pC", "DAS",
/* 0x30 */
  "XOR ~Eb,~Gb", "XOR ~Ev,~Gv", "XOR ~Gb,~Eb", "XOR ~Gv,~Ev",
  "XOR AL,~Ib", "XOR ~eAX,~Iv", "~pS", "AAA",
  "CMP ~Eb,~Gb", "CMP ~Ev,~Gv", "CMP ~Gb,~Eb", "CMP ~Gv,~Ev",
  "CMP AL,~Ib", "CMP ~eAX,~Iv", "~pD", "AAS",
/* 0x40 */
  "INC ~eAX", "INC ~eCX", "INC ~eDX", "INC ~eBX",
  "INC ~eSP", "INC ~eBP", "INC ~eSI", "INC ~eDI",
  "DEC ~eAX", "DEC ~eCX", "DEC ~eDX", "DEC ~eBX",
  "DEC ~eSP", "DEC ~eBP", "DEC ~eSI", "DEC ~eDI",
/* 0x50 */
  "PUSH ~eAX", "PUSH ~eCX", "PUSH ~eDX", "PUSH ~eBX",
  "PUSH ~eSP", "PUSH ~eBP", "PUSH ~eSI", "PUSH ~eDI",
  "POP ~eAX", "POP ~eCX", "POP ~eDX", "POP ~eBX",
  "POP ~eSP", "POP ~eBP", "POP ~eSI", "POP ~eDI",
/* 0x60 */
  "PUSHA", "POPA", "BOUND ~Gv,~Ma", "ARPL ~Ew,~Rw",
  "~pF", "~pG", "~so", "~sa",
  "PUSH ~Iv", "IMUL ~Gv=~Ev*~Iv", "PUSH ~Ib", "IMUL ~Gv=~Ev*~Ib",
  "INSB ~Yb,DX", "INS~ew ~Yv,DX", "OUTSB DX,~Xb", "OUTS~ew DX,~Xv",
/* 0x70 */
  "JO ~Jb", "JNO ~Jb", "JC ~Jb", "JNC ~Jb",
  "JZ ~Jb", "JNZ ~Jb", "JBE ~Jb", "JNBE ~Jb",
  "JS ~Jb", "JNS ~Jb", "JPE ~Jb", "JPO ~Jb",
  "JL ~Jb", "JGE ~Jb", "JLE ~Jb", "JG ~Jb",
/* 0x80 */
  "~g1 ~Eb,~Ib", "~g1 ~Ev,~Iv", "MOV AL,~Ib", "~g1 ~Ev,~Ib",
  "TEST ~Eb,~Gb", "TEST ~Ev,~Gv", "XCHG ~Eb,~Gb", "XCHG ~Ev,~Gv",
  "MOV ~Eb,~Gb", "MOV ~Ev,~Gv", "MOV ~Gb,~Eb", "MOV ~Gv,~Ev",
  "MOV ~Ew,~Sw", "LEA ~Gv,~M ", "MOV ~Sw,~Ew", "POP ~Ev",
/* 0x90 */
  "NOP", "XCHG ~eAX,~eCX", "XCHG ~eAX,~eDX", "XCHG ~eAX,~eBX",
  "XCHG ~eAX,~eSP", "XCHG ~eAX,~eBP", "XCHG ~eAX,~eSI", "XCHG ~eAX,~eDI",
  "CBW", "CWD", "CALL ~Ap", "FWAIT",
  "PUSH ~eflags", "POP ~eflags", "SAHF", "LAHF",
/* 0xA0 */
  "MOV AL,~Ov", "MOV ~eAX,~Ov", "MOV ~Ov,al", "MOV ~Ov,~eAX",
  "MOVSB ~Xb,~Yb", "MOVS~ew ~Xv,~Yv", "CMPSB ~Xb,~Yb", "CMPS~ew ~Xv,~Yv",
  "TEST AL,~Ib", "TEST ~eAX,~Iv", "STOSB ~Yb,AL", "STOS~ew ~Yv,~eAX",
  "LODSB AL,~Xb", "LODS~ew ~eAX,~Xv", "SCASB AL,~Xb", "SCAS~ew ~eAX,~Xv",
/* 0xB0 */
  "MOV AL,~Ib", "MOV CL,~Ib", "MOV DL,~Ib", "MOV BL,~Ib",
  "MOV AH,~Ib", "MOV CH,~Ib", "MOV DH,~Ib", "MOV BH,~Ib",
  "MOV ~eAX,~Iv", "MOV ~eCX,~Iv", "MOV ~eDX,~Iv", "MOV ~eBX,~Iv",
  "MOV ~eSP,~Iv", "MOV ~eBP,~Iv", "MOV ~eSI,~Iv", "MOV ~eDI,~Iv",
/* 0xC0 */
  "~g2 ~Eb,~Ib", "~g2 ~Ev,~Ib", "RET ~Iw", "RET",
  "LES ~Gv,~Mp", "LDS ~Gv,~Mp", "MOV ~Eb,~Ib", "MOV ~Ev,~Iv",
  "ENTER ~Iw,~Ib", "LEAVE", "RETF ~Iw", "retf",
  "INT 3", "INT ~Ib", "INTO", "IRET",
/* 0xD0 */
  "~g2 ~Eb,1", "~g2 ~Ev,1", "~g2 ~Eb,cl", "~g2 ~Ev,cl",
  "AAM ~Ib", "AAD ~Ib", 0, "XLAT",
#if 0
  /* Alternative for 0xD8-0xDF: show coprocessor escapes as ESC n,imm8.
     Presumably a fallback for builds without the '~f' handler. */
  "ESC 0,~Ib", "ESC 1,~Ib", "ESC 2,~Ib", "ESC 3,~Ib",
  "ESC 4,~Ib", "ESC 5,~Ib", "ESC 6,~Ib", "ESC 7,~Ib",
#else
  /* 0xD8-0xDF: pass the escape number to the 'f' case of escape(). */
  "~f0", "~f1", "~f2", "~f3",
  "~f4", "~f5", "~f6", "~f7",
#endif
/* 0xE0 */
  "LOOPNE ~Jb", "LOOPE ~Jb", "LOOP ~Jb", "JCXZ ~Jb",
  "IN AL,~Ib", "IN ~eAX,~Ib", "OUT ~Ib,AL", "OUT ~Ib,~eAX",
  "CALL ~Jv", "JMP ~Jv", "JMP ~Ap", "JMP ~Jb",
  "IN AL,DX", "IN ~eAX,DX", "OUT DX,AL", "OUT DX,~eAX",
/* 0xF0 */
  "LOCK~p ", 0, "REPNE~p ", "REP(e)~p ",
  "HLT", "CMC", "~g3", "~g0",
  "CLC", "STC", "CLI", "STI",
  "CLD", "STD", "~g4", "~g5"
};
\r
/*
 * Second-byte tables for two-byte (0x0F-prefixed) opcodes.
 *
 * escape() case '2' indexes SecOp00 with the second opcode byte itself
 * and SecOp20 with (second byte - 0x20), so each table covers one
 * 16-opcode row.  A 0 entry is an undefined opcode; decode() prints it
 * as "<invalid>".
 */

/* Second byte 0x00-0x0F. */
char *SecOp00[] = {
  "~g6", "~g7", "LAR ~Gv,~Ew", "LSL ~Gv,~Ew", 0, 0, "CLTS", 0,
  0, 0, 0, 0, 0, 0, 0, 0
};

/* Second byte 0x20-0x2F: moves to/from control, debug and test
   registers.  The initializer previously lacked its terminating ';'. */
static char *SecOp20[] = {
  "MOV ~Rd,~Cd", "MOV ~Rd,~Dd", "MOV ~Cd,~Rd", "MOV ~Dd,~Rd",
  "MOV ~Rd,~Td", 0, "MOV ~Td,~Rd", 0,
  0, 0, 0, 0, 0, 0, 0, 0
};
\r
/*
 * SecOp80 - second-byte table for two-byte (0x0F-prefixed) opcodes
 * 0x80-0xBF.  escape() case '2' indexes it with (second byte - 0x80),
 * so it must hold exactly 64 entries.  A 0 entry is an undefined opcode
 * and is printed as "<invalid>" by decode().
 *
 * Fixes relative to the previous table:
 *   - 0x92/0x93 were swapped: 0x92 is SETC (SETB), 0x93 is SETNC,
 *     the same condition order as the JC/JNC pair at 0x82/0x83.
 *   - "BSR" was missing the space before its operands.
 *   - LSS/LFS/LGS now show the destination register (~Gv), matching
 *     how LES/LDS are written in the one-byte map.
 */
static char *SecOp80[] = {
/* 0x80: near conditional jumps */
  "JO ~Jv", "JNO ~Jv", "JC ~Jv", "JNC ~Jv",
  "JZ ~Jv", "JNZ ~Jv", "JBE ~Jv", "JNBE ~Jv",
  "JS ~Jv", "JNS ~Jv", "JPE ~Jv", "JPO ~Jv",
  "JL ~Jv", "JGE ~Jv", "JLE ~Jv", "JG ~Jv",
/* 0x90: set byte on condition */
  "SETO ~Eb", "SETNO ~Eb", "SETC ~Eb", "SETNC ~Eb",
  "SETZ ~Eb", "SETNZ ~Eb", "SETBE ~Eb", "SETNBE ~Eb",
  "SETS ~Eb", "SETNS ~Eb", "SETP ~Eb", "SETNP ~Eb",
  "SETL ~Eb", "SETGE ~Eb", "SETLE ~Eb", "SETG ~Eb",
/* 0xA0 */
  "PUSH FS", "POP FS", 0, "BT ~Ev,~Gv",
  "SHLD ~Ev,~Gv,~Ib", "SHLD ~Ev,~Gv,cl", 0, 0,
  "PUSH GS", "POP GS", 0, "BTS ~Ev,~Gv",
  "SHRD ~Ev,~Gv,~Ib", "SHRD ~Ev,~Gv,cl", 0, "IMUL ~Gv,~Ev",
/* 0xB0 */
  0, 0, "LSS ~Gv,~Mp", "BTR ~Ev,~Gv",
  "LFS ~Gv,~Mp", "LGS ~Gv,~Mp", "MOVZX ~Gv,~Eb", "MOVZX ~Gv,~Ew",
  0, 0, "~g8 ~Ev,~Ib", "BTC ~Ev,~Gv",
  "BSF ~Gv,~Ev", "BSR ~Gv,~Ev", "MOVSX ~Gv,~Eb", "MOVSX ~Gv,~Ew"
};
\r
199 /* NOTE: Second byte of 2 byte OpCodes are Invalid if over 0xBF */
\r
/*
 * groups - templates for opcodes whose operation is selected by the
 * reg field of the ModR/M byte.
 *
 * escape() case 'g' uses groups[n][reg(modrm())], where n is the digit
 * following "~g" in the opcode template.  Row 0 is the ~Ev form of
 * group 3 (row 3 is its ~Eb form).  Rows 1, 2 and 8 hold only the
 * mnemonic; the operands come from the template that referenced the
 * group.  A 0 entry is an undefined encoding and is printed as
 * "<invalid>" by decode().
 *
 * Fix: the second TEST entry of rows 0 and 3 carried a stray trailing
 * comma that was printed after the operands.
 */
static char *groups[9][8] = {
  /* 0: group 3, ~Ev operands */
  { "TEST ~Ev,~Iv", "TEST ~Ev,~Iv", "NOT ~Ev", "NEG ~Ev",
    "MUL ~eAX,~Ev", "IMUL ~eAX,~Ev", "DIV ~eAX,~Ev", "IDIV ~eAX,~Ev" },
  /* 1: arithmetic/logic with immediate */
  { "ADD", "OR", "ADC", "SBB", "AND", "SUB", "XOR", "CMP" },
  /* 2: rotates and shifts */
  { "ROL", "ROR", "RCL", "RCR", "SHL", "SHR", "SHL", "SAR" },
  /* 3: group 3, ~Eb operands */
  { "TEST ~Eb,~Ib", "TEST ~Eb,~Ib", "NOT ~Eb", "NEG ~Eb",
    "MUL AL,~Eb", "IMUL AL,~Eb", "DIV AL,~Eb", "IDIV AL,~Eb" },
  /* 4 */
  { "INC ~Eb", "DEC ~Eb", 0, 0, 0, 0, 0, 0 },
  /* 5 */
  { "INC ~Ev", "DEC ~Ev", "CALL ~Ev", "CALL ~Ep",
    "JMP ~Ev", "JMP ~Ep", "PUSH ~Ev", 0 },
  /* 6 */
  { "SLDT ~Ew", "STR ~Ew", "LLDT ~Ew", "LTR ~Ew",
    "VERR ~Ew", "VERW ~Ew", 0, 0 },
  /* 7 */
  { "SGDT ~Ms", "SIDT ~Ms", "LGDT ~Ms", "LIDT ~Ms",
    "SMSW ~Ew", 0, "LMSW ~Ew", 0 },
  /* 8: bit tests with immediate bit index */
  { 0, 0, 0, 0, "BT", "BTS", "BTR", "BTC" }
};
\r
/*
 * Register name tables.
 *
 * seg_names is indexed by reg(modrm()) in escape() case 'S'.  That
 * field is three bits wide (0-7) but only six segment registers exist,
 * so the table is padded to eight entries; encodings 6 and 7 print
 * "??" instead of reading past the end of the array.
 */
static char *seg_names[]= {"ES","CS","SS","DS","FS","GS","??","??"};

/* General register names for byte, word and dword operands, in
   register-number order. */
static char *breg_names[]={"AL","CL","DL","BL","AH","CH","DH","BH" };
static char *wreg_names[]={"AX","CX","DX","BX","SP","BP","SI","DI" };
static char *dreg_names[]={"EAX","ECX","EDX","EBX","ESP","EBP","ESI","EDI" };
\r
232 /*****************************************************
\r
233 Gets a byte to disassemble and update addrIn.
\r
234 ******************************************************/
\r
236 static U8 getbyte(void)
\r
251 /*************************************************/
\r
252 /* Get Mod/RM field byte for current instruction */
\r
254 static U8 modrm(void)
\r
257 modrmv = getbyte();
\r
264 /*************************************************/
\r
265 /* Get 'scale-index-base' byte for current instruction */
\r
267 static U8 sib(void)
\r
276 /**********************************************************/
\r
277 /* The register is encoded as bits 3,4,5 in the byte.
\r
279 This macro extracts it. Used in several places.
\r
282 #define reg(a) (((a)>>3)&7)
\r
284 /*------------------------------------------------------------------------*/
\r
286 /*------------------------------------------------------------------------*/
\r
287 /* Determines how many bytes left in the instruction from the
\r
288 letter in the table (which is passed in here).
\r
291 static int bytes(char c)
\r
310 /**************************************************************
\r
311 Get the correct number of bytes for immediate data from the
\r
312 code stream and output it as hex.
\r
313 ***************************************************************/
\r
315 static void ohex(char c, int extend, int optional, int defsize)
\r
318 unsigned char buf[6];
\r
327 case 'b': /* byte */
\r
330 case 'w': /* word */
\r
333 case 'd': /* dword */
\r
336 case 's': /* fword */
\r
346 case 'p': /* 32 or 48 bit pointer */
\r
355 for (i=0; i<n; i++)
\r
356 buf[i] = getbyte();
\r
358 /* sign extend the value into a U32 */
\r
360 for (; i<extend; i++)
\r
361 buf[i] = (buf[i-1] & 0x80) ? 0xff : 0;
\r
363 if (s) /* outputs the segment value of FAR pointer */
\r
365 xprintf("%02x%02x",buf[n-1],buf[n-2]);
\r
378 xprintf("%02x",buf[0]);
\r
382 xprintf("%02x%02x",buf[1],buf[0]);
\r
386 xprintf("%02x%02x%02x%02x",buf[3],buf[2],buf[1],buf[0]);
\r
392 /*------------------------------------------------------------------------*/
\r
394 static void reg_name(U8 which, char size)
\r
398 xprintf( "st(%d)",which);
\r
402 if (((size == 'v') && (opsize == 32)) || (size == 'd'))
\r
408 xprintf( "%s", breg_names[which]);
\r
412 xprintf( "%s", wreg_names[which]);
\r
416 /******************************************************************
\r
417 This takes in two chars that represent part of the op code and
\r
418 puts out the proper text to match what the letter represents.
\r
419 c is the first char after the tilde and t is next one. See
\r
420 opmap1[] strings for what the chars mean.
\r
421 *******************************************************************/
\r
423 static void escape(char c, char t)
\r
432 case 'A': /* Direct Address */
\r
435 case 'C': /* Reg of R/M picks control reg */
\r
436 xprintf("CR%d",reg(modrm()));
\r
438 case 'D': /* Reg of R/M pick debug reg */
\r
439 xprintf("DR%d",modrm());
\r
441 case 'E': /* R/M picks operand */
\r
444 case 'G': /* Reg of R/M picks general reg */
\r
446 reg_name((modrm()&7), t);
\r
448 reg_name(reg(modrm()), t);
\r
450 case 'I': /* Immediate data */
\r
451 ohex(t, 0, 0, opsize);
\r
453 case 'J': /* Relative IP offset */
\r
457 valsb = getbyte(); /* must remian signed! */
\r
461 valsb = getbyte(); /*RAB Made SIGNEd bytes/Words */
\r
462 valsw = getbyte()<<8;
\r
463 vals = valsw + valsb;
\r
467 vals |= getbyte() << 8;
\r
468 vals |= getbyte() << 16;
\r
469 vals |= getbyte() << 24;
\r
472 delta = addrIn + vals;
\r
473 xprintf( "%x",delta);
\r
475 case 'M': /* R/M picks memory */
\r
478 case 'O': /* NO R/M, Offset only */
\r
483 case 'R': /* Mod of R/M pick REG only */
\r
486 case 'S': /* Reg of R/M picks seg reg */
\r
487 xprintf( "%s", seg_names[reg(modrm())]);
\r
489 case 'T': /* Reg of R/M picks test reg */
\r
490 xprintf( "TR%d",modrm());
\r
492 case 'X': /* DS:ESI */
\r
493 xprintf("DS:[ESI]");
\r
495 case 'Y': /* ES:EDI */
\r
496 xprintf("ES:[EDI]");
\r
498 case '2': /* Prefix of 2 byte opcode */
\r
501 decode(SecOp00[b2]);
\r
502 else if ((b2 > 0x1F) && (b2 < 0x30))
\r
503 decode(SecOp20[b2-0x20]);
\r
504 else if ((b2 > 0x7F) && (b2 < 0xC0))
\r
505 decode(SecOp80[b2-0x80]);
\r
507 xprintf("<bogus>");
\r
509 case 'e': /* t is part of reg name */
\r
512 if (t == 'w') /* put out "d" if t is "w" on 32 bit opsize */
\r
516 xprintf("E"); /* put out "E" if not t <> "w" then put t */
\r
524 case 'f': /* floating point */
\r
525 xprintf("<Float Op>");
\r
527 /* floating_point(t-'0'); */
\r
530 case 'g': /* do R/M group 'n' */
\r
531 decode(groups[t-'0'][reg(modrm())]);
\r
533 case 'p': /* Segment prefix */
\r
543 decode(opmap1[getbyte()]);
\r
547 xprintf("%cS:",prefix);
\r
551 decode(opmap1[getbyte()]);
\r
555 case 's': /* Size override */
\r
556 if (t=='o') { /* o is operand */
\r
557 opsize = 48 - opsize;
\r
558 decode(opmap1[getbyte()]);
\r
565 /******************************************
\r
566 This expands and outputs the instruction
\r
567 string passed in if it finds the escape
\r
569 ******************************************/
\r
571 static void decode(char *s)
\r
575 if (s == 0) /* if NULL pointer, then it's BAD */
\r
577 xprintf("<invalid>");
\r
579 while ((c = *s++) != 0) /* put next char in c */
\r
581 if (c == '~') /* if c is ~ then ESCAPE */
\r
583 c = *s++; /* get letter representing value */
\r
587 if (c == ' ') /* space */
\r
590 xprintf("%c",c); /* else put out the char found! */
\r
596 /* outputs 'scale-index-base' instructions */
\r
598 static void do_sib(int m)
\r
601 s = ((sib()) >> 6) & 7; /* SSxxxxxx Scale */
\r
602 i = ((sib()) >> 3) & 7; /* xxIIIxxx Index */
\r
603 b = sib() & 7; /* xxxxxBBB Base */
\r
606 case 0: decode("~p:[EAX"); break;
\r
607 case 1: decode("~p:[ECX"); break;
\r
608 case 2: decode("~p:[EDX"); break;
\r
609 case 3: decode("~p:[EBX"); break;
\r
610 case 4: decode("~p:[ESP"); break;
\r
615 ohex('d', 4, 0, 32);
\r
620 case 6: decode("~p:[ESI"); break;
\r
621 case 7: decode("~p:[EDI"); break;
\r
625 case 0: xprintf("+EAX"); break;
\r
626 case 1: xprintf("+ECX"); break;
\r
627 case 2: xprintf("+EDX"); break;
\r
628 case 3: xprintf("+EBX"); break;
\r
630 case 5: xprintf("+EBP"); break;
\r
631 case 6: xprintf("+ESI"); break;
\r
632 case 7: xprintf("+EDI"); break;
\r
638 case 1: xprintf("*2"); break;
\r
639 case 2: xprintf("*4"); break;
\r
640 case 3: xprintf("*8"); break;
\r
644 /*------------------------------------------------------------------------*/
\r
645 static void do_modrm(char t)
\r
650 m = ((modrm()) >> 6) & 7;
\r
658 if ((m == 0) && (r == 5))
\r
661 ohex('d', 4, 0, 32);
\r
671 case 0: xprintf("EAX"); break;
\r
672 case 1: xprintf("ECX"); break;
\r
673 case 2: xprintf("EDX"); break;
\r
674 case 3: xprintf("EBX"); break;
\r
675 case 4: do_sib(m); break;
\r
676 case 5: xprintf("EBP"); break;
\r
677 case 6: xprintf("ESI"); break;
\r
678 case 7: xprintf("EDI"); break;
\r
683 ohex('b', 4, 0, 32);
\r
687 ohex('v', 4, 0, 32);
\r
693 /***********************************************
\r
694 This disassembles one instruction each time it
\r
696 ************************************************/
\r
698 U32 disassemble(U32 Addr)
\r
703 opsize = SEGSIZE; /* default operand size is DWORD */
\r
706 xprintf( "%08x ", addrIn);
\r
708 decode(opmap1[getbyte()]); /* decode instruction and output */
\r