본문 바로가기

Programming/Linux

[리눅스][C언어] 배열은 포인터다

반응형

 

왜 포인터 데이터타입은 모두 8바이트인가?

 

 

[리눅스][C언어] 배열 포인터

배열은 붙어있다. 변수는 붙어있는 것이 보장되지 않는다. #include int main(){ int arryA[10]; for(int i=0; i root@DESKTOP-O5CM2RJ:/project/22-c_study/04-13# gcc arry.c root@DESKTOP-O5CM2RJ:/project/22-..

mrlazydev.tistory.com

위 글에 이어서, 포인터 변수는 가리키는 데이터타입과 상관없이 크기가 모두 8바이트인데, 크기가 동일하다면 왜 포인터에 다양한 데이터타입을 지정해야 하는가에 대한 보충설명에서부터 시작한다.

 

#include <stdio.h>

#define MAX 100

/*
 * Demo program: performs one store through a pointer and one store into a
 * one-element array so the two kinds of store can be compared side by side.
 *
 * Returns 1 unconditionally (note: a nonzero return from main is
 * conventionally reported to the caller as a failure status).
 */
int main(){

        int a;          /* plain int; written only indirectly, through pa */
        int *pa;        /* pointer that will hold the address of a */

        int arry[1];    /* array with a single int element */

        pa = &a;        /* pa now points at a */
        *pa = 0x77;     /* dereference pa: stores 0x77 into a */

        arry[0] = 0x55; /* direct store into the only array element */




        return 1;
}
root@DESKTOP-O5CM2RJ:/project/22-c_study/04-13# gcc -g arry.c
root@DESKTOP-O5CM2RJ:/project/22-c_study/04-13# ls
a.out  arry.c

gcc -g arry.c 를 하면 디버그 모드로 컴파일을 해서, 파일에 디버그 정보를 넣게 된다.

 

root@DESKTOP-O5CM2RJ:/project/22-c_study/04-13# gdb a.out
GNU gdb (Ubuntu 8.1.1-0ubuntu1) 8.1.1
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from a.out...done.
(gdb)

gdb a.out을 하고,

(gdb) disas main 명령으로 main 함수를 디스어셈블(disassemble)해서 어셈블리 코드를 확인한다.

 

Dump of assembler code for function main:
   0x000000000000066a <+0>:     push   %rbp
   0x000000000000066b <+1>:     mov    %rsp,%rbp
   0x000000000000066e <+4>:     sub    $0x20,%rsp
   0x0000000000000672 <+8>:     mov    %fs:0x28,%rax
   0x000000000000067b <+17>:    mov    %rax,-0x8(%rbp)
   0x000000000000067f <+21>:    xor    %eax,%eax
   0x0000000000000681 <+23>:    lea    -0x1c(%rbp),%rax
   0x0000000000000685 <+27>:    mov    %rax,-0x18(%rbp)
   0x0000000000000689 <+31>:    mov    -0x18(%rbp),%rax
   0x000000000000068d <+35>:    movl   $0x77,(%rax)
   0x0000000000000693 <+41>:    movl   $0x55,-0xc(%rbp)
   0x000000000000069a <+48>:    mov    $0x1,%eax
   0x000000000000069f <+53>:    mov    -0x8(%rbp),%rdx
   0x00000000000006a3 <+57>:    xor    %fs:0x28,%rdx
   0x00000000000006ac <+66>:    je     0x6b3 <main+73>
   0x00000000000006ae <+68>:    callq  0x540 <__stack_chk_fail@plt>
   0x00000000000006b3 <+73>:    leaveq
   0x00000000000006b4 <+74>:    retq
End of assembler dump.

 

 $0x77,(%rax) 이 부분이 *pa = 0x77; 에 해당한다

 

rax 라는 레지스터에  a의 주소가 들어가 있다는 뜻이다.

그 사전 작업을 mov -0x18(%rbp),%rax 에서 한 것이다. 즉, 스택의 rbp-0x18 위치에 저장되어 있는 pa의 값(= a의 주소)을 rax 레지스터로 읽어 온다.

 

*의 역할을  $0x77,(%rax)의 ()가 한다.

 

rbp는 현재 함수의 스택 프레임 기준 주소(베이스 포인터)를 담고 있는 레지스터이고, 지역 변수들은 rbp에서 일정 오프셋을 뺀 위치에 놓인다.

*pa = 0x77; =  $0x77,(%rax) 

arry[0] = 0x55; = $0x55,-0xc(%rbp)

 

 

16진수로 표현된 것을 10진수로 변경하면

1c = 28

18 = 24

c = 12

 

 

rbp - 28 == a

rbp - 24 == pa

rbp - 12 == arry

 

 

#include <stdio.h>

#define MAX 100

/*
 * Demo program: writes the same array element twice, once with subscript
 * syntax and once with explicit pointer arithmetic, to show that both
 * expressions designate the same object.
 *
 * Returns 1 unconditionally (note: a nonzero return from main is
 * conventionally reported to the caller as a failure status).
 */
int main(){

        int arry[5];

        arry[3] = 0x55;         /* subscript form: fourth element */

        *(arry + 3) = 0x77;     /* pointer form: same element, overwrites 0x55 */


        return 1;
}
(gdb) disas main
Dump of assembler code for function main:
   0x000000000000066a <+0>:     push   %rbp
   0x000000000000066b <+1>:     mov    %rsp,%rbp
   0x000000000000066e <+4>:     sub    $0x20,%rsp
   0x0000000000000672 <+8>:     mov    %fs:0x28,%rax
   0x000000000000067b <+17>:    mov    %rax,-0x8(%rbp)
   0x000000000000067f <+21>:    xor    %eax,%eax
   0x0000000000000681 <+23>:    movl   $0x55,-0x14(%rbp)
   0x0000000000000688 <+30>:    movl   $0x77,-0x14(%rbp)
   0x000000000000068f <+37>:    mov    $0x1,%eax
   0x0000000000000694 <+42>:    mov    -0x8(%rbp),%rdx
   0x0000000000000698 <+46>:    xor    %fs:0x28,%rdx
   0x00000000000006a1 <+55>:    je     0x6a8 <main+62>
   0x00000000000006a3 <+57>:    callq  0x540 <__stack_chk_fail@plt>
   0x00000000000006a8 <+62>:    leaveq
   0x00000000000006a9 <+63>:    retq
End of assembler dump.

 

배열의 이름(arry)은 배열의 시작 주소, 즉 arry[0]의 주소를 나타내고, 인덱스가 1씩 증가할 때마다 주소는 int의 크기인 4바이트씩 증가하게 된다.

 

위 C코드에서 arry[3]은 포인터 연산으로 표현한 *(arry + 3)와 동일한 주소(-0x14(%rbp))를 가리키고 있음을 알 수 있다.

 

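즉 배열의 원소 접근은 원래 포인터 연산이었던 것이다. arry[i]는 *(arry + i)와 같은 의미이며, 배열 이름은 대부분의 식에서 첫 번째 원소를 가리키는 포인터로 변환되어 쓰인다. (엄밀히 말하면 배열 자체가 포인터 변수인 것은 아니다. 예를 들어 sizeof(arry)는 포인터 크기가 아니라 배열 전체의 크기를 돌려준다.)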

 

#include <stdio.h>

#define MAX 100

/*
 * Demo program: repeats the subscript-vs-pointer-arithmetic comparison for
 * both an int array and a char array, using the same index (3) in each,
 * so the effect of the element type on the computed address can be observed.
 *
 * Returns 1 unconditionally (note: a nonzero return from main is
 * conventionally reported to the caller as a failure status).
 */
int main(){

        int arry[5];

        arry[3] = 0x55;         /* subscript form: fourth int element */
        *(arry + 3) = 0x77;     /* pointer form: same int element */

        char arry_c[5];

        arry_c[3] = 0x44;       /* subscript form: fourth char element */
        *(arry_c + 3) = 0x99;   /* pointer form: same char element.
                                 * NOTE(review): 0x99 does not fit in a signed
                                 * char; if plain char is signed here the
                                 * conversion is implementation-defined. */



        return 1;
}
Dump of assembler code for function main:
   0x000000000000066a <+0>:     push   %rbp
   0x000000000000066b <+1>:     mov    %rsp,%rbp
   0x000000000000066e <+4>:     sub    $0x30,%rsp
   0x0000000000000672 <+8>:     mov    %fs:0x28,%rax
   0x000000000000067b <+17>:    mov    %rax,-0x8(%rbp)
   0x000000000000067f <+21>:    xor    %eax,%eax
   0x0000000000000681 <+23>:    movl   $0x55,-0x24(%rbp)
   0x0000000000000688 <+30>:    movl   $0x77,-0x24(%rbp)
   0x000000000000068f <+37>:    movb   $0x44,-0xa(%rbp)
   0x0000000000000693 <+41>:    movb   $0x99,-0xa(%rbp)
   0x0000000000000697 <+45>:    mov    $0x1,%eax
   0x000000000000069c <+50>:    mov    -0x8(%rbp),%rdx
   0x00000000000006a0 <+54>:    xor    %fs:0x28,%rdx
   0x00000000000006a9 <+63>:    je     0x6b0 <main+70>
   0x00000000000006ab <+65>:    callq  0x540 <__stack_chk_fail@plt>
   0x00000000000006b0 <+70>:    leaveq
   0x00000000000006b1 <+71>:    retq
End of assembler dump.

똑같은 배열이더라도, 데이터타입에 따라 더해야 하는 값이 달라진다.

 

#include <stdio.h>

#define MAX 100

/*
 * Demo program: stores to three consecutive elements (indices 2, 3, 4) of an
 * int array and of a char array using pointer arithmetic only, so the
 * per-element address step of each type can be compared.
 *
 * Returns 1 unconditionally (note: a nonzero return from main is
 * conventionally reported to the caller as a failure status).
 */
int main(){

        int arry[5];

        /* Consecutive int elements: each step of 1 in the index moves the
         * address by sizeof(int) bytes. */
        *(arry + 2) = 0x76;
        *(arry + 3) = 0x77;
        *(arry + 4) = 0x78;

        char arry_c[5];

        /* Consecutive char elements: each step of 1 in the index moves the
         * address by exactly one byte (sizeof(char) is 1).
         * NOTE(review): 0x91..0x93 do not fit in a signed char; if plain char
         * is signed here the conversion is implementation-defined. */
        *(arry_c + 2) = 0x91;
        *(arry_c + 3) = 0x92;
        *(arry_c + 4) = 0x93;



        return 1;
}
(gdb) disas main
Dump of assembler code for function main:
   0x000000000000066a <+0>:     push   %rbp
   0x000000000000066b <+1>:     mov    %rsp,%rbp
   0x000000000000066e <+4>:     sub    $0x30,%rsp
   0x0000000000000672 <+8>:     mov    %fs:0x28,%rax
   0x000000000000067b <+17>:    mov    %rax,-0x8(%rbp)
   0x000000000000067f <+21>:    xor    %eax,%eax
   0x0000000000000681 <+23>:    movl   $0x76,-0x28(%rbp)
   0x0000000000000688 <+30>:    movl   $0x77,-0x24(%rbp)
   0x000000000000068f <+37>:    movl   $0x78,-0x20(%rbp)
   0x0000000000000696 <+44>:    movb   $0x91,-0xb(%rbp)
   0x000000000000069a <+48>:    movb   $0x92,-0xa(%rbp)
   0x000000000000069e <+52>:    movb   $0x93,-0x9(%rbp)
   0x00000000000006a2 <+56>:    mov    $0x1,%eax
   0x00000000000006a7 <+61>:    mov    -0x8(%rbp),%rdx
   0x00000000000006ab <+65>:    xor    %fs:0x28,%rdx
   0x00000000000006b4 <+74>:    je     0x6bb <main+81>
   0x00000000000006b6 <+76>:    callq  0x540 <__stack_chk_fail@plt>
   0x00000000000006bb <+81>:    leaveq
   0x00000000000006bc <+82>:    retq
End of assembler dump.

이걸 보면 알 수 있듯이, 인덱스가 1 증가할 때마다 int형 배열은 주소가 4바이트씩(-0x28 → -0x24 → -0x20), char형 배열은 1바이트씩(-0xb → -0xa → -0x9) 증가하는 걸 확인할 수 있다. (rbp에서 빼는 오프셋의 크기는 그만큼씩 줄어든다.)

 

 

주소(포인터) 자체의 크기는 가리키는 데이터타입과 무관하게 32비트 환경에서는 4바이트, 64비트 환경에서는 8바이트로 동일하지만, 각 데이터타입마다 배열에서 증가하는 크기가 다르기 때문에 컴파일러에게 미리 어떤 데이터타입인지 알려줌으로써 일을 쉽게 하기 위해, 포인터 변수 앞에도 데이터타입을 적는다.

 

 

 

 

 

 

 

 

 

 

 

 

반응형