drbd问题请教各位高手指点,这是什么问题怎么解决?

etracer 2012-02-14 04:29:37
[root@file2 d]# service drbd start
Starting DRBD resources: [
create res: r0
prepare disk: r0
adjust disk: r0:failed(attach:10)
adjust net: r0
]
unable to join drbd events multicast group
.
[root@file2 d]# tail /var/log/messages
Feb 15 02:19:58 file2 kernel: d-con r0: conn( StandAlone -> Unconnected )
Feb 15 02:19:58 file2 kernel: d-con r0: Starting receiver thread (from drbd_w_r0 [5060])
Feb 15 02:19:58 file2 kernel: d-con r0: receiver (re)started
Feb 15 02:19:58 file2 kernel: d-con r0: conn( Unconnected -> WFConnection )
Feb 15 02:19:58 file2 kernel: d-con r0: Handshake successful: Agreed network protocol version 100
Feb 15 02:19:58 file2 kernel: d-con r0: Peer authenticated using 20 bytes HMAC
Feb 15 02:19:58 file2 kernel: d-con r0: conn( WFConnection -> WFReportParams )
Feb 15 02:19:58 file2 kernel: d-con r0: Starting asender thread (from drbd_r_r0 [5062])
Feb 15 02:19:58 file2 kernel: block drbd1: max BIO size = 4096
Feb 15 02:19:58 file2 kernel: block drbd1: peer( Unknown -> Primary ) conn( WFReportParams -> Connected ) pdsk( DUnknown -> UpToDate )
[root@file2 d]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.1 (api:1/proto:86-100)
GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by root@file2, 2012-02-14 23:40:48
m:res cs ro ds p mounted fstype
1:r0 Connected Secondary/Primary Diskless/UpToDate C
[root@file2 d]#
可能是因为出现了 unable to join drbd events multicast group 这个提示,我的文件同步不成功
[root@file1 d]# service drbd status
drbd driver loaded OK; device status:
version: 8.4.1 (api:1/proto:86-100)
GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by root@file1, 2012-02-14 02:42:36
m:res cs ro ds p mounted fstype
1:r0 Connected Primary/Secondary UpToDate/Diskless C /d ext3
[root@file1 d]#
...全文
630 点赞 收藏 13
写回复
13 条回复
切换为时间正序
当前发帖距今超过3年,不再开放新的回复
发表回复
etracer 2012-02-16
上面问题解决了,出现新的问题,adjust disk: r0:failed(attach:10),当我设置drbd的大小的时候出现如下问题,我的硬盘2G,
我设置固定大小为1G
[root@file2 ~]# cat /etc/drbd.conf
global{
usage-count yes;
}
common{
syncer{ rate 1M; }
}
resource r0{
protocol C;
disk{
size 1G;
}

net{
cram-hmac-alg sha1;
shared-secret "FooFunFactory";
}
on file1 {
device /dev/drbd1;
disk /dev/sdb1;
address 192.168.1.29:7789;
meta-disk internal;
}
on file2{
device /dev/drbd1;
disk /dev/sdb1;
address 192.168.1.30:7789;
meta-disk internal;
}
}
-----------从--------
[root@file2 ~]# service drbd start
Starting DRBD resources: [
create res: r0
prepare disk: r0
adjust disk: r0:failed(attach:10)
adjust net: r0
]
unable to join drbd events multicast group
.
[root@file2 ~]#
[root@file2 ~]# cat /proc/drbd
version: 8.4.1 (api:1/proto:86-100)
GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by root@localhost.localdomain, 2012-02-16 03:27:36

1: cs:Connected ro:Secondary/Primary ds:Diskless/UpToDate C r-----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
[root@file2 ~]#
log
[root@file2 ~]# tail /var/log/messages
Feb 16 18:43:38 file2 kernel: d-con r0: Peer authenticated using 20 bytes HMAC
Feb 16 18:43:38 file2 kernel: d-con r0: conn( WFConnection -> WFReportParams )
Feb 16 18:43:38 file2 kernel: d-con r0: Starting asender thread (from drbd_r_r0 [4697])
Feb 16 18:43:38 file2 kernel: block drbd1: max BIO size = 4096
Feb 16 18:43:38 file2 kernel: block drbd1: peer( Unknown -> Secondary ) conn( WFReportParams -> Connected ) pdsk( DUnknown -> UpToDate )
Feb 16 18:45:10 file2 avahi-daemon[4245]: Invalid query packet.
Feb 16 18:45:50 file2 last message repeated 11 times
Feb 16 18:48:34 file2 avahi-daemon[4245]: Invalid query packet.
Feb 16 18:49:14 file2 last message repeated 7 times
Feb 16 18:49:21 file2 kernel: block drbd1: peer( Secondary -> Primary )
[root@file2 ~]# fdisk -l

Disk /dev/sda: 21.4 GB, 21474836480 bytes
255 heads, 63 sectors/track, 2610 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

Device Boot Start End Blocks Id System
/dev/sda1 * 1 13 104391 83 Linux
/dev/sda2 14 2610 20860402+ 8e Linux LVM

Disk /dev/sdb: 2147 MB, 2147483648 bytes
255 heads, 63 sectors/track, 261 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

Device Boot Start End Blocks Id System
/dev/sdb1 261 261 8032+ 83 Linux
[root@file2 ~]#
-------主-----
[root@file1 ~]# service drbd start
Starting DRBD resources: [
create res: r0
prepare disk: r0
adjust disk: r0
adjust net: r0
]
unable to join drbd events multicast group
[root@file1 ~]#
[root@file1 ~]# cat /proc/drbd
version: 8.4.1 (api:1/proto:86-100)
GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by root@localhost.localdomain, 2012-02-16 01:58:48

1: cs:Connected ro:Primary/Secondary ds:UpToDate/Diskless C r-----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:8

log
[root@file1 ~]# tail /var/log/messages
Feb 16 18:43:37 file1 kernel: d-con r0: conn( WFConnection -> WFReportParams )
Feb 16 18:43:37 file1 kernel: d-con r0: Starting asender thread (from drbd_r_r0 [4699])
Feb 16 18:43:37 file1 kernel: block drbd1: Requested disk size is too big (1048576 > 7992)
Feb 16 18:43:37 file1 kernel: block drbd1: Requested disk size is too big (1048576 > 7992)
Feb 16 18:43:37 file1 kernel: block drbd1: peer( Unknown -> Secondary ) conn( WFReportParams -> Connected ) pdsk( DUnknown -> Diskless )
Feb 16 18:45:09 file1 avahi-daemon[4317]: Invalid query packet.
Feb 16 18:45:49 file1 last message repeated 11 times
Feb 16 18:48:33 file1 avahi-daemon[4317]: Invalid query packet.
Feb 16 18:49:13 file1 last message repeated 7 times
Feb 16 18:49:20 file1 kernel: block drbd1: role( Secondary -> Primary )
[root@file1 ~]# fdisk -l

Disk /dev/sda: 21.4 GB, 21474836480 bytes
255 heads, 63 sectors/track, 2610 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

Device Boot Start End Blocks Id System
/dev/sda1 * 1 13 104391 83 Linux
/dev/sda2 14 2610 20860402+ 8e Linux LVM

Disk /dev/sdb: 2147 MB, 2147483648 bytes
255 heads, 63 sectors/track, 261 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

Device Boot Start End Blocks Id System
/dev/sdb1 1 261 2096451 83 Linux
--------------------------
回复
etracer 2012-02-16
对,感谢您的帮助,我也找到原因了,是我把fdisk的用法忘了,结果乱分区,只分了个8M的分区,后来设置固定大小1G的时候就出错了,另外,我问您的unable to join drbd events multicast group这个问题,您说的对,不过我这里升级了内核,是2.6.32.27的,可能是我升级的时候哪里没做好,才有这个提示的,不过这个不影响我的功能,总之谢谢您的解答,我的问题才得以解决,太感谢您了,感谢
回复
Liv2005 2012-02-16
unable to join drbd events multicast group
这个查了下源代码

 
1357 if (cm->continuous_poll) {
1358 if (genl_join_mc_group(drbd_sock, "events") &&
1359 !kernel_older_than(2, 6, 23)) {
1360 fprintf(stderr, "unable to join drbd events multicast group\n");
1361 return 20;
1362 }
1363 }

从这段代码看,好像只有在内核版本不低于2.6.23、并且加入events组播组失败的时候,才会打印这个错误

至于 adjust disk: r0:failed(attach:10)
从主日志上来看
Feb 16 18:43:37 file1 kernel: block drbd1: Requested disk size is too big (1048576 > 7992)
Feb 16 18:43:37 file1 kernel: block drbd1: peer( Unknown -> Secondary ) conn( WFReportParams -> Connected ) pdsk( DUnknown -> Diskless )

似乎是主和从两边resource限制不一致? 或者是从服务器分区中有一些不一致的内容? 或是硬盘设置方面不一致?


Device Boot Start End Blocks Id System
/dev/sdb1 261 261 8032+ 83 Linux


Device Boot Start End Blocks Id System
/dev/sdb1 1 261 2096451 83 Linux
回复
Liv2005 2012-02-15
一般是drbd+heartbeat是吧 ? heartbeat的日志看下工作正常否
回复
etracer 2012-02-15
[root@localhost /]# cat /etc/drbd.conf    (该文件主从都是一样的)
global{
usage-count yes;
}
common{
syncer{ rate 1M; }
}
resource r0{
protocol C;
net{
cram-hmac-alg sha1;
shared-secret "FooFunFactory";
}
on file1 {
device /dev/drbd1;
disk /dev/sdb1;
address 192.168.1.29:7789;
meta-disk internal;
}
on file2{
device /dev/drbd1;
disk /dev/sdb1;
address 192.168.1.30:7789;
meta-disk internal;
}
}
[root@localhost /]#
------------------------------

[root@localhost pgdir]# cat /proc/drbd
version: 8.4.1 (api:1/proto:86-100)
GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by root@localhost.localdomain, 2012-02-16 01:58:48

1: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r-----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
[root@localhost pgdir]#
[root@localhost pgdir]# tail /var/log/messages
Feb 16 04:05:12 localhost kernel: block drbd1: helper command: /sbin/drbdadm before-resync-source minor-1
Feb 16 04:05:12 localhost kernel: block drbd1: helper command: /sbin/drbdadm before-resync-source minor-1 exit code 0 (0x0)
Feb 16 04:05:12 localhost kernel: block drbd1: conn( WFBitMapS -> SyncSource ) pdsk( Consistent -> Inconsistent )
Feb 16 04:05:12 localhost kernel: block drbd1: Began resync as SyncSource (will sync 0 KB [0 bits set]).
Feb 16 04:05:12 localhost kernel: block drbd1: updated sync UUID 068ECC59815619C9:1FBCEA3D044B6D75:1FBBEA3D044B6D75:22404C81633AF8BF
Feb 16 04:05:12 localhost kernel: block drbd1: Resync done (total 1 sec; paused 0 sec; 0 K/sec)
Feb 16 04:05:12 localhost kernel: block drbd1: updated UUIDs 068ECC59815619C9:0000000000000000:1FBCEA3D044B6D75:1FBBEA3D044B6D75
Feb 16 04:05:12 localhost kernel: block drbd1: conn( SyncSource -> Connected ) pdsk( Inconsistent -> UpToDate )
Feb 16 04:05:12 localhost kernel: block drbd1: bitmap WRITE of 0 pages took 0 jiffies
Feb 16 04:05:13 localhost kernel: block drbd1: 0 KB (0 bits) marked out-of-sync by on disk bit-map.
[root@localhost pgdir]#
------------------

[root@localhost /]# cat /proc/drbd
version: 8.4.1 (api:1/proto:86-100)
GIT-hash: 91b4c048c1a0e06777b5f65d312b38d47abaea80 build by root@localhost.localdomain, 2012-02-16 03:27:36

1: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r-----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
[root@localhost /]# tail /var/log/messages
Feb 16 04:05:13 localhost kernel: block drbd1: helper command: /sbin/drbdadm before-resync-target minor-1 exit code 0 (0x0)
Feb 16 04:05:14 localhost kernel: block drbd1: conn( WFSyncUUID -> SyncTarget ) disk( Outdated -> Inconsistent )
Feb 16 04:05:14 localhost kernel: block drbd1: Began resync as SyncTarget (will sync 0 KB [0 bits set]).
Feb 16 04:05:14 localhost kernel: block drbd1: Resync done (total 1 sec; paused 0 sec; 0 K/sec)
Feb 16 04:05:14 localhost kernel: block drbd1: updated UUIDs 068ECC59815619C8:0000000000000000:1FBCEA3D044B6D74:1FBBEA3D044B6D75
Feb 16 04:05:14 localhost kernel: block drbd1: conn( SyncTarget -> Connected ) disk( Inconsistent -> UpToDate )
Feb 16 04:05:14 localhost kernel: block drbd1: helper command: /sbin/drbdadm after-resync-target minor-1
Feb 16 04:05:14 localhost kernel: block drbd1: helper command: /sbin/drbdadm after-resync-target minor-1 exit code 0 (0x0)
Feb 16 04:05:14 localhost kernel: block drbd1: bitmap WRITE of 0 pages took 0 jiffies
Feb 16 04:05:14 localhost kernel: block drbd1: 0 KB (0 bits) marked out-of-sync by on disk bit-map.
[root@localhost /]#
[root@localhost /]# iptables -L
Chain INPUT (policy ACCEPT)
target prot opt source destination

Chain FORWARD (policy ACCEPT)
target prot opt source destination

Chain OUTPUT (policy ACCEPT)
target prot opt source destination
[root@localhost /]#
[root@localhost /]# cat /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
# enforcing - SELinux security policy is enforced.
# permissive - SELinux prints warnings instead of enforcing.
# disabled - SELinux is fully disabled.
SELINUX=disabled
# SELINUXTYPE= type of policy in use. Possible values are:
# targeted - Only targeted network daemons are protected.
# strict - Full SELinux protection.
SELINUXTYPE=targeted
[root@localhost /]#
回复
etracer 2012-02-15
感谢你的耐心指导,我又重新安装了,一会贴上来哦(估计要明天了,在编译内核呢,我的系统centos5.7(2.6.18-274.el5xen)升级后是2.6.32.27),谢谢你,太谢谢你了,明天一定过来帮帮我哦,我给公司出的主意,结果被自己难住了
回复
Liv2005 2012-02-15
单独安装了drbd是么?
主备机器上的drbd.conf 和 /proc/drbd 的内容都贴来看下
回复
etracer 2012-02-15
哦,还有nfs服务
回复
etracer 2012-02-15
是的,但是我还没有安装heartbeat呢,只是刚刚做完drbd
回复
etracer 2012-02-14
这个结果,没太看懂,不过感觉好像在互相沟通,为什么不能同步数据呢?

[root@file2 d]# service drbd start
Starting DRBD resources: [
adjust disk: r0:failed(attach:10)
]
unable to join drbd events multicast group
.
[root@file2 d]# tcpdump host 192.168.1.25 and 192.168.1.24
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 96 bytes
03:18:28.195881 IP file1.58627 > file2.7898: P 4191289021:4191289037(16) ack 4222832139 win 183 <nop,nop,timestamp 1621701 1621665>
03:18:28.196425 IP file2.7898 > file1.58627: P 1:17(16) ack 16 win 181 <nop,nop,timestamp 1624164 1621701>
03:18:28.196679 IP file1.58627 > file2.7898: . ack 17 win 183 <nop,nop,timestamp 1621701 1624164>
03:18:38.195663 IP file1.58627 > file2.7898: P 16:32(16) ack 17 win 183 <nop,nop,timestamp 1624201 1624164>
03:18:38.195809 IP file2.7898 > file1.58627: P 17:33(16) ack 32 win 181 <nop,nop,timestamp 1626664 1624201>
03:18:38.196048 IP file1.58627 > file2.7898: . ack 33 win 183 <nop,nop,timestamp 1624201 1626664>
03:18:48.195478 IP file2.7898 > file1.58627: P 33:49(16) ack 32 win 181 <nop,nop,timestamp 1629164 1624201>
03:18:48.195547 IP file1.58627 > file2.7898: P 32:48(16) ack 33 win 183 <nop,nop,timestamp 1626701 1626664>
03:18:48.195576 IP file2.7898 > file1.58627: P 49:65(16) ack 48 win 181 <nop,nop,timestamp 1629164 1626701>
03:18:48.195622 IP file1.58627 > file2.7898: . ack 49 win 183 <nop,nop,timestamp 1626701 1629164>
03:18:48.195748 IP file1.58627 > file2.7898: . ack 65 win 183 <nop,nop,timestamp 1626701 1629164>
03:18:48.195755 IP file1.58627 > file2.7898: P 48:64(16) ack 65 win 183 <nop,nop,timestamp 1626701 1629164>
03:18:48.235348 IP file2.7898 > file1.58627: . ack 64 win 181 <nop,nop,timestamp 1629174 1626701>
03:18:48.937509 IP file1.7898 > file2.snip-slave: P 4190016682:4190016846(164) ack 4226838819 win 215 <nop,nop,timestamp 1626886 1236731>
03:18:48.937643 IP file2.snip-slave > file1.7898: . ack 164 win 250 <nop,nop,timestamp 1629349 1626886>
03:18:58.195369 IP file1.58627 > file2.7898: P 64:80(16) ack 65 win 183 <nop,nop,timestamp 1629201 1629174>
03:18:58.195461 IP file2.7898 > file1.58627: . ack 80 win 181 <nop,nop,timestamp 1631663 1629201>
03:18:58.195533 IP file2.7898 > file1.58627: P 65:81(16) ack 80 win 181 <nop,nop,timestamp 1631663 1629201>
03:18:58.235323 IP file1.58627 > file2.7898: . ack 81 win 183 <nop,nop,timestamp 1629211 1631663>
03:19:08.195009 IP file2.7898 > file1.58627: P 81:97(16) ack 80 win 181 <nop,nop,timestamp 1634163 1629211>
03:19:08.195090 IP file1.58627 > file2.7898: P 80:96(16) ack 81 win 183 <nop,nop,timestamp 1631701 1631663>
03:19:08.195119 IP file2.7898 > file1.58627: P 97:113(16) ack 96 win 181 <nop,nop,timestamp 1634163 1631701>
03:19:08.195165 IP file1.58627 > file2.7898: . ack 97 win 183 <nop,nop,timestamp 1631701 1634163>
03:19:08.195273 IP file1.58627 > file2.7898: . ack 113 win 183 <nop,nop,timestamp 1631701 1634163>
03:19:08.195280 IP file1.58627 > file2.7898: P 96:112(16) ack 113 win 183 <nop,nop,timestamp 1631701 1634163>
03:19:08.235063 IP file2.7898 > file1.58627: . ack 112 win 181 <nop,nop,timestamp 1634173 1631701>
03:19:18.194936 IP file2.7898 > file1.58627: P 113:129(16) ack 112 win 181 <nop,nop,timestamp 1636663 1631701>
03:19:18.195043 IP file1.58627 > file2.7898: P 112:128(16) ack 113 win 183 <nop,nop,timestamp 1634201 1634173>
03:19:18.195062 IP file2.7898 > file1.58627: . ack 128 win 181 <nop,nop,timestamp 1636663 1634201>
03:19:18.195119 IP file1.58627 > file2.7898: P 128:144(16) ack 129 win 183 <nop,nop,timestamp 1634201 1636663>
03:19:18.195127 IP file2.7898 > file1.58627: . ack 144 win 181 <nop,nop,timestamp 1636663 1634201>
03:19:18.195193 IP file2.7898 > file1.58627: P 129:145(16) ack 144 win 181 <nop,nop,timestamp 1636663 1634201>
03:19:18.235008 IP file1.58627 > file2.7898: . ack 145 win 183 <nop,nop,timestamp 1634211 1636663>
03:19:28.194827 IP file1.58627 > file2.7898: P 144:160(16) ack 145 win 183 <nop,nop,timestamp 1636701 1636663>
03:19:28.194925 IP file2.7898 > file1.58627: P 145:161(16) ack 160 win 181 <nop,nop,timestamp 1639162 1636701>
03:19:28.195123 IP file1.58627 > file2.7898: . ack 161 win 183 <nop,nop,timestamp 1636701 1639162>
03:19:38.194614 IP file2.7898 > file1.58627: P 161:177(16) ack 160 win 181 <nop,nop,timestamp 1641662 1636701>
03:19:38.194699 IP file1.58627 > file2.7898: P 160:176(16) ack 161 win 183 <nop,nop,timestamp 1639201 1639162>
03:19:38.194729 IP file2.7898 > file1.58627: P 177:193(16) ack 176 win 181 <nop,nop,timestamp 1641662 1639201>
03:19:38.194775 IP file1.58627 > file2.7898: . ack 177 win 183 <nop,nop,timestamp 1639201 1641662>
03:19:38.194923 IP file1.58627 > file2.7898: . ack 193 win 183 <nop,nop,timestamp 1639201 1641662>
03:19:38.194930 IP file1.58627 > file2.7898: P 176:192(16) ack 193 win 183 <nop,nop,timestamp 1639201 1641662>
03:19:38.234636 IP file2.7898 > file1.58627: . ack 192 win 181 <nop,nop,timestamp 1641672 1639201>
03:19:48.194501 IP file2.7898 > file1.58627: P 193:209(16) ack 192 win 181 <nop,nop,timestamp 1644162 1639201>
03:19:48.194588 IP file1.58627 > file2.7898: P 192:208(16) ack 193 win 183 <nop,nop,timestamp 1641701 1641672>
03:19:48.194607 IP file2.7898 > file1.58627: . ack 208 win 181 <nop,nop,timestamp 1644162 1641701>
03:19:48.194676 IP file1.58627 > file2.7898: P 208:224(16) ack 209 win 183 <nop,nop,timestamp 1641701 1644162>
03:19:48.194685 IP file2.7898 > file1.58627: . ack 224 win 181 <nop,nop,timestamp 1644162 1641701>
03:19:48.194740 IP file2.7898 > file1.58627: P 209:225(16) ack 224 win 181 <nop,nop,timestamp 1644162 1641701>
03:19:48.234638 IP file1.58627 > file2.7898: . ack 225 win 183 <nop,nop,timestamp 1641711 1644162>
回复
etracer 2012-02-14
[Quote=引用 1 楼 liv2005 的回复:]

检查两台机器之间的iptables设定

先用tcpdump抓包看看drbd会向哪个地址发送广播包,然后在两台机器的iptables中加入该地址的ACCEPT
[/Quote]我试试看
回复
etracer 2012-02-14
iptables关掉了
回复
Liv2005 2012-02-14
检查两台机器之间的iptables设定

先用tcpdump抓包看看drbd会向哪个地址发送广播包,然后在两台机器的iptables中加入该地址的ACCEPT
回复
相关推荐
发帖
系统维护与使用区
创建于2007-08-27

1.9w+

社区成员

系统使用、管理、维护问题。可以是Ubuntu, Fedora, Unix等等
申请成为版主
帖子事件
创建了帖子
2012-02-14 04:29
社区公告
暂无公告