Hi,
#Summary
I have what appears to be a working Raid Z1 (3 x 1TB rdm) on a FreeNAS-8.0.4-RELEASE-p3-x64 (11703) virtual machine.
However, when testing a failed drive scenario it is not identified by Freenas. All disks are reported as online.
#System Details
Mobo- Gigabyte Z68AP-D3, CPU - i7 2600, RAM - 24GB
VMware ESXi 5.0.0 build-721882
FreeNAS-8.0.4-RELEASE-p3-x64 (11703)
#rdm disk setup for vdev in ZFS pool
vmkfstools -z /vmfs/devices/disks/t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818 /vmfs/volumes/Disk0-120GB-SSD/rdm/D4-1TB.vmdk -a lsilogic
vmkfstools -z /vmfs/devices/disks/t10.ATA_____WDC_WD10EACS2D00D6B0__________________________WD2DWCAU42711813 /vmfs/volumes/Disk0-120GB-SSD/rdm/D5-1TB.vmdk -a lsilogic
vmkfstools -z /vmfs/devices/disks/t10.ATA_____ST31000520AS________________________________________6VX0KLFG /vmfs/volumes/Disk0-120GB-SSD/rdm/D6-1TB.vmdk -a lsilogic
#Testing
dd if=/dev/zero of=output2.img bs=8k count=256k
dd if=/dev/random of=outputrandom2.img bs=8k count=256k
#2GB file test no prob.
[root@FreeNAS] /mnt/Datapool1# dd if=/dev/zero of=output2.img bs=8k count=256k
262144+0 records in
262144+0 records out
2147483648 bytes transferred in 71.431505 secs (30063536 bytes/sec)
dd if=/dev/zero of=output2.img bs=8k count=10240k
dd if=/dev/random of=outputrandom2.img bs=8k count=10240k
#After some ZFS tuning an 80GB file test and 2 x 80GB simultaniously written files test completes without a problem.
#quite standard ZFS tuning, based on analysis of many posts and testing.
#Testing Scenario - Failed disk
#Before
[root@FreeNAS] /mnt/Datapool1# zpool status
pool: Datapool1
state: ONLINE
scrub: none requested
config:
NAME STATE READ WRITE CKSUM
Datapool1 ONLINE 0 0 0
raidz1 ONLINE 0 0 0
da1p2 ONLINE 0 0 0
da2p2 ONLINE 0 0 0
da3p2 ONLINE 0 0 0
errors: No known data errors
#Physically disconnect disk
#After
FreeNAS
[root@FreeNAS] /var/log# cat messages
Jun 28 23:36:51 FreeNAS kernel: mpt1: request 0xffffff80003da170:42337 timed out for ccb 0xffffff01521b3800 (req->ccb 0xffffff01521b3800)
Jun 28 23:36:51 FreeNAS kernel: mpt1: attempting to abort req 0xffffff80003da170:42337 function 0
Jun 28 23:36:51 FreeNAS kernel:
Jun 28 23:36:51 FreeNAS kernel: mpt1: completing timedout/aborted req 0xffffff80003da170:42337
Jun 28 23:36:51 FreeNAS kernel: mpt1: abort of req 0xffffff80003da170:0 completed
****Appears to repeat in log****
[root@FreeNAS] /mnt/Datapool1# zpool status
pool: Datapool1
state: ONLINE
scrub: none requested
config:
NAME STATE READ WRITE CKSUM
Datapool1 ONLINE 0 0 0
raidz1 ONLINE 0 0 0
da1p2 ONLINE 0 0 0
da2p2 ONLINE 0 0 0
da3p2 ONLINE 0 0 0
errors: No known data errors
###VMware###
2012-06-28T13:36:12.110Z cpu2:4396)NMP: nmp_ThrottleLogForDevice:2318: Cmd 0x2a (0x412400721440, 4396) to dev "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" on path "vmhba0:C0:T1:L0" Failed: H:0x0 D:0x2 P:0x0 Valid sense da$
2012-06-28T13:36:12.111Z cpu2:4396)ScsiDeviceIO: 2309: Cmd(0x412400721440) 0x2a, CmdSN 0x8002001f from world 4396 to dev "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" failed H:0x0 D:0x2 P:0x0 Valid sense data: 0xb 0x0 0x0.
2012-06-28T13:36:12.114Z cpu6:2171)WARNING: NMP: nmp_IssueCommandToDevice:2954:I/O could not be issued to device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" due to Not found
2012-06-28T13:36:12.114Z cpu6:2171)WARNING: NMP: nmp_DeviceRetryCommand:133:Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update for failover with I/O blocked. No prior reservation e
2012-06-28T13:36:12.114Z cpu6:2171)WARNING: NMP: nmp_DeviceStartLoop:721:NMP Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" is blocked. Not starting I/O from device.
2012-06-28T13:36:12.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:599:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - issuing command 0x4124007b81c0
2012-06-28T13:36:12.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:658:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - failed to issue command due to Not found (APD), try again...
2012-06-28T13:36:12.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:708:Logical device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update...
2012-06-28T13:36:13.891Z cpu0:2665)WARNING: NMP: nmpDeviceAttemptFailover:599:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - issuing command 0x4124007b81c0
2012-06-28T13:36:13.891Z cpu0:2665)WARNING: NMP: nmpDeviceAttemptFailover:658:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - failed to issue command due to Not found (APD), try again...
2012-06-28T13:36:13.891Z cpu0:2665)WARNING: NMP: nmpDeviceAttemptFailover:708:Logical device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update...
2012-06-28T13:36:43.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:599:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - issuing command 0x4124007b81c0
2012-06-28T13:36:43.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:658:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - failed to issue command due to Not found (APD), try again...
2012-06-28T13:36:43.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:708:Logical device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update...
2012-06-28T13:36:52.116Z cpu0:2177)ScsiDeviceIO: 2322: Cmd(0x4124007b81c0) 0x12, CmdSN 0x1362 from world 0 to dev "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" failed H:0x5 D:0x0 P:0x0 Possible sense data: 0x5 0x24 0x0.
2012-06-28T13:36:52.116Z cpu0:2177)WARNING: NMP: nmp_DeviceStartLoop:721:NMP Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" is blocked. Not starting I/O from device.
2012-06-28T13:36:52.891Z cpu4:14420)WARNING: NMP: nmpDeviceAttemptFailover:562:Retry world restore device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - no more commands to retry
2012-06-28T13:36:52.891Z cpu4:14420)WARNING: NMP: nmp_IssueCommandToDevice:2954:I/O could not be issued to device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" due to Not found
2012-06-28T13:36:52.891Z cpu4:14420)WARNING: NMP: nmp_DeviceRetryCommand:133:Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update for failover with I/O blocked. No prior reservation
****Appears to repeat in log****
#Moving forward
What other information can I gather for trouble shooting?
Has any one else encountered this behaviour?
Has any one else performed the failure scenario successfully?
My system is in the testing phase, happy to crash and burn.
Thanks in advance.
#Summary
I have what appears to be a working Raid Z1 (3 x 1TB rdm) on a FreeNAS-8.0.4-RELEASE-p3-x64 (11703) virtual machine.
However, when testing a failed drive scenario it is not identified by Freenas. All disks are reported as online.
#System Details
Mobo- Gigabyte Z68AP-D3, CPU - i7 2600, RAM - 24GB
VMware ESXi 5.0.0 build-721882
FreeNAS-8.0.4-RELEASE-p3-x64 (11703)
#rdm disk setup for vdev in ZFS pool
vmkfstools -z /vmfs/devices/disks/t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818 /vmfs/volumes/Disk0-120GB-SSD/rdm/D4-1TB.vmdk -a lsilogic
vmkfstools -z /vmfs/devices/disks/t10.ATA_____WDC_WD10EACS2D00D6B0__________________________WD2DWCAU42711813 /vmfs/volumes/Disk0-120GB-SSD/rdm/D5-1TB.vmdk -a lsilogic
vmkfstools -z /vmfs/devices/disks/t10.ATA_____ST31000520AS________________________________________6VX0KLFG /vmfs/volumes/Disk0-120GB-SSD/rdm/D6-1TB.vmdk -a lsilogic
#Testing
dd if=/dev/zero of=output2.img bs=8k count=256k
dd if=/dev/random of=outputrandom2.img bs=8k count=256k
#2GB file test no prob.
[root@FreeNAS] /mnt/Datapool1# dd if=/dev/zero of=output2.img bs=8k count=256k
262144+0 records in
262144+0 records out
2147483648 bytes transferred in 71.431505 secs (30063536 bytes/sec)
dd if=/dev/zero of=output2.img bs=8k count=10240k
dd if=/dev/random of=outputrandom2.img bs=8k count=10240k
#After some ZFS tuning an 80GB file test and 2 x 80GB simultaniously written files test completes without a problem.
#quite standard ZFS tuning, based on analysis of many posts and testing.
#Testing Scenario - Failed disk
#Before
[root@FreeNAS] /mnt/Datapool1# zpool status
pool: Datapool1
state: ONLINE
scrub: none requested
config:
NAME STATE READ WRITE CKSUM
Datapool1 ONLINE 0 0 0
raidz1 ONLINE 0 0 0
da1p2 ONLINE 0 0 0
da2p2 ONLINE 0 0 0
da3p2 ONLINE 0 0 0
errors: No known data errors
#Physically disconnect disk
#After
FreeNAS
[root@FreeNAS] /var/log# cat messages
Jun 28 23:36:51 FreeNAS kernel: mpt1: request 0xffffff80003da170:42337 timed out for ccb 0xffffff01521b3800 (req->ccb 0xffffff01521b3800)
Jun 28 23:36:51 FreeNAS kernel: mpt1: attempting to abort req 0xffffff80003da170:42337 function 0
Jun 28 23:36:51 FreeNAS kernel:
Jun 28 23:36:51 FreeNAS kernel: mpt1: completing timedout/aborted req 0xffffff80003da170:42337
Jun 28 23:36:51 FreeNAS kernel: mpt1: abort of req 0xffffff80003da170:0 completed
****Appears to repeat in log****
[root@FreeNAS] /mnt/Datapool1# zpool status
pool: Datapool1
state: ONLINE
scrub: none requested
config:
NAME STATE READ WRITE CKSUM
Datapool1 ONLINE 0 0 0
raidz1 ONLINE 0 0 0
da1p2 ONLINE 0 0 0
da2p2 ONLINE 0 0 0
da3p2 ONLINE 0 0 0
errors: No known data errors
###VMware###
2012-06-28T13:36:12.110Z cpu2:4396)NMP: nmp_ThrottleLogForDevice:2318: Cmd 0x2a (0x412400721440, 4396) to dev "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" on path "vmhba0:C0:T1:L0" Failed: H:0x0 D:0x2 P:0x0 Valid sense da$
2012-06-28T13:36:12.111Z cpu2:4396)ScsiDeviceIO: 2309: Cmd(0x412400721440) 0x2a, CmdSN 0x8002001f from world 4396 to dev "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" failed H:0x0 D:0x2 P:0x0 Valid sense data: 0xb 0x0 0x0.
2012-06-28T13:36:12.114Z cpu6:2171)WARNING: NMP: nmp_IssueCommandToDevice:2954:I/O could not be issued to device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" due to Not found
2012-06-28T13:36:12.114Z cpu6:2171)WARNING: NMP: nmp_DeviceRetryCommand:133:Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update for failover with I/O blocked. No prior reservation e
2012-06-28T13:36:12.114Z cpu6:2171)WARNING: NMP: nmp_DeviceStartLoop:721:NMP Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" is blocked. Not starting I/O from device.
2012-06-28T13:36:12.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:599:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - issuing command 0x4124007b81c0
2012-06-28T13:36:12.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:658:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - failed to issue command due to Not found (APD), try again...
2012-06-28T13:36:12.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:708:Logical device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update...
2012-06-28T13:36:13.891Z cpu0:2665)WARNING: NMP: nmpDeviceAttemptFailover:599:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - issuing command 0x4124007b81c0
2012-06-28T13:36:13.891Z cpu0:2665)WARNING: NMP: nmpDeviceAttemptFailover:658:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - failed to issue command due to Not found (APD), try again...
2012-06-28T13:36:13.891Z cpu0:2665)WARNING: NMP: nmpDeviceAttemptFailover:708:Logical device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update...
2012-06-28T13:36:43.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:599:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - issuing command 0x4124007b81c0
2012-06-28T13:36:43.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:658:Retry world failover device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - failed to issue command due to Not found (APD), try again...
2012-06-28T13:36:43.891Z cpu1:2665)WARNING: NMP: nmpDeviceAttemptFailover:708:Logical device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update...
2012-06-28T13:36:52.116Z cpu0:2177)ScsiDeviceIO: 2322: Cmd(0x4124007b81c0) 0x12, CmdSN 0x1362 from world 0 to dev "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" failed H:0x5 D:0x0 P:0x0 Possible sense data: 0x5 0x24 0x0.
2012-06-28T13:36:52.116Z cpu0:2177)WARNING: NMP: nmp_DeviceStartLoop:721:NMP Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" is blocked. Not starting I/O from device.
2012-06-28T13:36:52.891Z cpu4:14420)WARNING: NMP: nmpDeviceAttemptFailover:562:Retry world restore device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" - no more commands to retry
2012-06-28T13:36:52.891Z cpu4:14420)WARNING: NMP: nmp_IssueCommandToDevice:2954:I/O could not be issued to device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818" due to Not found
2012-06-28T13:36:52.891Z cpu4:14420)WARNING: NMP: nmp_DeviceRetryCommand:133:Device "t10.ATA_____WDC_WD10EARS2D003BB1__________________________WD2DWCAV5P246818": awaiting fast path state update for failover with I/O blocked. No prior reservation
****Appears to repeat in log****
#Moving forward
What other information can I gather for trouble shooting?
Has any one else encountered this behaviour?
Has any one else performed the failure scenario successfully?
My system is in the testing phase, happy to crash and burn.
Thanks in advance.