| # Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the 'License'); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an 'AS IS' BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Tests for TFCONFIGClusterResolver.""" |
| |
| import os |
| |
| from tensorflow.python.client import session |
| from tensorflow.python.distribute.cluster_resolver import tfconfig_cluster_resolver |
| from tensorflow.python.eager import context |
| from tensorflow.python.framework import config |
| from tensorflow.python.framework import test_util |
| from tensorflow.python.platform import test |
| from tensorflow.python.training import server_lib |
| |
| mock = test.mock |
| |
| |
| @test_util.run_all_in_graph_and_eager_modes |
| class TFConfigClusterResolverTest(test.TestCase): |
| |
| def _verifyClusterSpecEquality(self, cluster_spec, expected_proto): |
| self.assertProtoEquals(expected_proto, cluster_spec.as_cluster_def()) |
| self.assertProtoEquals( |
| expected_proto, server_lib.ClusterSpec(cluster_spec).as_cluster_def()) |
| self.assertProtoEquals( |
| expected_proto, |
| server_lib.ClusterSpec(cluster_spec.as_cluster_def()).as_cluster_def()) |
| self.assertProtoEquals( |
| expected_proto, |
| server_lib.ClusterSpec(cluster_spec.as_dict()).as_cluster_def()) |
| |
| def testNormalClusterSpecRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "task": { |
| "type": "ps", |
| "index": 0 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| expected_proto = """ |
| job { name: 'ps' tasks { key: 0 value: 'ps0:2222' } |
| tasks { key: 1 value: 'ps1:2222' } } |
| job { name: 'worker' tasks { key: 0 value: 'worker0:2222' } |
| tasks { key: 1 value: 'worker1:2222' } |
| tasks { key: 2 value: 'worker2:2222' } } |
| """ |
| actual_cluster_spec = cluster_resolver.cluster_spec() |
| self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) |
| |
| def testSparseClusterSpecRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": {"1": "worker1:2222"} |
| }, |
| "task": { |
| "type": "worker", |
| "index": 1 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| expected_proto = """ |
| job { name: 'ps' tasks { key: 0 value: 'ps0:2222' } |
| tasks { key: 1 value: 'ps1:2222' } } |
| job { name: 'worker' tasks { key: 1 value: 'worker1:2222' } } |
| """ |
| actual_cluster_spec = cluster_resolver.cluster_spec() |
| self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) |
| |
| def testAutomaticMasterRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "task": { |
| "type": "ps", |
| "index": 0 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('ps0:2222', cluster_resolver.master()) |
| |
| def testSpecifiedTaskTypeAndIndexMasterRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "task": { |
| "type": "ps", |
| "index": 0 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('worker1:2222', cluster_resolver.master('worker', 1)) |
| |
| def testSessionMasterRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "session_master": "sessionmaster:2222", |
| "task": { |
| "type": "ps", |
| "index": 0 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('sessionmaster:2222', cluster_resolver.master()) |
| |
| def testRpcLayerRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "rpc_layer": "grpc", |
| "task": { |
| "type": "ps", |
| "index": 0 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('grpc://ps0:2222', cluster_resolver.master()) |
| |
| def testTaskTypeIndexRpcRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "rpc_layer": "grpc", |
| "task": { |
| "type": "ps", |
| "index": 0 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('ps', cluster_resolver.task_type) |
| self.assertEqual(0, cluster_resolver.task_id) |
| self.assertEqual('grpc', cluster_resolver.rpc_layer) |
| |
| def testParameterOverrides(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "rpc_layer": "grpc", |
| "task": { |
| "type": "ps", |
| "index": 1 |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver( |
| task_type='ps', task_id=0) |
| |
| self.assertEqual('grpc://ps0:2222', cluster_resolver.master()) |
| self.assertEqual('ps', cluster_resolver.task_type) |
| self.assertEqual(0, cluster_resolver.task_id) |
| |
| cluster_resolver.task_type = 'worker' |
| cluster_resolver.task_id = 1 |
| cluster_resolver.rpc_layer = 'test' |
| |
| self.assertEqual('test://worker1:2222', cluster_resolver.master()) |
| self.assertEqual('worker', cluster_resolver.task_type) |
| self.assertEqual(1, cluster_resolver.task_id) |
| self.assertEqual('test', cluster_resolver.rpc_layer) |
| |
| def testTaskTypeCastToString(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "123456": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "rpc_layer": "grpc", |
| "task": { |
| "type": 123456, |
| "index": 0 |
| } |
| } |
| """ |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('123456', cluster_resolver.task_type) |
| |
| def testTaskIndexCastToInteger(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "ps": ["ps0:2222", "ps1:2222"], |
| "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] |
| }, |
| "rpc_layer": "grpc", |
| "task": { |
| "type": "ps", |
| "index": "1" |
| } |
| } |
| """ |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual(1, cluster_resolver.task_id) |
| |
| def testTaskIndexOverride(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "worker": ["worker0:2222", "worker1:2222"] |
| }, |
| "task": { |
| "type": "worker", |
| "index": "0" |
| } |
| } |
| """ |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver( |
| task_id=1) |
| self.assertEqual(1, cluster_resolver.task_id) |
| |
| def testZeroItemsInClusterSpecMasterRead(self): |
| os.environ['TF_CONFIG'] = """ |
| {} |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('', cluster_resolver.master()) |
| |
| def testOneItemInClusterSpecMasterRead(self): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "worker": ["worker0:2222"] |
| } |
| } |
| """ |
| |
| cluster_resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| self.assertEqual('', cluster_resolver.master()) |
| |
| @mock.patch.object(config, 'list_logical_devices') |
| @mock.patch.object(session.BaseSession, 'list_devices') |
| def testNumAcceleratorsFilterTasksByEnvVar(self, mock_list_devices, |
| mock_eager_list_devices): |
| os.environ['TF_CONFIG'] = """ |
| { |
| "cluster": { |
| "worker1": ["w10:2222"], |
| "worker2": ["w21:2222", "w22:2222", "w23:2222", "w24:2222"] |
| }, |
| "rpc_layer": "grpc", |
| "task": { |
| "type": "worker1", |
| "index": "0" |
| } |
| } |
| """ |
| |
| devices = [ |
| context.LogicalDevice('/job:worker1/task:0/device:TPU:0', 'TPU'), |
| context.LogicalDevice('/job:worker1/task:0/device:TPU:1', 'TPU'), |
| context.LogicalDevice('/job:worker1/task:0/device:GPU:0', 'GPU'), |
| context.LogicalDevice('/job:worker1/task:0/device:GPU:1', 'GPU'), |
| context.LogicalDevice('/job:worker2/task:1/device:TPU:2', 'TPU'), |
| context.LogicalDevice('/job:worker2/task:2/device:TPU:3', 'TPU'), |
| context.LogicalDevice('/job:worker2/task:3/device:GPU:2', 'GPU'), |
| context.LogicalDevice('/job:worker2/task:4/device:GPU:3', 'GPU'), |
| ] |
| device_list = [ |
| session._DeviceAttributes(d.name, d.device_type, 1024, 0) |
| for d in devices |
| ] |
| mock_eager_list_devices.return_value = devices |
| mock_list_devices.return_value = device_list |
| |
| resolver = tfconfig_cluster_resolver.TFConfigClusterResolver() |
| |
| # By default we read from TF_CONFIG |
| self.assertEqual(resolver.num_accelerators(), {'TPU': 2, 'GPU': 2}) |
| |
| # Override still works when we want it to |
| self.assertEqual(resolver.num_accelerators(task_type='worker2', task_id=3), |
| {'GPU': 1}) |
| |
| |
| if __name__ == '__main__': |
| test.main() |