Torch deepseek v2 #1621

Merged 32 commits on Jun 24, 2024

Changes from 1 commit
15 changes: 15 additions & 0 deletions lmdeploy/pytorch/engine/model_agent.py
@@ -79,6 +79,21 @@ def __get_free_gpu_mem_size(cache_block_size: int):
                          f' {runtime_cache_size>>20} mb')
         return gpu_mem_physical_free * cache_config.cache_max_entry_count

+    def __adjust_block_size():
+        """adjust block_size."""
+        # TODO: support kernel with both large head dim and large block size.
+        if model_config.k_head_dim >= 512 and cache_config.block_size > 32:
+            cache_config.block_size = 32
Review comment on `cache_config.block_size = 32`:

Contributor:
Will this affect models other than DeepSeek v2?

Collaborator (Author):
Yes. The MHA kernel needs enough shared memory (smem) to cache both the kv_cache block and the query block, so any model with such a large head_dim has to be limited. Among all the models the PyTorch engine currently supports, only DeepSeek v2 with the MLA implementation meets this condition. [A rough smem estimate illustrating this follows the diff.]

+            rank = 0
+            if dist.is_initialized():
+                rank = dist.get_rank()
+            if rank == 0:
+                logger.warning(
+                    f'Update `block_size={cache_config.block_size}`'
+                    f' for large `head_dim={model_config.k_head_dim}`.')
+
+    __adjust_block_size()
+
     cache_block_size = CacheEngine.get_cache_block_size(
         cache_config.block_size, model_config, world_size)
     gpu_mem = __get_free_gpu_mem_size(cache_block_size)
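To make the reviewer thread's shared-memory argument concrete, here is a back-of-the-envelope sketch. Every number in it is an assumption chosen for illustration (fp16 elements, a 32-token query tile, roughly 96 KB of usable smem per thread block); the actual kernel's tiling and smem budget are not part of this PR.

```python
# Illustrative estimate of shared-memory pressure per thread block.
# Assumptions (not from the PR): fp16 elements, a 32-token query tile,
# and ~96 KB of usable smem per thread block.
DTYPE_BYTES = 2          # fp16/bf16 element size
QUERY_TILE = 32          # assumed query tokens staged per thread block
SMEM_BUDGET = 96 * 1024  # assumed usable smem per thread block, bytes


def smem_needed(head_dim: int, block_size: int) -> int:
    """Bytes to stage one kv_cache block plus one query tile in smem."""
    kv_tile = block_size * head_dim * DTYPE_BYTES
    q_tile = QUERY_TILE * head_dim * DTYPE_BYTES
    return kv_tile + q_tile


# head_dim=128 is a typical MHA head dim; 576 assumes DeepSeek v2's MLA
# cache dim (512-dim latent + 64-dim RoPE, hence the `k_head_dim >= 512`
# check in the diff).
for head_dim in (128, 576):
    for block_size in (32, 64):
        need = smem_needed(head_dim, block_size)
        verdict = 'fits' if need <= SMEM_BUDGET else 'exceeds budget'
        print(f'head_dim={head_dim:3d} block_size={block_size}: '
              f'{need >> 10} KB ({verdict})')
```

Under these assumptions only the large-head-dim, large-block combination (576 with block_size 64, at 108 KB) blows the budget, which matches the fix: cap `block_size` at 32 whenever `k_head_dim >= 512`.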
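For context on the hunk's trailing lines: the (possibly reduced) `block_size` feeds straight into `CacheEngine.get_cache_block_size(...)`, which sizes one KV-cache block. The sketch below is generic paged-KV arithmetic with assumed model dimensions, not lmdeploy's actual formula (which also accounts for tensor parallelism and MLA's asymmetric k/v head dims).

```python
# Generic paged-KV arithmetic (illustrative only; the real computation
# lives in CacheEngine.get_cache_block_size and is not shown in this PR).
# Assumed model shape: 60 layers, 1 kv head, head_dim 576, fp16.

def cache_block_bytes(block_size: int, num_layers: int = 60,
                      num_kv_heads: int = 1, head_dim: int = 576,
                      dtype_bytes: int = 2) -> int:
    """Bytes one cache block occupies across all layers (K and V halves)."""
    return 2 * num_layers * num_kv_heads * block_size * head_dim * dtype_bytes


free_mem = 8 << 30  # assume 8 GiB of free GPU memory is left for the cache
for block_size in (64, 32):
    per_block = cache_block_bytes(block_size)
    print(f'block_size={block_size}: {per_block >> 20} MB/block, '
          f'{free_mem // per_block} blocks')
```

Note that shrinking `block_size` does not change the bytes needed per cached token; it only makes the allocation granularity finer, so the same free memory is carved into twice as many half-sized blocks.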