generator_vs_dataloader.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. import time
  4. import numpy
  5. import torch
  6. from torch.utils.data import Dataset
  7. from draugr import batched_recycle
  8. from draugr.torch_utilities import to_tensor_generator
# Module metadata: author credit plus an explicitly assigned module docstring
# that only records the creation date of this benchmark script.
__author__ = "Christian Heider Nielsen"
__doc__ = r"""
Created on 28/10/2019
"""
  13. def test_d1():
  14. channels_in = 3
  15. channels_out = 3
  16. samples = 10
  17. device = "cuda"
  18. batches = 3
  19. batch_size = 32
  20. data_shape = (batches * batch_size, channels_in, 512, 512)
  21. model = torch.nn.Sequential(
  22. torch.nn.Conv2d(channels_in, channels_out, (3, 3)),
  23. torch.nn.ReLU(),
  24. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  25. torch.nn.ReLU(),
  26. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  27. torch.nn.ReLU(),
  28. ).to(device)
  29. for _ in range(samples):
  30. s1 = time.time()
  31. for _, a in zip(
  32. range(batches),
  33. to_tensor_generator(
  34. batched_recycle(numpy.random.sample(data_shape), batch_size),
  35. device=device,
  36. preload_next=False,
  37. ),
  38. ):
  39. model(a)
  40. s2 = time.time()
  41. for _, a in zip(
  42. range(batches),
  43. torch.utils.data.DataLoader(
  44. numpy.random.sample(data_shape),
  45. batch_size=batch_size,
  46. shuffle=True,
  47. num_workers=1,
  48. pin_memory=False,
  49. ),
  50. ):
  51. model(a.to(device, dtype=torch.float))
  52. s3 = time.time()
  53. print(f"generator: {s2 - s1}")
  54. print(f"dataloader: {s3 - s2}")
  55. def test_d2():
  56. channels_in = 3
  57. channels_out = 3
  58. samples = 10
  59. device = "cuda"
  60. batches = 3
  61. batch_size = 32
  62. data_shape = (batches * batch_size, channels_in, 512, 512)
  63. model = torch.nn.Sequential(
  64. torch.nn.Conv2d(channels_in, channels_out, (3, 3)),
  65. torch.nn.ReLU(),
  66. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  67. torch.nn.ReLU(),
  68. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  69. torch.nn.ReLU(),
  70. ).to(device)
  71. generator = to_tensor_generator(
  72. batched_recycle(numpy.random.sample(data_shape), batch_size),
  73. device=device,
  74. preload_next=False,
  75. )
  76. dataloader = torch.utils.data.DataLoader(
  77. numpy.random.sample(data_shape),
  78. batch_size=batch_size,
  79. shuffle=True,
  80. num_workers=1,
  81. pin_memory=False,
  82. )
  83. for _ in range(samples):
  84. s1 = time.time()
  85. for _, a in zip(range(batches), generator):
  86. model(a)
  87. s2 = time.time()
  88. for _, a in zip(range(batches), dataloader):
  89. model(a.to(device, dtype=torch.float))
  90. s3 = time.time()
  91. print(f"generator: {s2 - s1}")
  92. print(f"dataloader: {s3 - s2}")
  93. def test_d3():
  94. channels_in = 3
  95. channels_out = 3
  96. samples = 10
  97. device = "cuda"
  98. batches = 3
  99. batch_size = 32
  100. data_shape = (batches * batch_size, channels_in, 512, 512)
  101. model = torch.nn.Sequential(
  102. torch.nn.Conv2d(channels_in, channels_out, (3, 3)),
  103. torch.nn.ReLU(),
  104. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  105. torch.nn.ReLU(),
  106. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  107. torch.nn.ReLU(),
  108. ).to(device)
  109. generator = to_tensor_generator(
  110. batched_recycle(numpy.random.sample(data_shape), batch_size), device=device
  111. )
  112. dataloader = torch.utils.data.DataLoader(
  113. numpy.random.sample(data_shape),
  114. batch_size=batch_size,
  115. shuffle=True,
  116. num_workers=4,
  117. pin_memory=True,
  118. )
  119. for _ in range(samples):
  120. s1 = time.time()
  121. for _, a in zip(range(batches), dataloader):
  122. model(a.to(device, dtype=torch.float))
  123. s2 = time.time()
  124. for _, a in zip(range(batches), generator):
  125. model(a)
  126. s3 = time.time()
  127. print(f"dataloader: {s2 - s1}")
  128. print(f"generator: {s3 - s2}")
  129. def test_d4():
  130. from torchvision.transforms import transforms
  131. import numpy
  132. from draugr import inner_map
  133. a_transform = transforms.Compose(
  134. [
  135. transforms.ToPILImage("RGB"),
  136. transforms.Resize(224),
  137. transforms.CenterCrop(224),
  138. transforms.RandomHorizontalFlip(),
  139. transforms.ToTensor(),
  140. ]
  141. )
  142. channels_in = 3
  143. channels_out = 3
  144. samples = 10
  145. device = "cuda"
  146. batches = 3
  147. batch_size = 32
  148. data_shape = (batches * batch_size, 256, 256, channels_in)
  149. batch_shape = torch.Size([batch_size, channels_in, 224, 224])
  150. model = torch.nn.Sequential(
  151. torch.nn.Conv2d(channels_in, channels_out, (3, 3)),
  152. torch.nn.ReLU(),
  153. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  154. torch.nn.ReLU(),
  155. torch.nn.Conv2d(channels_out, channels_out, (3, 3)),
  156. torch.nn.ReLU(),
  157. ).to(device)
  158. class RandomDataset(Dataset):
  159. """
  160. """
  161. def __init__(self):
  162. self.d = numpy.random.sample(data_shape)
  163. def __len__(self):
  164. return len(self.d)
  165. def __getitem__(self, item):
  166. return a_transform(self.d[item])
  167. dataloader = torch.utils.data.DataLoader(
  168. RandomDataset(),
  169. batch_size=batch_size,
  170. shuffle=True,
  171. num_workers=1,
  172. pin_memory=False,
  173. )
  174. generator = to_tensor_generator(
  175. inner_map(
  176. a_transform, batched_recycle(numpy.random.sample(data_shape), batch_size)
  177. ),
  178. device=device,
  179. )
  180. for _ in range(samples):
  181. s1 = time.time()
  182. for _, a in zip(range(batches), dataloader):
  183. assert batch_shape == a.shape, a.shape
  184. model(a.to(device, dtype=torch.float))
  185. s2 = time.time()
  186. for _, a in zip(range(batches), generator):
  187. assert batch_shape == a.shape, a.shape
  188. model(a)
  189. s3 = time.time()
  190. print(f"dataloader: {s2 - s1}")
  191. print(f"generator: {s3 - s2}")
  192. def test_d5():
  193. from torchvision.transforms import transforms
  194. import numpy
  195. from draugr import inner_map
  196. a_transform = transforms.Compose(
  197. [
  198. transforms.ToPILImage("RGB"),
  199. transforms.Resize(224),
  200. transforms.CenterCrop(224),
  201. transforms.RandomHorizontalFlip(),
  202. transforms.ToTensor(),
  203. ]
  204. )
  205. channels_in = 3
  206. channels_out = 3
  207. samples = 10
  208. device = "cuda"
  209. batches = 3
  210. batch_size = 32
  211. data_shape = (batches * batch_size, 256, 256, channels_in)
  212. batch_shape = torch.Size([batch_size, channels_in, 224, 224])
  213. class RandomDataset(Dataset):
  214. """
  215. """
  216. def __init__(self):
  217. self.d = numpy.random.sample(data_shape)
  218. def __len__(self):
  219. return len(self.d)
  220. def __getitem__(self, item):
  221. return a_transform(self.d[item])
  222. dataloader = torch.utils.data.DataLoader(
  223. RandomDataset(),
  224. batch_size=batch_size,
  225. shuffle=True,
  226. num_workers=1,
  227. pin_memory=False,
  228. )
  229. generator = to_tensor_generator(
  230. inner_map(
  231. a_transform, batched_recycle(numpy.random.sample(data_shape), batch_size)
  232. ),
  233. device=device,
  234. )
  235. for _ in range(samples):
  236. s1 = time.time()
  237. for _, a in zip(range(batches), generator):
  238. assert batch_shape == a.shape, a.shape
  239. s2 = time.time()
  240. for _, a in zip(range(batches), dataloader):
  241. assert batch_shape == a.shape, a.shape
  242. s3 = time.time()
  243. print(f"generator: {s2 - s1}")
  244. print(f"dataloader: {s3 - s2}")
  245. def test_d6():
  246. from torchvision.transforms import transforms
  247. import numpy
  248. from draugr import inner_map
  249. a_transform = transforms.Compose(
  250. [
  251. transforms.ToPILImage("RGB"),
  252. transforms.Resize(224),
  253. transforms.CenterCrop(224),
  254. transforms.RandomHorizontalFlip(),
  255. transforms.ToTensor(),
  256. ]
  257. )
  258. channels_in = 3
  259. channels_out = 3
  260. samples = 10
  261. device = "cuda"
  262. batches = 3
  263. batch_size = 32
  264. data_shape = (batches * batch_size, 256, 256, channels_in)
  265. batch_shape = torch.Size([batch_size, channels_in, 224, 224])
  266. class RandomDataset(Dataset):
  267. """
  268. """
  269. def __init__(self):
  270. self.d = numpy.random.sample(data_shape)
  271. def __len__(self):
  272. return len(self.d)
  273. def __getitem__(self, item):
  274. return a_transform(self.d[item])
  275. dataloader = torch.utils.data.DataLoader(
  276. RandomDataset(),
  277. batch_size=batch_size,
  278. shuffle=True,
  279. num_workers=1,
  280. pin_memory=True,
  281. )
  282. generator = to_tensor_generator(
  283. inner_map(
  284. a_transform, batched_recycle(numpy.random.sample(data_shape), batch_size)
  285. ),
  286. device=device,
  287. preload_next=True,
  288. )
  289. for _ in range(samples):
  290. s1 = time.time()
  291. for _, a in zip(range(batches), generator):
  292. assert batch_shape == a.shape, a.shape
  293. s2 = time.time()
  294. for _, a in zip(range(batches), dataloader):
  295. assert batch_shape == a.shape, a.shape
  296. s3 = time.time()
  297. print(f"generator: {s2 - s1}")
  298. print(f"dataloader: {s3 - s2}")
  299. def test_d7():
  300. import numpy
  301. channels_in = 3
  302. samples = 10
  303. device = "cuda"
  304. batches = 3
  305. batch_size = 32
  306. data_shape = (batches * batch_size, 256, 256, channels_in)
  307. batch_shape = torch.Size([batch_size, 256, 256, channels_in])
  308. dtype = torch.float
  309. class RandomDataset(Dataset):
  310. """
  311. """
  312. def __init__(self):
  313. self.d = numpy.random.sample(data_shape)
  314. def __len__(self):
  315. return len(self.d)
  316. def __getitem__(self, item):
  317. return self.d[item]
  318. dataloader = torch.utils.data.DataLoader(
  319. RandomDataset(),
  320. batch_size=batch_size,
  321. shuffle=True,
  322. num_workers=1,
  323. pin_memory=True,
  324. )
  325. generator = to_tensor_generator(
  326. batched_recycle(numpy.random.sample(data_shape), batch_size),
  327. device=device,
  328. preload_next=True,
  329. dtype=dtype,
  330. )
  331. for _ in range(samples):
  332. s1 = time.time()
  333. for _, a in zip(range(batches), generator):
  334. assert batch_shape == a.shape, a.shape
  335. s2 = time.time()
  336. for _, a in zip(range(batches), dataloader):
  337. a = a.to(device, dtype=dtype)
  338. assert batch_shape == a.shape, a.shape
  339. s3 = time.time()
  340. print(f"generator: {s2 - s1}")
  341. print(f"dataloader: {s3 - s2}")
# Script entry point: when executed directly, run only the raw-delivery
# benchmark (test_d7); the other benchmarks are left to the test runner.
if __name__ == "__main__":
    test_d7()