Spaces:

Ravi21
/

smart12

Runtime error

App Files Files Community

Ravi21 committed on Jun 17, 2024

Commit

e832084

verified ·

1 Parent(s): 6e858ba

Upload 7 files

Browse files

Files changed (7) hide show

models/__pycache__/afwm.cpython-310.pyc +0 -0
models/__pycache__/networks.cpython-310.pyc +0 -0
models/afwm.py +198 -0
models/correlation/README.md +1 -0
models/correlation/__pycache__/correlation.cpython-310.pyc +0 -0
models/correlation/correlation.py +405 -0
models/networks.py +186 -0

models/__pycache__/afwm.cpython-310.pyc ADDED Viewed

Binary file (6.52 kB). View file

models/__pycache__/networks.cpython-310.pyc ADDED Viewed

Binary file (4.98 kB). View file

models/afwm.py ADDED Viewed

	@@ -0,0 +1,198 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from .correlation import correlation
+def apply_offset(offset):
+    sizes = list(offset.size()[2:])
+    grid_list = torch.meshgrid([torch.arange(size, device=offset.device) for size in sizes])
+    grid_list = reversed(grid_list)
+    grid_list = [grid.float().unsqueeze(0) + offset[:, dim, ...]
+        for dim, grid in enumerate(grid_list)]
+    grid_list = [grid / ((size - 1.0) / 2.0) - 1.0
+        for grid, size in zip(grid_list, reversed(sizes))]
+    return torch.stack(grid_list, dim=-1)
+class ResBlock(nn.Module):
+    def __init__(self, in_channels):
+        super(ResBlock, self).__init__()
+        self.block = nn.Sequential(
+            nn.BatchNorm2d(in_channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(in_channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(in_channels, in_channels, kernel_size=3, padding=1, bias=False)
+            )
+    def forward(self, x):
+        return self.block(x) + x
+class DownSample(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(DownSample, self).__init__()
+        self.block=  nn.Sequential(
+            nn.BatchNorm2d(in_channels),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1, bias=False)
+            )
+    def forward(self, x):
+        return self.block(x)
+class FeatureEncoder(nn.Module):
+    def __init__(self, in_channels, chns=[64,128,256,256,256]):
+        super(FeatureEncoder, self).__init__()
+        self.encoders = []
+        for i, out_chns in enumerate(chns):
+            if i == 0:
+                encoder = nn.Sequential(DownSample(in_channels, out_chns),
+                                        ResBlock(out_chns),
+                                        ResBlock(out_chns))
+            else:
+                encoder = nn.Sequential(DownSample(chns[i-1], out_chns),
+                                         ResBlock(out_chns),
+                                         ResBlock(out_chns))
+            self.encoders.append(encoder)
+        self.encoders = nn.ModuleList(self.encoders)
+    def forward(self, x):
+        encoder_features = []
+        for encoder in self.encoders:
+            x = encoder(x)
+            encoder_features.append(x)
+        return encoder_features
+class RefinePyramid(nn.Module):
+    def __init__(self, chns=[64,128,256,256,256], fpn_dim=256):
+        super(RefinePyramid, self).__init__()
+        self.chns = chns
+        self.adaptive = []
+        for in_chns in list(reversed(chns)):
+            adaptive_layer = nn.Conv2d(in_chns, fpn_dim, kernel_size=1)
+            self.adaptive.append(adaptive_layer)
+        self.adaptive = nn.ModuleList(self.adaptive)
+        self.smooth = []
+        for i in range(len(chns)):
+            smooth_layer = nn.Conv2d(fpn_dim, fpn_dim, kernel_size=3, padding=1)
+            self.smooth.append(smooth_layer)
+        self.smooth = nn.ModuleList(self.smooth)
+    def forward(self, x):
+        conv_ftr_list = x
+        feature_list = []
+        last_feature = None
+        for i, conv_ftr in enumerate(list(reversed(conv_ftr_list))):
+            feature = self.adaptive[i](conv_ftr)
+            if last_feature is not None:
+                feature = feature + F.interpolate(last_feature, scale_factor=2, mode='nearest')
+            feature = self.smooth[i](feature)
+            last_feature = feature
+            feature_list.append(feature)
+        return tuple(reversed(feature_list))
+class AFlowNet(nn.Module):
+    """Flow estimator that walks two feature pyramids from the last level to the first.
+
+    For every pyramid level it holds one ``netMain`` stack (correlation volume
+    -> 2-channel offset) and one ``netRefine`` stack (concatenated warped and
+    condition features -> 2-channel offset).
+    """
+    def __init__(self, num_pyramid, fpn_dim=256):
+        super(AFlowNet, self).__init__()
+        self.netMain = []
+        self.netRefine = []
+        for i in range(num_pyramid):
+            # in_channels=49 matches the 49-channel tensor that forward() feeds in
+            # from correlation.FunctionCorrelation.
+            netMain_layer = torch.nn.Sequential(
+                torch.nn.Conv2d(in_channels=49, out_channels=128, kernel_size=3, stride=1, padding=1),
+                torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
+                torch.nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1),
+                torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
+                torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
+                torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
+                torch.nn.Conv2d(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1)
+            )
+            # Input is the channel-wise concatenation of two fpn_dim-channel maps.
+            netRefine_layer = torch.nn.Sequential(
+                torch.nn.Conv2d(2 * fpn_dim, out_channels=128, kernel_size=3, stride=1, padding=1),
+                torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
+                torch.nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1),
+                torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
+                torch.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
+                torch.nn.LeakyReLU(inplace=False, negative_slope=0.1),
+                torch.nn.Conv2d(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1)
+            )
+            self.netMain.append(netMain_layer)
+            self.netRefine.append(netRefine_layer)
+        self.netMain = nn.ModuleList(self.netMain)
+        self.netRefine = nn.ModuleList(self.netRefine)
+    def forward(self, x, x_warps, x_conds, warp_feature=True):
+        """Estimate a sampling grid level by level and warp ``x`` with it.
+
+        ``x_warps`` and ``x_conds`` are indexed in lock-step, starting from
+        their last element.  Returns the tuple ``(x_warp, last_flow)``: ``x``
+        sampled with the final flow, and that flow itself.
+
+        NOTE(review): every F.grid_sample call and the bilinear F.interpolate
+        call here omit ``align_corners`` while apply_offset normalizes with
+        ``(size - 1) / 2`` -- confirm the defaults of the installed PyTorch
+        match the convention the weights were trained with.
+        """
+        last_flow = None
+        for i in range(len(x_warps)):
+          # Loop counter i selects netMain/netRefine; the pyramids are read back to front.
+          x_warp = x_warps[len(x_warps) - 1 - i]
+          x_cond = x_conds[len(x_warps) - 1 - i]
+          if last_flow is not None and warp_feature:
+              # Pre-warp with the previous flow; detach() keeps gradients out of this path.
+              x_warp_after = F.grid_sample(x_warp, last_flow.detach().permute(0, 2, 3, 1),
+                   mode='bilinear', padding_mode='border')
+          else:
+              x_warp_after = x_warp
+          tenCorrelation = F.leaky_relu(input=correlation.FunctionCorrelation(tenFirst=x_warp_after, tenSecond=x_cond, intStride=1), negative_slope=0.1, inplace=False)
+          flow = self.netMain[i](tenCorrelation)
+          flow = apply_offset(flow)
+          if last_flow is not None:
+              # Compose with the previous flow by sampling it at the new grid.
+              flow = F.grid_sample(last_flow, flow, mode='bilinear', padding_mode='border')
+          else:
+              # First level: move the trailing coordinate axis to dimension 1.
+              flow = flow.permute(0, 3, 1, 2)
+          last_flow = flow
+          x_warp = F.grid_sample(x_warp, flow.permute(0, 2, 3, 1),mode='bilinear', padding_mode='border')
+          # Refinement step on the warped features concatenated with the condition features.
+          concat = torch.cat([x_warp,x_cond],1)
+          flow = self.netRefine[i](concat)
+          flow = apply_offset(flow)
+          flow = F.grid_sample(last_flow, flow, mode='bilinear', padding_mode='border')
+          # Double the spatial size of the flow before the next iteration / final warp.
+          last_flow = F.interpolate(flow, scale_factor=2, mode='bilinear')
+        x_warp = F.grid_sample(x, last_flow.permute(0, 2, 3, 1),
+                     mode='bilinear', padding_mode='border')
+        # The trailing comma is redundant; this is still a 2-tuple.
+        return x_warp, last_flow,
+class AFWM(nn.Module):
+    def __init__(self, opt, input_nc):
+        super(AFWM, self).__init__()
+        num_filters = [64,128,256,256,256]
+        self.image_features = FeatureEncoder(3, num_filters)
+        self.cond_features = FeatureEncoder(input_nc, num_filters)
+        self.image_FPN = RefinePyramid(num_filters)
+        self.cond_FPN = RefinePyramid(num_filters)
+        self.aflow_net = AFlowNet(len(num_filters))
+    def forward(self, cond_input, image_input):
+        cond_pyramids = self.cond_FPN(self.cond_features(cond_input)) # maybe use nn.Sequential
+        image_pyramids = self.image_FPN(self.image_features(image_input))
+        x_warp, last_flow  = self.aflow_net(image_input, image_pyramids, cond_pyramids)
+        return x_warp, last_flow

models/correlation/README.md ADDED Viewed

	@@ -0,0 +1 @@

+ This is an adaptation of the <a href="https://github.com/lmb-freiburg/flownet2">FlowNet2 implementation</a> for computing cost volumes. If you make use of this work, please adhere to the <a href="https://github.com/lmb-freiburg/flownet2#license-and-citation">licensing terms</a> of the original authors. If you use or modify this particular implementation, please acknowledge it appropriately.

models/correlation/__pycache__/correlation.cpython-310.pyc ADDED Viewed

Binary file (13.7 kB). View file

models/correlation/correlation.py ADDED Viewed

	@@ -0,0 +1,405 @@

+#!/usr/bin/env python
+import torch
+import cupy
+import math
+import re
+# CUDA source template for the "rearrange" kernel.  The {{intStride}} and
+# SIZE_k(tensor) placeholders are textual and are substituted by cupy_kernel()
+# before compilation.  Each thread reads one input element addressed by
+# (blockIdx.z = sample, blockIdx.y = channel, intIndex = flattened position)
+# and writes it to `output` with the channel as the innermost index, shifted
+# by 3*stride in both spatial directions.
+kernel_Correlation_rearrange = '''
+	extern "C" __global__ void kernel_Correlation_rearrange(
+		const int n,
+		const float* input,
+		float* output
+	) {
+	  int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x;
+	  if (intIndex >= n) {
+	    return;
+	  }
+	  int intSample = blockIdx.z;
+	  int intChannel = blockIdx.y;
+	  float fltValue = input[(((intSample * SIZE_1(input)) + intChannel) * SIZE_2(input) * SIZE_3(input)) + intIndex];
+	  __syncthreads();
+	  int intPaddedY = (intIndex / SIZE_3(input)) + 3*{{intStride}};
+	  int intPaddedX = (intIndex % SIZE_3(input)) + 3*{{intStride}};
+	  int intRearrange = ((SIZE_3(input) + 6*{{intStride}}) * intPaddedY) + intPaddedX;
+	  output[(((intSample * SIZE_1(output) * SIZE_2(output)) + intRearrange) * SIZE_1(input)) + intChannel] = fltValue;
+	}
+'''
+# CUDA source template for the forward correlation kernel; placeholders are
+# substituted by cupy_kernel().  One block handles one output position
+# (blockIdx.x, blockIdx.y) of one sample (blockIdx.z); its threads stride
+# over the channels in steps of 32.  For each of the SIZE_1(top) output
+# channels the displacement is (top_channel % 7 - 3, top_channel / 7 - 3)
+# times the stride, and the stored value is the channel-wise product sum
+# divided by the number of channels.
+# NOTE(review): after thread 0 starts reading sum[] there is no barrier before
+# the other threads reset sum[ch_off] for the next top_channel -- confirm this
+# is safe on the GPUs this is run on.
+kernel_Correlation_updateOutput = '''
+	extern "C" __global__ void kernel_Correlation_updateOutput(
+	  const int n,
+	  const float* rbot0,
+	  const float* rbot1,
+	  float* top
+	) {
+	  extern __shared__ char patch_data_char[];
+	  float *patch_data = (float *)patch_data_char;
+	  // First (upper left) position of kernel upper-left corner in current center position of neighborhood in image 1
+	  int x1 = (blockIdx.x + 3) * {{intStride}};
+	  int y1 = (blockIdx.y + 3) * {{intStride}};
+	  int item = blockIdx.z;
+	  int ch_off = threadIdx.x;
+	  // Load 3D patch into shared shared memory
+	  for (int j = 0; j < 1; j++) { // HEIGHT
+	    for (int i = 0; i < 1; i++) { // WIDTH
+	      int ji_off = (j + i) * SIZE_3(rbot0);
+	      for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
+	        int idx1 = ((item * SIZE_1(rbot0) + y1+j) * SIZE_2(rbot0) + x1+i) * SIZE_3(rbot0) + ch;
+	        int idxPatchData = ji_off + ch;
+	        patch_data[idxPatchData] = rbot0[idx1];
+	      }
+	    }
+	  }
+	  __syncthreads();
+	  __shared__ float sum[32];
+	  // Compute correlation
+	  for (int top_channel = 0; top_channel < SIZE_1(top); top_channel++) {
+	    sum[ch_off] = 0;
+	    int s2o = (top_channel % 7 - 3) * {{intStride}};
+	    int s2p = (top_channel / 7 - 3) * {{intStride}};
+	    for (int j = 0; j < 1; j++) { // HEIGHT
+	      for (int i = 0; i < 1; i++) { // WIDTH
+	        int ji_off = (j + i) * SIZE_3(rbot0);
+	        for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
+	          int x2 = x1 + s2o;
+	          int y2 = y1 + s2p;
+	          int idxPatchData = ji_off + ch;
+	          int idx2 = ((item * SIZE_1(rbot0) + y2+j) * SIZE_2(rbot0) + x2+i) * SIZE_3(rbot0) + ch;
+	          sum[ch_off] += patch_data[idxPatchData] * rbot1[idx2];
+	        }
+	      }
+	    }
+	    __syncthreads();
+	    if (ch_off == 0) {
+	      float total_sum = 0;
+	      for (int idx = 0; idx < 32; idx++) {
+	        total_sum += sum[idx];
+	      }
+	      const int sumelems = SIZE_3(rbot0);
+	      const int index = ((top_channel*SIZE_2(top) + blockIdx.y)*SIZE_3(top))+blockIdx.x;
+	      top[index + item*SIZE_1(top)*SIZE_2(top)*SIZE_3(top)] = total_sum / (float)sumelems;
+	    }
+	  }
+	}
+'''
+# CUDA source template for the gradient with respect to the first input;
+# placeholders are substituted by cupy_kernel().  Each loop iteration handles
+# one element of gradFirst for sample `intSample`: the flat index is split
+# into channel (n), x position (l) and y position (m), the latter two shifted
+# by 3*stride.  It accumulates gradOutput * rbot1 over the 7x7 displacement
+# window and stores the sum divided by the channel count.
+# Note that the local `int n` declared inside the loop shadows the kernel
+# parameter `n` used in the loop condition.
+kernel_Correlation_updateGradFirst = '''
+	#define ROUND_OFF 50000
+	extern "C" __global__ void kernel_Correlation_updateGradFirst(
+	  const int n,
+	  const int intSample,
+	  const float* rbot0,
+	  const float* rbot1,
+	  const float* gradOutput,
+	  float* gradFirst,
+	  float* gradSecond
+	) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+	  int n = intIndex % SIZE_1(gradFirst); // channels
+	  int l = (intIndex / SIZE_1(gradFirst)) % SIZE_3(gradFirst) + 3*{{intStride}}; // w-pos
+	  int m = (intIndex / SIZE_1(gradFirst) / SIZE_3(gradFirst)) % SIZE_2(gradFirst) + 3*{{intStride}}; // h-pos
+	  // round_off is a trick to enable integer division with ceil, even for negative numbers
+	  // We use a large offset, for the inner part not to become negative.
+	  const int round_off = ROUND_OFF;
+	  const int round_off_s1 = {{intStride}} * round_off;
+	  // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
+	  int xmin = (l - 3*{{intStride}} + round_off_s1 - 1) / {{intStride}} + 1 - round_off; // ceil (l - 3*{{intStride}}) / {{intStride}}
+	  int ymin = (m - 3*{{intStride}} + round_off_s1 - 1) / {{intStride}} + 1 - round_off; // ceil (l - 3*{{intStride}}) / {{intStride}}
+	  // Same here:
+	  int xmax = (l - 3*{{intStride}} + round_off_s1) / {{intStride}} - round_off; // floor (l - 3*{{intStride}}) / {{intStride}}
+	  int ymax = (m - 3*{{intStride}} + round_off_s1) / {{intStride}} - round_off; // floor (m - 3*{{intStride}}) / {{intStride}}
+	  float sum = 0;
+	  if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
+	    xmin = max(0,xmin);
+	    xmax = min(SIZE_3(gradOutput)-1,xmax);
+	    ymin = max(0,ymin);
+	    ymax = min(SIZE_2(gradOutput)-1,ymax);
+	    for (int p = -3; p <= 3; p++) {
+	      for (int o = -3; o <= 3; o++) {
+	        // Get rbot1 data:
+	        int s2o = {{intStride}} * o;
+	        int s2p = {{intStride}} * p;
+	        int idxbot1 = ((intSample * SIZE_1(rbot0) + (m+s2p)) * SIZE_2(rbot0) + (l+s2o)) * SIZE_3(rbot0) + n;
+	        float bot1tmp = rbot1[idxbot1]; // rbot1[l+s2o,m+s2p,n]
+	        // Index offset for gradOutput in following loops:
+	        int op = (p+3) * 7 + (o+3); // index[o,p]
+	        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
+	        for (int y = ymin; y <= ymax; y++) {
+	          for (int x = xmin; x <= xmax; x++) {
+	            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
+	            sum += gradOutput[idxgradOutput] * bot1tmp;
+	          }
+	        }
+	      }
+	    }
+	  }
+	  const int sumelems = SIZE_1(gradFirst);
+	  const int bot0index = ((n * SIZE_2(gradFirst)) + (m-3*{{intStride}})) * SIZE_3(gradFirst) + (l-3*{{intStride}});
+	  gradFirst[bot0index + intSample*SIZE_1(gradFirst)*SIZE_2(gradFirst)*SIZE_3(gradFirst)] = sum / (float)sumelems;
+	} }
+'''
+# CUDA source template for the gradient with respect to the second input;
+# placeholders are substituted by cupy_kernel().  It mirrors the first-input
+# gradient kernel, but the x/y ranges are recomputed per displacement (o, p),
+# the factor is read from rbot0 at the position shifted by (-s2o, -s2p), and
+# the result is written to gradSecond.
+# Note that the local `int n` declared inside the loop shadows the kernel
+# parameter `n` used in the loop condition.
+kernel_Correlation_updateGradSecond = '''
+	#define ROUND_OFF 50000
+	extern "C" __global__ void kernel_Correlation_updateGradSecond(
+	  const int n,
+	  const int intSample,
+	  const float* rbot0,
+	  const float* rbot1,
+	  const float* gradOutput,
+	  float* gradFirst,
+	  float* gradSecond
+	) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+	  int n = intIndex % SIZE_1(gradSecond); // channels
+	  int l = (intIndex / SIZE_1(gradSecond)) % SIZE_3(gradSecond) + 3*{{intStride}}; // w-pos
+	  int m = (intIndex / SIZE_1(gradSecond) / SIZE_3(gradSecond)) % SIZE_2(gradSecond) + 3*{{intStride}}; // h-pos
+	  // round_off is a trick to enable integer division with ceil, even for negative numbers
+	  // We use a large offset, for the inner part not to become negative.
+	  const int round_off = ROUND_OFF;
+	  const int round_off_s1 = {{intStride}} * round_off;
+	  float sum = 0;
+	  for (int p = -3; p <= 3; p++) {
+	    for (int o = -3; o <= 3; o++) {
+	      int s2o = {{intStride}} * o;
+	      int s2p = {{intStride}} * p;
+	      //Get X,Y ranges and clamp
+	      // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
+	      int xmin = (l - 3*{{intStride}} - s2o + round_off_s1 - 1) / {{intStride}} + 1 - round_off; // ceil (l - 3*{{intStride}} - s2o) / {{intStride}}
+	      int ymin = (m - 3*{{intStride}} - s2p + round_off_s1 - 1) / {{intStride}} + 1 - round_off; // ceil (l - 3*{{intStride}} - s2o) / {{intStride}}
+	      // Same here:
+	      int xmax = (l - 3*{{intStride}} - s2o + round_off_s1) / {{intStride}} - round_off; // floor (l - 3*{{intStride}} - s2o) / {{intStride}}
+	      int ymax = (m - 3*{{intStride}} - s2p + round_off_s1) / {{intStride}} - round_off; // floor (m - 3*{{intStride}} - s2p) / {{intStride}}
+	      if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
+	        xmin = max(0,xmin);
+	        xmax = min(SIZE_3(gradOutput)-1,xmax);
+	        ymin = max(0,ymin);
+	        ymax = min(SIZE_2(gradOutput)-1,ymax);
+	        // Get rbot0 data:
+	        int idxbot0 = ((intSample * SIZE_1(rbot0) + (m-s2p)) * SIZE_2(rbot0) + (l-s2o)) * SIZE_3(rbot0) + n;
+	        float bot0tmp = rbot0[idxbot0]; // rbot1[l+s2o,m+s2p,n]
+	        // Index offset for gradOutput in following loops:
+	        int op = (p+3) * 7 + (o+3); // index[o,p]
+	        int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
+	        for (int y = ymin; y <= ymax; y++) {
+	          for (int x = xmin; x <= xmax; x++) {
+	            int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
+	            sum += gradOutput[idxgradOutput] * bot0tmp;
+	          }
+	        }
+	      }
+	    }
+	  }
+	  const int sumelems = SIZE_1(gradSecond);
+	  const int bot1index = ((n * SIZE_2(gradSecond)) + (m-3*{{intStride}})) * SIZE_3(gradSecond) + (l-3*{{intStride}});
+	  gradSecond[bot1index + intSample*SIZE_1(gradSecond)*SIZE_2(gradSecond)*SIZE_3(gradSecond)] = sum / (float)sumelems;
+	} }
+'''
+def cupy_kernel(strFunction, objVariables):
+	strKernel = globals()[strFunction].replace('{{intStride}}', str(objVariables['intStride']))
+	while True:
+		objMatch = re.search('(SIZE_)([0-4])(\()([^\)]*)(\))', strKernel)
+		if objMatch is None:
+			break
+		# end
+		intArg = int(objMatch.group(2))
+		strTensor = objMatch.group(4)
+		intSizes = objVariables[strTensor].size()
+		strKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg]))
+	# end
+	while True:
+		objMatch = re.search('(VALUE_)([0-4])(\()([^\)]+)(\))', strKernel)
+		if objMatch is None:
+			break
+		# end
+		intArgs = int(objMatch.group(2))
+		strArgs = objMatch.group(4).split(',')
+		strTensor = strArgs[0]
+		intStrides = objVariables[strTensor].stride()
+		strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg]) + ')' for intArg in range(intArgs) ]
+		strKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str.join('+', strIndex) + ']')
+	# end
+	return strKernel
+# end
+@cupy.util.memoize(for_each_device=True)
+def cupy_launch(strFunction, strKernel):
+	return cupy.cuda.compile_with_cache(strKernel).get_function(strFunction)
+# end
+class _FunctionCorrelation(torch.autograd.Function):
+	"""Autograd function wrapping the CUDA correlation kernels defined in this module.
+
+	Both static methods name their first parameter ``self``; they use it to
+	save tensors and the stride between forward and backward.
+	"""
+	@staticmethod
+	def forward(self, first, second, intStride):
+		"""Compute the 49-channel correlation volume of ``first`` and ``second``.
+
+		Both inputs must be contiguous.  Raises ``NotImplementedError`` when
+		``first`` is not a CUDA tensor.
+		"""
+		# Zero-initialized, channel-last work buffers padded by 3*intStride on every side.
+		rbot0 = first.new_zeros([ first.shape[0], first.shape[2] + (6 * intStride), first.shape[3] + (6 * intStride), first.shape[1] ])
+		rbot1 = first.new_zeros([ first.shape[0], first.shape[2] + (6 * intStride), first.shape[3] + (6 * intStride), first.shape[1] ])
+		# The buffers are saved before being filled; the kernels below write into them in place.
+		self.save_for_backward(first, second, rbot0, rbot1)
+		self.intStride = intStride
+		assert(first.is_contiguous() == True)
+		assert(second.is_contiguous() == True)
+		output = first.new_zeros([ first.shape[0], 49, int(math.ceil(first.shape[2] / intStride)), int(math.ceil(first.shape[3] / intStride)) ])
+		if first.is_cuda == True:
+			# Step 1: copy `first` into rbot0 (16 threads per block over the H*W positions).
+			n = first.shape[2] * first.shape[3]
+			cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
+				'intStride': self.intStride,
+				'input': first,
+				'output': rbot0
+			}))(
+				grid=tuple([ int((n + 16 - 1) / 16), first.shape[1], first.shape[0] ]),
+				block=tuple([ 16, 1, 1 ]),
+				args=[ n, first.data_ptr(), rbot0.data_ptr() ]
+			)
+			# Step 2: same rearrangement for `second` into rbot1.
+			n = second.shape[2] * second.shape[3]
+			cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
+				'intStride': self.intStride,
+				'input': second,
+				'output': rbot1
+			}))(
+				grid=tuple([ int((n + 16 - 1) / 16), second.shape[1], second.shape[0] ]),
+				block=tuple([ 16, 1, 1 ]),
+				args=[ n, second.data_ptr(), rbot1.data_ptr() ]
+			)
+			# Step 3: one block of 32 threads per output position and sample;
+			# shared memory holds one float per input channel.
+			n = output.shape[1] * output.shape[2] * output.shape[3]
+			cupy_launch('kernel_Correlation_updateOutput', cupy_kernel('kernel_Correlation_updateOutput', {
+				'intStride': self.intStride,
+				'rbot0': rbot0,
+				'rbot1': rbot1,
+				'top': output
+			}))(
+				grid=tuple([ output.shape[3], output.shape[2], output.shape[0] ]),
+				block=tuple([ 32, 1, 1 ]),
+				shared_mem=first.shape[1] * 4,
+				args=[ n, rbot0.data_ptr(), rbot1.data_ptr(), output.data_ptr() ]
+			)
+		elif first.is_cuda == False:
+			raise NotImplementedError()
+		# end
+		return output
+	# end
+	@staticmethod
+	def backward(self, gradOutput):
+		"""Return ``(gradFirst, gradSecond, None)`` for the three forward inputs.
+
+		A gradient is only allocated and computed when ``needs_input_grad``
+		requests it; otherwise ``None`` is returned in its place.
+		``gradOutput`` must be contiguous.
+		"""
+		first, second, rbot0, rbot1 = self.saved_tensors
+		assert(gradOutput.is_contiguous() == True)
+		gradFirst = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[0] == True else None
+		gradSecond = first.new_zeros([ first.shape[0], first.shape[1], first.shape[2], first.shape[3] ]) if self.needs_input_grad[1] == True else None
+		if first.is_cuda == True:#
+			if gradFirst is not None:
+				# One kernel launch per sample; the unused gradSecond slot is passed as None.
+				for intSample in range(first.shape[0]):
+					n = first.shape[1] * first.shape[2] * first.shape[3]
+					cupy_launch('kernel_Correlation_updateGradFirst', cupy_kernel('kernel_Correlation_updateGradFirst', {
+						'intStride': self.intStride,
+						'rbot0': rbot0,
+						'rbot1': rbot1,
+						'gradOutput': gradOutput,
+						'gradFirst': gradFirst,
+						'gradSecond': None
+					}))(
+						grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+						block=tuple([ 512, 1, 1 ]),
+						args=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), gradFirst.data_ptr(), None ]
+					)
+				# end
+			# end
+			if gradSecond is not None:
+				# Same per-sample scheme for the second input; gradFirst slot is None here.
+				for intSample in range(first.shape[0]):
+					n = first.shape[1] * first.shape[2] * first.shape[3]
+					cupy_launch('kernel_Correlation_updateGradSecond', cupy_kernel('kernel_Correlation_updateGradSecond', {
+						'intStride': self.intStride,
+						'rbot0': rbot0,
+						'rbot1': rbot1,
+						'gradOutput': gradOutput,
+						'gradFirst': None,
+						'gradSecond': gradSecond
+					}))(
+						grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+						block=tuple([ 512, 1, 1 ]),
+						args=[ n, intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), None, gradSecond.data_ptr() ]
+					)
+				# end
+			# end
+		elif first.is_cuda == False:
+			raise NotImplementedError()
+		# end
+		# The third entry corresponds to intStride, which receives no gradient.
+		return gradFirst, gradSecond, None
+	# end
+# end
+def FunctionCorrelation(tenFirst, tenSecond, intStride):
+	"""Functional entry point: apply _FunctionCorrelation to the two tensors with the given stride."""
+	return _FunctionCorrelation.apply(tenFirst, tenSecond, intStride)
+# end
+class ModuleCorrelation(torch.nn.Module):
+	"""Parameter-free nn.Module wrapper around _FunctionCorrelation."""
+	def __init__(self):
+		super(ModuleCorrelation, self).__init__()
+	# end
+	def forward(self, tenFirst, tenSecond, intStride):
+		"""Return ``_FunctionCorrelation.apply(tenFirst, tenSecond, intStride)``."""
+		return _FunctionCorrelation.apply(tenFirst, tenSecond, intStride)
+	# end
+# end

models/networks.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import os
+class UnetSkipConnectionBlock(nn.Module):
+    def __init__(self, outer_nc, inner_nc, input_nc=None,
+                 submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
+        super(UnetSkipConnectionBlock, self).__init__()
+        self.outermost = outermost
+        use_bias = norm_layer == nn.InstanceNorm2d
+        if input_nc is None:
+            input_nc = outer_nc
+        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=4,
+                             stride=2, padding=1, bias=use_bias)
+        downrelu = nn.LeakyReLU(0.2, True)
+        uprelu = nn.ReLU(True)
+        if norm_layer != None:
+            downnorm = norm_layer(inner_nc)
+            upnorm = norm_layer(outer_nc)
+        if outermost:
+            upsample = nn.Upsample(scale_factor=2, mode='bilinear')
+            upconv = nn.Conv2d(inner_nc * 2, outer_nc, kernel_size=3, stride=1, padding=1, bias=use_bias)
+            down = [downconv]
+            up = [uprelu, upsample, upconv]
+            model = down + [submodule] + up
+        elif innermost:
+            upsample = nn.Upsample(scale_factor=2, mode='bilinear')
+            upconv = nn.Conv2d(inner_nc, outer_nc, kernel_size=3, stride=1, padding=1, bias=use_bias)
+            down = [downrelu, downconv]
+            if norm_layer == None:
+                up = [uprelu, upsample, upconv]
+            else:
+                up = [uprelu, upsample, upconv, upnorm]
+            model = down + up
+        else:
+            upsample = nn.Upsample(scale_factor=2, mode='bilinear')
+            upconv = nn.Conv2d(inner_nc*2, outer_nc, kernel_size=3, stride=1, padding=1, bias=use_bias)
+            if norm_layer == None:
+                down = [downrelu, downconv]
+                up = [uprelu, upsample, upconv]
+            else:
+                down = [downrelu, downconv, downnorm]
+                up = [uprelu, upsample, upconv, upnorm]
+            if use_dropout:
+                model = down + [submodule] + up + [nn.Dropout(0.5)]
+            else:
+                model = down + [submodule] + up
+        self.model = nn.Sequential(*model)
+    def forward(self, x):
+        if self.outermost:
+            return self.model(x)
+        else:
+            return torch.cat([x, self.model(x)], 1)
+class ResidualBlock(nn.Module):
+    def __init__(self, in_features=64, norm_layer=nn.BatchNorm2d):
+        super(ResidualBlock, self).__init__()
+        self.relu = nn.ReLU(True)
+        if norm_layer == None:
+            self.block = nn.Sequential(
+                nn.Conv2d(in_features, in_features, 3, 1, 1, bias=False),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(in_features, in_features, 3, 1, 1, bias=False),
+            )
+        else:
+            self.block = nn.Sequential(
+                nn.Conv2d(in_features, in_features, 3, 1, 1, bias=False),
+                norm_layer(in_features),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(in_features, in_features, 3, 1, 1, bias=False),
+                norm_layer(in_features)
+            )
+    def forward(self, x):
+        residual = x
+        out = self.block(x)
+        out += residual
+        out = self.relu(out)
+        return out
+class ResUnetGenerator(nn.Module):
+    def __init__(self, input_nc, output_nc, num_downs, ngf=64,
+                 norm_layer=nn.BatchNorm2d, use_dropout=False):
+        super(ResUnetGenerator, self).__init__()
+        unet_block = ResUnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=None, norm_layer=norm_layer, innermost=True)
+        for i in range(num_downs - 5):
+            unet_block = ResUnetSkipConnectionBlock(ngf * 8, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer, use_dropout=use_dropout)
+        unet_block = ResUnetSkipConnectionBlock(ngf * 4, ngf * 8, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
+        unet_block = ResUnetSkipConnectionBlock(ngf * 2, ngf * 4, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
+        unet_block = ResUnetSkipConnectionBlock(ngf, ngf * 2, input_nc=None, submodule=unet_block, norm_layer=norm_layer)
+        unet_block = ResUnetSkipConnectionBlock(output_nc, ngf, input_nc=input_nc, submodule=unet_block, outermost=True, norm_layer=norm_layer)
+        self.model = unet_block
+    def forward(self, input):
+        return self.model(input)
+class ResUnetSkipConnectionBlock(nn.Module):
+    def __init__(self, outer_nc, inner_nc, input_nc=None,
+                 submodule=None, outermost=False, innermost=False, norm_layer=nn.BatchNorm2d, use_dropout=False):
+        super(ResUnetSkipConnectionBlock, self).__init__()
+        self.outermost = outermost
+        use_bias = norm_layer == nn.InstanceNorm2d
+        if input_nc is None:
+            input_nc = outer_nc
+        downconv = nn.Conv2d(input_nc, inner_nc, kernel_size=3,
+                             stride=2, padding=1, bias=use_bias)
+        res_downconv = [ResidualBlock(inner_nc, norm_layer), ResidualBlock(inner_nc, norm_layer)]
+        res_upconv = [ResidualBlock(outer_nc, norm_layer), ResidualBlock(outer_nc, norm_layer)]
+        downrelu = nn.ReLU(True)
+        uprelu = nn.ReLU(True)
+        if norm_layer != None:
+            downnorm = norm_layer(inner_nc)
+            upnorm = norm_layer(outer_nc)
+        if outermost:
+            upsample = nn.Upsample(scale_factor=2, mode='nearest')
+            upconv = nn.Conv2d(inner_nc * 2, outer_nc, kernel_size=3, stride=1, padding=1, bias=use_bias)
+            down = [downconv, downrelu] + res_downconv
+            up = [upsample, upconv]
+            model = down + [submodule] + up
+        elif innermost:
+            upsample = nn.Upsample(scale_factor=2, mode='nearest')
+            upconv = nn.Conv2d(inner_nc, outer_nc, kernel_size=3, stride=1, padding=1, bias=use_bias)
+            down = [downconv, downrelu] + res_downconv
+            if norm_layer == None:
+                up = [upsample, upconv, uprelu] + res_upconv
+            else:
+                up = [upsample, upconv, upnorm, uprelu] + res_upconv
+            model = down + up
+        else:
+            upsample = nn.Upsample(scale_factor=2, mode='nearest')
+            upconv = nn.Conv2d(inner_nc*2, outer_nc, kernel_size=3, stride=1, padding=1, bias=use_bias)
+            if norm_layer == None:
+                down = [downconv, downrelu] + res_downconv
+                up = [upsample, upconv, uprelu] + res_upconv
+            else:
+                down = [downconv, downnorm, downrelu] + res_downconv
+                up = [upsample, upconv, upnorm, uprelu] + res_upconv
+            if use_dropout:
+                model = down + [submodule] + up + [nn.Dropout(0.5)]
+            else:
+                model = down + [submodule] + up
+        self.model = nn.Sequential(*model)
+    def forward(self, x):
+        if self.outermost:
+            return self.model(x)
+        else:
+            return torch.cat([x, self.model(x)], 1)
+def save_checkpoint(model, save_path):
+    if not os.path.exists(os.path.dirname(save_path)):
+        os.makedirs(os.path.dirname(save_path))
+    torch.save(model.state_dict(), save_path)
+def load_checkpoint(model, checkpoint_path):
+    if not os.path.exists(checkpoint_path):
+        print('No checkpoint!')
+        return
+    checkpoint = torch.load(checkpoint_path)
+    checkpoint_new = model.state_dict()
+    for param in checkpoint_new:
+        checkpoint_new[param] = checkpoint[param]
+    model.load_state_dict(checkpoint_new)