1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.5"/>
<title>Faiss: /data/users/matthijs/github_faiss/faiss/gpu/utils/nvidia/fp16_emu.cuh Source File</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td style="padding-left: 0.5em;">
<div id="projectname">Faiss
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.5 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="index.html"><span>Main Page</span></a></li>
<li><a href="namespaces.html"><span>Namespaces</span></a></li>
<li><a href="annotated.html"><span>Classes</span></a></li>
<li class="current"><a href="files.html"><span>Files</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</li>
</ul>
</div>
<div id="navrow2" class="tabs2">
<ul class="tablist">
<li><a href="files.html"><span>File List</span></a></li>
</ul>
</div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark"> </span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark"> </span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark"> </span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark"> </span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark"> </span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark"> </span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark"> </span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark"> </span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark"> </span>Friends</a></div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="dir_6b3ae6988449b0834e9596fad5d75199.html">gpu</a></li><li class="navelem"><a class="el" href="dir_498271007b03b2a0521055e88776887b.html">utils</a></li><li class="navelem"><a class="el" href="dir_ea867109ea74d645e3e3cf2a6de09f37.html">nvidia</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="headertitle">
<div class="title">fp16_emu.cuh</div> </div>
</div><!--header-->
<div class="contents">
<div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno"> 1</span> <span class="comment">/**</span></div>
<div class="line"><a name="l00002"></a><span class="lineno"> 2</span> <span class="comment"> * Copyright (c) 2015-present, Facebook, Inc.</span></div>
<div class="line"><a name="l00003"></a><span class="lineno"> 3</span> <span class="comment"> * All rights reserved.</span></div>
<div class="line"><a name="l00004"></a><span class="lineno"> 4</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00005"></a><span class="lineno"> 5</span> <span class="comment"> * This source code is licensed under the CC-by-NC license found in the</span></div>
<div class="line"><a name="l00006"></a><span class="lineno"> 6</span> <span class="comment"> * LICENSE file in the root directory of this source tree.</span></div>
<div class="line"><a name="l00007"></a><span class="lineno"> 7</span> <span class="comment"> */</span></div>
<div class="line"><a name="l00008"></a><span class="lineno"> 8</span> </div>
<div class="line"><a name="l00009"></a><span class="lineno"> 9</span> <span class="comment">// from Nvidia cuDNN library samples; modified to compile within faiss</span></div>
<div class="line"><a name="l00010"></a><span class="lineno"> 10</span> </div>
<div class="line"><a name="l00011"></a><span class="lineno"> 11</span> <span class="preprocessor">#pragma once</span></div>
<div class="line"><a name="l00012"></a><span class="lineno"> 12</span> <span class="preprocessor"></span></div>
<div class="line"><a name="l00013"></a><span class="lineno"> 13</span> <span class="keyword">namespace </span>faiss { <span class="keyword">namespace </span>gpu {</div>
<div class="line"><a name="l00014"></a><span class="lineno"> 14</span> </div>
<div class="line"><a name="l00015"></a><span class="lineno"> 15</span> <span class="comment">/*</span></div>
<div class="line"><a name="l00016"></a><span class="lineno"> 16</span> <span class="comment"> * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.</span></div>
<div class="line"><a name="l00017"></a><span class="lineno"> 17</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00018"></a><span class="lineno"> 18</span> <span class="comment"> * NOTICE TO LICENSEE:</span></div>
<div class="line"><a name="l00019"></a><span class="lineno"> 19</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00020"></a><span class="lineno"> 20</span> <span class="comment"> * This source code and/or documentation ("Licensed Deliverables") are</span></div>
<div class="line"><a name="l00021"></a><span class="lineno"> 21</span> <span class="comment"> * subject to NVIDIA intellectual property rights under U.S. and</span></div>
<div class="line"><a name="l00022"></a><span class="lineno"> 22</span> <span class="comment"> * international Copyright laws.</span></div>
<div class="line"><a name="l00023"></a><span class="lineno"> 23</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00024"></a><span class="lineno"> 24</span> <span class="comment"> * These Licensed Deliverables contained herein is PROPRIETARY and</span></div>
<div class="line"><a name="l00025"></a><span class="lineno"> 25</span> <span class="comment"> * CONFIDENTIAL to NVIDIA and is being provided under the terms and</span></div>
<div class="line"><a name="l00026"></a><span class="lineno"> 26</span> <span class="comment"> * conditions of a form of NVIDIA software license agreement by and</span></div>
<div class="line"><a name="l00027"></a><span class="lineno"> 27</span> <span class="comment"> * between NVIDIA and Licensee ("License Agreement") or electronically</span></div>
<div class="line"><a name="l00028"></a><span class="lineno"> 28</span> <span class="comment"> * accepted by Licensee. Notwithstanding any terms or conditions to</span></div>
<div class="line"><a name="l00029"></a><span class="lineno"> 29</span> <span class="comment"> * the contrary in the License Agreement, reproduction or disclosure</span></div>
<div class="line"><a name="l00030"></a><span class="lineno"> 30</span> <span class="comment"> * of the Licensed Deliverables to any third party without the express</span></div>
<div class="line"><a name="l00031"></a><span class="lineno"> 31</span> <span class="comment"> * written consent of NVIDIA is prohibited.</span></div>
<div class="line"><a name="l00032"></a><span class="lineno"> 32</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00033"></a><span class="lineno"> 33</span> <span class="comment"> * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE</span></div>
<div class="line"><a name="l00034"></a><span class="lineno"> 34</span> <span class="comment"> * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE</span></div>
<div class="line"><a name="l00035"></a><span class="lineno"> 35</span> <span class="comment"> * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS</span></div>
<div class="line"><a name="l00036"></a><span class="lineno"> 36</span> <span class="comment"> * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.</span></div>
<div class="line"><a name="l00037"></a><span class="lineno"> 37</span> <span class="comment"> * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED</span></div>
<div class="line"><a name="l00038"></a><span class="lineno"> 38</span> <span class="comment"> * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,</span></div>
<div class="line"><a name="l00039"></a><span class="lineno"> 39</span> <span class="comment"> * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.</span></div>
<div class="line"><a name="l00040"></a><span class="lineno"> 40</span> <span class="comment"> * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE</span></div>
<div class="line"><a name="l00041"></a><span class="lineno"> 41</span> <span class="comment"> * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY</span></div>
<div class="line"><a name="l00042"></a><span class="lineno"> 42</span> <span class="comment"> * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY</span></div>
<div class="line"><a name="l00043"></a><span class="lineno"> 43</span> <span class="comment"> * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,</span></div>
<div class="line"><a name="l00044"></a><span class="lineno"> 44</span> <span class="comment"> * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS</span></div>
<div class="line"><a name="l00045"></a><span class="lineno"> 45</span> <span class="comment"> * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE</span></div>
<div class="line"><a name="l00046"></a><span class="lineno"> 46</span> <span class="comment"> * OF THESE LICENSED DELIVERABLES.</span></div>
<div class="line"><a name="l00047"></a><span class="lineno"> 47</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00048"></a><span class="lineno"> 48</span> <span class="comment"> * U.S. Government End Users. These Licensed Deliverables are a</span></div>
<div class="line"><a name="l00049"></a><span class="lineno"> 49</span> <span class="comment"> * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT</span></div>
<div class="line"><a name="l00050"></a><span class="lineno"> 50</span> <span class="comment"> * 1995), consisting of "commercial computer software" and "commercial</span></div>
<div class="line"><a name="l00051"></a><span class="lineno"> 51</span> <span class="comment"> * computer software documentation" as such terms are used in 48</span></div>
<div class="line"><a name="l00052"></a><span class="lineno"> 52</span> <span class="comment"> * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government</span></div>
<div class="line"><a name="l00053"></a><span class="lineno"> 53</span> <span class="comment"> * only as a commercial end item. Consistent with 48 C.F.R.12.212 and</span></div>
<div class="line"><a name="l00054"></a><span class="lineno"> 54</span> <span class="comment"> * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all</span></div>
<div class="line"><a name="l00055"></a><span class="lineno"> 55</span> <span class="comment"> * U.S. Government End Users acquire the Licensed Deliverables with</span></div>
<div class="line"><a name="l00056"></a><span class="lineno"> 56</span> <span class="comment"> * only those rights set forth herein.</span></div>
<div class="line"><a name="l00057"></a><span class="lineno"> 57</span> <span class="comment"> *</span></div>
<div class="line"><a name="l00058"></a><span class="lineno"> 58</span> <span class="comment"> * Any use of the Licensed Deliverables in individual and commercial</span></div>
<div class="line"><a name="l00059"></a><span class="lineno"> 59</span> <span class="comment"> * software must include, in the user documentation and internal</span></div>
<div class="line"><a name="l00060"></a><span class="lineno"> 60</span> <span class="comment"> * comments to the code, the above Disclaimer and U.S. Government End</span></div>
<div class="line"><a name="l00061"></a><span class="lineno"> 61</span> <span class="comment"> * Users Notice.</span></div>
<div class="line"><a name="l00062"></a><span class="lineno"> 62</span> <span class="comment"> */</span></div>
<div class="line"><a name="l00063"></a><span class="lineno"> 63</span> </div>
<div class="line"><a name="l00064"></a><span class="lineno"> 64</span> <span class="comment">// Conversion from/to 16-bit floating point (half-precision).</span></div>
<div class="line"><a name="l00065"></a><span class="lineno"> 65</span> </div>
<div class="line"><a name="l00066"></a><span class="lineno"> 66</span> <span class="preprocessor">#define HLF_EPSILON 4.887581E-04</span></div>
<div class="line"><a name="l00067"></a><span class="lineno"> 67</span> <span class="preprocessor"></span></div>
<div class="line"><a name="l00068"></a><span class="lineno"> 68</span> <span class="keyword">typedef</span> <span class="keyword">struct </span>__align__(2) {</div>
<div class="line"><a name="l00069"></a><span class="lineno"> 69</span>  <span class="keywordtype">unsigned</span> <span class="keywordtype">short</span> x;</div>
<div class="line"><a name="l00070"></a><span class="lineno"> 70</span> } half1;</div>
<div class="line"><a name="l00071"></a><span class="lineno"> 71</span> </div>
<div class="line"><a name="l00072"></a><span class="lineno"> 72</span> half1 cpu_float2half_rn(<span class="keywordtype">float</span> f);</div>
<div class="line"><a name="l00073"></a><span class="lineno"> 73</span> </div>
<div class="line"><a name="l00074"></a><span class="lineno"> 74</span> <span class="keywordtype">float</span> cpu_half2float(half1 h);</div>
<div class="line"><a name="l00075"></a><span class="lineno"> 75</span> </div>
<div class="line"><a name="l00076"></a><span class="lineno"> 76</span> <span class="keyword">static</span> __inline__ __device__ __host__ half1 habs(half1 h)</div>
<div class="line"><a name="l00077"></a><span class="lineno"> 77</span> {</div>
<div class="line"><a name="l00078"></a><span class="lineno"> 78</span>  h.x &amp;= 0x7fffU;</div>
<div class="line"><a name="l00079"></a><span class="lineno"> 79</span>  <span class="keywordflow">return</span> h;</div>
<div class="line"><a name="l00080"></a><span class="lineno"> 80</span> }</div>
<div class="line"><a name="l00081"></a><span class="lineno"> 81</span> </div>
<div class="line"><a name="l00082"></a><span class="lineno"> 82</span> <span class="keyword">static</span> __inline__ __device__ __host__ half1 hneg(half1 h)</div>
<div class="line"><a name="l00083"></a><span class="lineno"> 83</span> {</div>
<div class="line"><a name="l00084"></a><span class="lineno"> 84</span>  h.x ^= 0x8000U;</div>
<div class="line"><a name="l00085"></a><span class="lineno"> 85</span>  <span class="keywordflow">return</span> h;</div>
<div class="line"><a name="l00086"></a><span class="lineno"> 86</span> }</div>
<div class="line"><a name="l00087"></a><span class="lineno"> 87</span> </div>
<div class="line"><a name="l00088"></a><span class="lineno"> 88</span> <span class="keyword">static</span> __inline__ __device__ __host__ <span class="keywordtype">int</span> ishnan(half1 h)</div>
<div class="line"><a name="l00089"></a><span class="lineno"> 89</span> {</div>
<div class="line"><a name="l00090"></a><span class="lineno"> 90</span>  <span class="comment">// When input is NaN, exponent is all ones and mantissa is non-zero.</span></div>
<div class="line"><a name="l00091"></a><span class="lineno"> 91</span>  <span class="keywordflow">return</span> (h.x &amp; 0x7c00U) == 0x7c00U &amp;&amp; (h.x &amp; 0x03ffU) != 0;</div>
<div class="line"><a name="l00092"></a><span class="lineno"> 92</span> }</div>
<div class="line"><a name="l00093"></a><span class="lineno"> 93</span> </div>
<div class="line"><a name="l00094"></a><span class="lineno"> 94</span> <span class="keyword">static</span> __inline__ __device__ __host__ <span class="keywordtype">int</span> ishinf(half1 h)</div>
<div class="line"><a name="l00095"></a><span class="lineno"> 95</span> {</div>
<div class="line"><a name="l00096"></a><span class="lineno"> 96</span>  <span class="comment">// When input is +/- inf, exponent is all ones and mantissa is zero.</span></div>
<div class="line"><a name="l00097"></a><span class="lineno"> 97</span>  <span class="keywordflow">return</span> (h.x &amp; 0x7c00U) == 0x7c00U &amp;&amp; (h.x &amp; 0x03ffU) == 0;</div>
<div class="line"><a name="l00098"></a><span class="lineno"> 98</span> }</div>
<div class="line"><a name="l00099"></a><span class="lineno"> 99</span> </div>
<div class="line"><a name="l00100"></a><span class="lineno"> 100</span> <span class="keyword">static</span> __inline__ __device__ __host__ <span class="keywordtype">int</span> ishequ(half1 x, half1 y)</div>
<div class="line"><a name="l00101"></a><span class="lineno"> 101</span> {</div>
<div class="line"><a name="l00102"></a><span class="lineno"> 102</span>  <span class="keywordflow">return</span> ishnan(x) == 0 && ishnan(y) == 0 && x.x == y.x;</div>
<div class="line"><a name="l00103"></a><span class="lineno"> 103</span> }</div>
<div class="line"><a name="l00104"></a><span class="lineno"> 104</span> </div>
<div class="line"><a name="l00105"></a><span class="lineno"> 105</span> <span class="keyword">static</span> __inline__ __device__ __host__ half1 hzero()</div>
<div class="line"><a name="l00106"></a><span class="lineno"> 106</span> {</div>
<div class="line"><a name="l00107"></a><span class="lineno"> 107</span>  half1 ret;</div>
<div class="line"><a name="l00108"></a><span class="lineno"> 108</span>  ret.x = 0x0000U;</div>
<div class="line"><a name="l00109"></a><span class="lineno"> 109</span>  <span class="keywordflow">return</span> ret;</div>
<div class="line"><a name="l00110"></a><span class="lineno"> 110</span> }</div>
<div class="line"><a name="l00111"></a><span class="lineno"> 111</span> </div>
<div class="line"><a name="l00112"></a><span class="lineno"> 112</span> <span class="keyword">static</span> __inline__ __device__ __host__ half1 hone()</div>
<div class="line"><a name="l00113"></a><span class="lineno"> 113</span> {</div>
<div class="line"><a name="l00114"></a><span class="lineno"> 114</span>  half1 ret;</div>
<div class="line"><a name="l00115"></a><span class="lineno"> 115</span>  ret.x = 0x3c00U;</div>
<div class="line"><a name="l00116"></a><span class="lineno"> 116</span>  <span class="keywordflow">return</span> ret;</div>
<div class="line"><a name="l00117"></a><span class="lineno"> 117</span> }</div>
<div class="line"><a name="l00118"></a><span class="lineno"> 118</span> </div>
<div class="line"><a name="l00119"></a><span class="lineno"> 119</span> } } <span class="comment">// namespace</span></div>
</div><!-- fragment --></div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by  <a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.5
</small></address>
</body>
</html>