user_namespace.c

/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, version 2 of the
 * License.
 */

#include <linux/export.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/user_namespace.h>
#include <linux/proc_ns.h>
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>
#include <linux/bsearch.h>
#include <linux/sort.h>

/* sysctl */
int unprivileged_userns_clone;

static struct kmem_cache *user_ns_cachep __read_mostly;
static DEFINE_MUTEX(userns_state_mutex);

static bool new_idmap_permitted(const struct file *file,
                                struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *map);
static void free_user_ns(struct work_struct *work);

static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
        return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}

static void dec_user_namespaces(struct ucounts *ucounts)
{
        return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES);
}

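/*
 * set_cred_user_ns - Bind a prepared set of credentials to @user_ns,
 * resetting the securebits and capability sets so that the full
 * capability sets only carry meaning inside the new namespace.
 */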
static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
        /* Start with the same capabilities as init but useless for doing
         * anything as the capabilities are bound to the new user namespace.
         */
        cred->securebits = SECUREBITS_DEFAULT;
        cred->cap_inheritable = CAP_EMPTY_SET;
        cred->cap_permitted = CAP_FULL_SET;
        cred->cap_effective = CAP_FULL_SET;
        cred->cap_ambient = CAP_EMPTY_SET;
        cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
        key_put(cred->request_key_auth);
        cred->request_key_auth = NULL;
#endif
        /* tgcred will be cleared in our caller because CLONE_THREAD won't be set */
        cred->user_ns = user_ns;
}

/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 */
int create_user_ns(struct cred *new)
{
        struct user_namespace *ns, *parent_ns = new->user_ns;
        kuid_t owner = new->euid;
        kgid_t group = new->egid;
        struct ucounts *ucounts;
        int ret, i;

        ret = -ENOSPC;
        if (parent_ns->level > 32)
                goto fail;

        ucounts = inc_user_namespaces(parent_ns, owner);
        if (!ucounts)
                goto fail;

        /*
         * Verify that we cannot violate the policy of which files
         * may be accessed that is specified by the root directory,
         * by verifying that the root directory is at the root of the
         * mount namespace which allows all files to be accessed.
         */
        ret = -EPERM;
        if (current_chrooted())
                goto fail_dec;

        /* The creator needs a mapping in the parent user namespace
         * or else we won't be able to reasonably tell userspace who
         * created a user_namespace.
         */
        ret = -EPERM;
        if (!kuid_has_mapping(parent_ns, owner) ||
            !kgid_has_mapping(parent_ns, group))
                goto fail_dec;

        ret = -ENOMEM;
        ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
        if (!ns)
                goto fail_dec;

        ret = ns_alloc_inum(&ns->ns);
        if (ret)
                goto fail_free;
        ns->ns.ops = &userns_operations;

        atomic_set(&ns->count, 1);
        /* Leave the new->user_ns reference with the new user namespace. */
        ns->parent = parent_ns;
        ns->level = parent_ns->level + 1;
        ns->owner = owner;
        ns->group = group;
        INIT_WORK(&ns->work, free_user_ns);
        for (i = 0; i < UCOUNT_COUNTS; i++) {
                ns->ucount_max[i] = INT_MAX;
        }
        ns->ucounts = ucounts;

        /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
        mutex_lock(&userns_state_mutex);
        ns->flags = parent_ns->flags;
        mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_PERSISTENT_KEYRINGS
        init_rwsem(&ns->persistent_keyring_register_sem);
#endif
        ret = -ENOMEM;
        if (!setup_userns_sysctls(ns))
                goto fail_keyring;

        set_cred_user_ns(new, ns);
        return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
        key_put(ns->persistent_keyring_register);
#endif
        ns_free_inum(&ns->ns);
fail_free:
        kmem_cache_free(user_ns_cachep, ns);
fail_dec:
        dec_user_namespaces(ucounts);
fail:
        return ret;
}

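/*
 * unshare_userns - Handle the CLONE_NEWUSER piece of unshare(2): prepare
 * a fresh set of credentials and hang a new user namespace off of it.
 * The caller commits the credentials on success.
 */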
int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
{
        struct cred *cred;
        int err = -ENOMEM;

        if (!(unshare_flags & CLONE_NEWUSER))
                return 0;

        cred = prepare_creds();
        if (cred) {
                err = create_user_ns(cred);
                if (err)
                        put_cred(cred);
                else
                        *new_cred = cred;
        }

        return err;
}

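/*
 * free_user_ns - Deferred destructor, run from the workqueue by
 * __put_user_ns(). Tears down one namespace per iteration and keeps
 * walking up the parent chain for as long as each parent's refcount
 * also drops to zero.
 */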
static void free_user_ns(struct work_struct *work)
{
        struct user_namespace *parent, *ns =
                container_of(work, struct user_namespace, work);

        do {
                struct ucounts *ucounts = ns->ucounts;
                parent = ns->parent;
                if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
                        kfree(ns->gid_map.forward);
                        kfree(ns->gid_map.reverse);
                }
                if (ns->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
                        kfree(ns->uid_map.forward);
                        kfree(ns->uid_map.reverse);
                }
                if (ns->projid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
                        kfree(ns->projid_map.forward);
                        kfree(ns->projid_map.reverse);
                }
                retire_userns_sysctls(ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
                key_put(ns->persistent_keyring_register);
#endif
                ns_free_inum(&ns->ns);
                kmem_cache_free(user_ns_cachep, ns);
                dec_user_namespaces(ucounts);
                ns = parent;
        } while (atomic_dec_and_test(&parent->count));
}

void __put_user_ns(struct user_namespace *ns)
{
        schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);

/**
 * idmap_key struct holds the information necessary to find an idmapping in a
 * sorted idmap array. It is passed to cmp_map_id() as first argument.
 */
struct idmap_key {
        bool map_up; /* true -> id from kid; false -> kid from id */
        u32 id; /* id to find */
        u32 count; /* == 0 unless used with map_id_range_down() */
};

/**
 * cmp_map_id - Function to be passed to bsearch() to find the requested
 * idmapping. Expects struct idmap_key to be passed via @k.
 */
static int cmp_map_id(const void *k, const void *e)
{
        u32 first, last, id2;
        const struct idmap_key *key = k;
        const struct uid_gid_extent *el = e;

        id2 = key->id + key->count - 1;

        /* handle map_id_{down,up}() */
        if (key->map_up)
                first = el->lower_first;
        else
                first = el->first;

        last = first + el->count - 1;

        if (key->id >= first && key->id <= last &&
            (id2 >= first && id2 <= last))
                return 0;

        if (key->id < first || id2 < first)
                return -1;

        return 1;
}

/**
 * map_id_range_down_max - Find idmap via binary search in ordered idmap array.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_range_down_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
{
        struct idmap_key key;

        key.map_up = false;
        key.count = count;
        key.id = id;

        return bsearch(&key, map->forward, extents,
                       sizeof(struct uid_gid_extent), cmp_map_id);
}

/**
 * map_id_range_down_base - Find idmap via linear search in static extent array.
 * Can only be called if number of mappings is equal or less than
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_range_down_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count)
{
        unsigned idx;
        u32 first, last, id2;

        id2 = id + count - 1;

        /* Find the matching extent */
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last &&
                    (id2 >= first && id2 <= last))
                        return &map->extent[idx];
        }
        return NULL;
}

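/*
 * map_id_range_down - Map the range [id, id + count - 1] down into the
 * kernel-global id space, or return (u32) -1 if the range is not covered
 * by a single extent. The smp_rmb() pairs with the smp_wmb() in
 * map_write(), so a reader that observes the published extent count also
 * observes the extent values written before it.
 */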
static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
{
        struct uid_gid_extent *extent;
        unsigned extents = map->nr_extents;
        smp_rmb();

        if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
                extent = map_id_range_down_base(extents, map, id, count);
        else
                extent = map_id_range_down_max(extents, map, id, count);

        /* Map the id or note failure */
        if (extent)
                id = (id - extent->first) + extent->lower_first;
        else
                id = (u32) -1;

        return id;
}

static u32 map_id_down(struct uid_gid_map *map, u32 id)
{
        return map_id_range_down(map, id, 1);
}

/**
 * map_id_up_base - Find idmap via linear search in static extent array.
 * Can only be called if number of mappings is equal or less than
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id)
{
        unsigned idx;
        u32 first, last;

        /* Find the matching extent */
        for (idx = 0; idx < extents; idx++) {
                first = map->extent[idx].lower_first;
                last = first + map->extent[idx].count - 1;
                if (id >= first && id <= last)
                        return &map->extent[idx];
        }
        return NULL;
}

/**
 * map_id_up_max - Find idmap via binary search in ordered idmap array.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static struct uid_gid_extent *
map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id)
{
        struct idmap_key key;

        key.map_up = true;
        key.count = 1;
        key.id = id;

        return bsearch(&key, map->reverse, extents,
                       sizeof(struct uid_gid_extent), cmp_map_id);
}

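/*
 * map_id_up - The inverse of map_id_down(): map a kernel-global id back
 * into the namespace's id space, or return (u32) -1 if no extent covers
 * it. The bsearch path uses the reverse-sorted copy of the extents.
 */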
static u32 map_id_up(struct uid_gid_map *map, u32 id)
{
        struct uid_gid_extent *extent;
        unsigned extents = map->nr_extents;
        smp_rmb();

        if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
                extent = map_id_up_base(extents, map, id);
        else
                extent = map_id_up_max(extents, map, id);

        /* Map the id or note failure */
        if (extent)
                id = (id - extent->lower_first) + extent->first;
        else
                id = (u32) -1;

        return id;
}

/**
 * make_kuid - Map a user-namespace uid pair into a kuid.
 * @ns:  User namespace that the uid is in
 * @uid: User identifier
 *
 * Maps a user-namespace uid pair into a kernel internal kuid,
 * and returns that kuid.
 *
 * When there is no mapping defined for the user-namespace uid
 * pair INVALID_UID is returned. Callers are expected to test
 * for and handle INVALID_UID being returned. INVALID_UID
 * may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
        /* Map the uid to a global kernel uid */
        return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);

/**
 * from_kuid - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
        /* Map the uid from a global kernel uid */
        return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);

/**
 * from_kuid_munged - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kuid, from_kuid_munged never fails and always
 * returns a valid uid. This makes from_kuid_munged appropriate
 * for use in syscalls like stat and getuid where failing the
 * system call and failing to provide a valid uid are not
 * options.
 *
 * If @kuid has no mapping in @targ overflowuid is returned.
 */
uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
{
        uid_t uid;
        uid = from_kuid(targ, kuid);
        if (uid == (uid_t) -1)
                uid = overflowuid;
        return uid;
}
EXPORT_SYMBOL(from_kuid_munged);

/**
 * make_kgid - Map a user-namespace gid pair into a kgid.
 * @ns:  User namespace that the gid is in
 * @gid: group identifier
 *
 * Maps a user-namespace gid pair into a kernel internal kgid,
 * and returns that kgid.
 *
 * When there is no mapping defined for the user-namespace gid
 * pair INVALID_GID is returned. Callers are expected to test
 * for and handle INVALID_GID being returned. INVALID_GID may be
 * tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
        /* Map the gid to a global kernel gid */
        return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);

/**
 * from_kgid - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
        /* Map the gid from a global kernel gid */
        return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);

/**
 * from_kgid_munged - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kgid, from_kgid_munged never fails and always
 * returns a valid gid. This makes from_kgid_munged appropriate
 * for use in syscalls like stat and getgid where failing the
 * system call and failing to provide a valid gid are not options.
 *
 * If @kgid has no mapping in @targ overflowgid is returned.
 */
gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
{
        gid_t gid;
        gid = from_kgid(targ, kgid);
        if (gid == (gid_t) -1)
                gid = overflowgid;
        return gid;
}
EXPORT_SYMBOL(from_kgid_munged);

/**
 * make_kprojid - Map a user-namespace projid pair into a kprojid.
 * @ns:     User namespace that the projid is in
 * @projid: Project identifier
 *
 * Maps a user-namespace projid pair into a kernel internal kprojid,
 * and returns that kprojid.
 *
 * When there is no mapping defined for the user-namespace projid
 * pair INVALID_PROJID is returned. Callers are expected to test
 * for and handle INVALID_PROJID being returned. INVALID_PROJID
 * may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
        /* Map the projid to a global kernel projid */
        return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);

/**
 * from_kprojid - Create a projid from a kprojid user-namespace pair.
 * @targ:    The user namespace we want a projid in.
 * @kprojid: The kernel internal project identifier to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
        /* Map the projid from a global kernel projid */
        return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);

/**
 * from_kprojid_munged - Create a projid from a kprojid user-namespace pair.
 * @targ:    The user namespace we want a projid in.
 * @kprojid: The kernel internal projid to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * Unlike from_kprojid, from_kprojid_munged never fails and always
 * returns a valid projid. This makes from_kprojid_munged
 * appropriate for use in syscalls like stat where failing the
 * system call and failing to provide a valid projid are not
 * options.
 *
 * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned.
 */
projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid)
{
        projid_t projid;
        projid = from_kprojid(targ, kprojid);
        if (projid == (projid_t) -1)
                projid = OVERFLOW_PROJID;
        return projid;
}
EXPORT_SYMBOL(from_kprojid_munged);

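/*
 * The *_m_show() callbacks below back the /proc/<pid>/{uid,gid,projid}_map
 * files: each extent is printed as "first lower_first count", with the
 * lower id translated into the namespace of the reader (or of the parent,
 * when the reader sits in the namespace being displayed).
 */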
static int uid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        uid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                   extent->first,
                   lower,
                   extent->count);

        return 0;
}

static int gid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        gid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                   extent->first,
                   lower,
                   extent->count);

        return 0;
}

static int projid_m_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        struct uid_gid_extent *extent = v;
        struct user_namespace *lower_ns;
        projid_t lower;

        lower_ns = seq_user_ns(seq);
        if ((lower_ns == ns) && lower_ns->parent)
                lower_ns = lower_ns->parent;

        lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first));

        seq_printf(seq, "%10u %10u %10u\n",
                   extent->first,
                   lower,
                   extent->count);

        return 0;
}

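/*
 * m_start - Common seq_file start callback: position the iterator at the
 * extent indexed by *ppos, reading from the inline extent array or the
 * separately allocated forward array depending on how many extents exist.
 */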
static void *m_start(struct seq_file *seq, loff_t *ppos,
                     struct uid_gid_map *map)
{
        loff_t pos = *ppos;
        unsigned extents = map->nr_extents;
        smp_rmb();

        if (pos >= extents)
                return NULL;

        if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
                return &map->extent[pos];

        return &map->forward[pos];
}

static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->uid_map);
}

static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->gid_map);
}

static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
        struct user_namespace *ns = seq->private;

        return m_start(seq, ppos, &ns->projid_map);
}

static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return seq->op->start(seq, pos);
}

static void m_stop(struct seq_file *seq, void *v)
{
        return;
}

const struct seq_operations proc_uid_seq_operations = {
        .start = uid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
        .start = gid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
        .start = projid_m_start,
        .stop = m_stop,
        .next = m_next,
        .show = projid_m_show,
};

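/*
 * mappings_overlap - Check whether either the upper or the lower id range
 * of @extent intersects any extent already collected in @new_map. Used to
 * reject ambiguous idmaps while parsing a map file.
 */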
static bool mappings_overlap(struct uid_gid_map *new_map,
                             struct uid_gid_extent *extent)
{
        u32 upper_first, lower_first, upper_last, lower_last;
        unsigned idx;

        upper_first = extent->first;
        lower_first = extent->lower_first;
        upper_last = upper_first + extent->count - 1;
        lower_last = lower_first + extent->count - 1;

        for (idx = 0; idx < new_map->nr_extents; idx++) {
                u32 prev_upper_first, prev_lower_first;
                u32 prev_upper_last, prev_lower_last;
                struct uid_gid_extent *prev;

                if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
                        prev = &new_map->extent[idx];
                else
                        prev = &new_map->forward[idx];

                prev_upper_first = prev->first;
                prev_lower_first = prev->lower_first;
                prev_upper_last = prev_upper_first + prev->count - 1;
                prev_lower_last = prev_lower_first + prev->count - 1;

                /* Does the upper range intersect a previous extent? */
                if ((prev_upper_first <= upper_last) &&
                    (prev_upper_last >= upper_first))
                        return true;

                /* Does the lower range intersect a previous extent? */
                if ((prev_lower_first <= lower_last) &&
                    (prev_lower_last >= lower_first))
                        return true;
        }
        return false;
}

/**
 * insert_extent - Safely insert a new idmap extent into struct uid_gid_map.
 * Takes care to allocate a 4K block of memory if the number of mappings exceeds
 * UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static int insert_extent(struct uid_gid_map *map, struct uid_gid_extent *extent)
{
        struct uid_gid_extent *dest;

        if (map->nr_extents == UID_GID_MAP_MAX_BASE_EXTENTS) {
                struct uid_gid_extent *forward;

                /* Allocate memory for 340 mappings. */
                forward = kmalloc_array(UID_GID_MAP_MAX_EXTENTS,
                                        sizeof(struct uid_gid_extent),
                                        GFP_KERNEL);
                if (!forward)
                        return -ENOMEM;

                /* Copy over memory. Only set up memory for the forward pointer.
                 * Defer the memory setup for the reverse pointer.
                 */
                memcpy(forward, map->extent,
                       map->nr_extents * sizeof(map->extent[0]));

                map->forward = forward;
                map->reverse = NULL;
        }

        if (map->nr_extents < UID_GID_MAP_MAX_BASE_EXTENTS)
                dest = &map->extent[map->nr_extents];
        else
                dest = &map->forward[map->nr_extents];

        *dest = *extent;
        map->nr_extents++;
        return 0;
}

/* cmp function to sort() forward mappings */
static int cmp_extents_forward(const void *a, const void *b)
{
        const struct uid_gid_extent *e1 = a;
        const struct uid_gid_extent *e2 = b;

        if (e1->first < e2->first)
                return -1;

        if (e1->first > e2->first)
                return 1;

        return 0;
}

/* cmp function to sort() reverse mappings */
static int cmp_extents_reverse(const void *a, const void *b)
{
        const struct uid_gid_extent *e1 = a;
        const struct uid_gid_extent *e2 = b;

        if (e1->lower_first < e2->lower_first)
                return -1;

        if (e1->lower_first > e2->lower_first)
                return 1;

        return 0;
}

/**
 * sort_idmaps - Sorts an array of idmap entries.
 * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS.
 */
static int sort_idmaps(struct uid_gid_map *map)
{
        if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
                return 0;

        /* Sort forward array. */
        sort(map->forward, map->nr_extents, sizeof(struct uid_gid_extent),
             cmp_extents_forward, NULL);

        /* Only copy the memory from forward we actually need. */
        map->reverse = kmemdup(map->forward,
                               map->nr_extents * sizeof(struct uid_gid_extent),
                               GFP_KERNEL);
        if (!map->reverse)
                return -ENOMEM;

        /* Sort reverse array. */
        sort(map->reverse, map->nr_extents, sizeof(struct uid_gid_extent),
             cmp_extents_reverse, NULL);

        return 0;
}

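/*
 * map_write - Parse and install an id mapping written to a
 * /proc/<pid>/{uid,gid,projid}_map file. Each line carries
 * "ID-inside-ns ID-outside-ns length"; the extents are validated,
 * translated through @parent_map into the kernel-global id space,
 * sorted if need be, and then published with a single
 * smp_wmb()-ordered store to map->nr_extents. A map can only be
 * written once.
 */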
static ssize_t map_write(struct file *file, const char __user *buf,
                         size_t count, loff_t *ppos,
                         int cap_setid,
                         struct uid_gid_map *map,
                         struct uid_gid_map *parent_map)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct uid_gid_map new_map;
        unsigned idx;
        struct uid_gid_extent extent;
        char *kbuf = NULL, *pos, *next_line;
        ssize_t ret;

        /* Only allow < page size writes at the beginning of the file */
        if ((*ppos != 0) || (count >= PAGE_SIZE))
                return -EINVAL;

        /* Slurp in the user data */
        kbuf = memdup_user_nul(buf, count);
        if (IS_ERR(kbuf))
                return PTR_ERR(kbuf);

        /*
         * The userns_state_mutex serializes all writes to any given map.
         *
         * Any map is only ever written once.
         *
         * An id map fits within 1 cache line on most architectures.
         *
         * On read nothing needs to be done unless you are on an
         * architecture with a crazy cache coherency model like alpha.
         *
         * There is a one time data dependency between reading the
         * count of the extents and the values of the extents. The
         * desired behavior is to see the values of the extents that
         * were written before the count of the extents.
         *
         * To achieve this smp_wmb() is used to guarantee the write
         * order and smp_rmb() guarantees that we don't have crazy
         * architectures returning stale data.
         */
        mutex_lock(&userns_state_mutex);

        memset(&new_map, 0, sizeof(struct uid_gid_map));

        ret = -EPERM;
        /* Only allow one successful write to the map */
        if (map->nr_extents != 0)
                goto out;

        /*
         * Adjusting namespace settings requires capabilities on the target.
         */
        if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
                goto out;

        /* Parse the user data */
        ret = -EINVAL;
        pos = kbuf;
        for (; pos; pos = next_line) {

                /* Find the end of line and ensure I don't look past it */
                next_line = strchr(pos, '\n');
                if (next_line) {
                        *next_line = '\0';
                        next_line++;
                        if (*next_line == '\0')
                                next_line = NULL;
                }

                pos = skip_spaces(pos);
                extent.first = simple_strtoul(pos, &pos, 10);
                if (!isspace(*pos))
                        goto out;

                pos = skip_spaces(pos);
                extent.lower_first = simple_strtoul(pos, &pos, 10);
                if (!isspace(*pos))
                        goto out;

                pos = skip_spaces(pos);
                extent.count = simple_strtoul(pos, &pos, 10);
                if (*pos && !isspace(*pos))
                        goto out;

                /* Verify there is no trailing junk on the line */
                pos = skip_spaces(pos);
                if (*pos != '\0')
                        goto out;

                /* Verify we have been given valid starting values */
                if ((extent.first == (u32) -1) ||
                    (extent.lower_first == (u32) -1))
                        goto out;

                /* Verify count is not zero and does not cause the
                 * extent to wrap
                 */
                if ((extent.first + extent.count) <= extent.first)
                        goto out;
                if ((extent.lower_first + extent.count) <=
                    extent.lower_first)
                        goto out;

                /* Do the ranges in extent overlap any previous extents? */
                if (mappings_overlap(&new_map, &extent))
                        goto out;

                if ((new_map.nr_extents + 1) == UID_GID_MAP_MAX_EXTENTS &&
                    (next_line != NULL))
                        goto out;

                ret = insert_extent(&new_map, &extent);
                if (ret < 0)
                        goto out;
                ret = -EINVAL;
        }
        /* Be very certain the new map actually exists */
        if (new_map.nr_extents == 0)
                goto out;

        ret = -EPERM;
        /* Validate the user is allowed to use the user ids mapped to. */
        if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
                goto out;

        ret = -EPERM;
        /* Map the lower ids from the parent user namespace to the
         * kernel global id space.
         */
        for (idx = 0; idx < new_map.nr_extents; idx++) {
                struct uid_gid_extent *e;
                u32 lower_first;

                if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
                        e = &new_map.extent[idx];
                else
                        e = &new_map.forward[idx];

                lower_first = map_id_range_down(parent_map,
                                                e->lower_first,
                                                e->count);

                /* Fail if we cannot map the specified extent to
                 * the kernel global id space.
                 */
                if (lower_first == (u32) -1)
                        goto out;

                e->lower_first = lower_first;
        }

        /*
         * If we want to use binary search for lookup, this clones the extent
         * array and sorts both copies.
         */
        ret = sort_idmaps(&new_map);
        if (ret < 0)
                goto out;

        /* Install the map */
        if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
                memcpy(map->extent, new_map.extent,
                       new_map.nr_extents * sizeof(new_map.extent[0]));
        } else {
                map->forward = new_map.forward;
                map->reverse = new_map.reverse;
        }
        smp_wmb();
        map->nr_extents = new_map.nr_extents;

        *ppos = count;
        ret = count;
out:
        if (ret < 0 && new_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
                kfree(new_map.forward);
                kfree(new_map.reverse);
                map->forward = NULL;
                map->reverse = NULL;
                map->nr_extents = 0;
        }

        mutex_unlock(&userns_state_mutex);
        kfree(kbuf);
        return ret;
}

ssize_t proc_uid_map_write(struct file *file, const char __user *buf,
                           size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        return map_write(file, buf, size, ppos, CAP_SETUID,
                         &ns->uid_map, &ns->parent->uid_map);
}

ssize_t proc_gid_map_write(struct file *file, const char __user *buf,
                           size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        return map_write(file, buf, size, ppos, CAP_SETGID,
                         &ns->gid_map, &ns->parent->gid_map);
}

ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
                              size_t size, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        struct user_namespace *seq_ns = seq_user_ns(seq);

        if (!ns->parent)
                return -EPERM;

        if ((seq_ns != ns) && (seq_ns != ns->parent))
                return -EPERM;

        /* Anyone can set any valid project id, no capability needed */
        return map_write(file, buf, size, ppos, -1,
                         &ns->projid_map, &ns->parent->projid_map);
}

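/*
 * new_idmap_permitted - Decide whether the opener of the map file may
 * install @new_map: either it is the single identity mapping of the
 * namespace owner's own euid (or egid, when setgroups is denied), or the
 * writer and the file opener both hold the required capability over the
 * parent user namespace.
 */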
static bool new_idmap_permitted(const struct file *file,
                                struct user_namespace *ns, int cap_setid,
                                struct uid_gid_map *new_map)
{
        const struct cred *cred = file->f_cred;

        /* Don't allow mappings that would allow anything that wouldn't
         * be allowed without the establishment of unprivileged mappings.
         */
        if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
            uid_eq(ns->owner, cred->euid)) {
                u32 id = new_map->extent[0].lower_first;
                if (cap_setid == CAP_SETUID) {
                        kuid_t uid = make_kuid(ns->parent, id);
                        if (uid_eq(uid, cred->euid))
                                return true;
                } else if (cap_setid == CAP_SETGID) {
                        kgid_t gid = make_kgid(ns->parent, id);
                        if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
                            gid_eq(gid, cred->egid))
                                return true;
                }
        }

        /* Allow anyone to set a mapping that doesn't require privilege */
        if (!cap_valid(cap_setid))
                return true;

        /* Allow the specified ids if we have the appropriate capability
         * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
         * And the opener of the id file also had the appropriate capability.
         */
        if (ns_capable(ns->parent, cap_setid) &&
            file_ns_capable(file, ns->parent, cap_setid))
                return true;

        return false;
}

int proc_setgroups_show(struct seq_file *seq, void *v)
{
        struct user_namespace *ns = seq->private;
        unsigned long userns_flags = READ_ONCE(ns->flags);

        seq_printf(seq, "%s\n",
                   (userns_flags & USERNS_SETGROUPS_ALLOWED) ?
                   "allow" : "deny");
        return 0;
}

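/*
 * proc_setgroups_write - Accept "allow" or "deny" written to
 * /proc/<pid>/setgroups. The transition is one way in each direction:
 * setgroups cannot be re-enabled once denied, and cannot be denied once
 * a gid map has been installed.
 */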
ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
                             size_t count, loff_t *ppos)
{
        struct seq_file *seq = file->private_data;
        struct user_namespace *ns = seq->private;
        char kbuf[8], *pos;
        bool setgroups_allowed;
        ssize_t ret;

        /* Only allow a very narrow range of strings to be written */
        ret = -EINVAL;
        if ((*ppos != 0) || (count >= sizeof(kbuf)))
                goto out;

        /* What was written? */
        ret = -EFAULT;
        if (copy_from_user(kbuf, buf, count))
                goto out;
        kbuf[count] = '\0';
        pos = kbuf;

        /* What is being requested? */
        ret = -EINVAL;
        if (strncmp(pos, "allow", 5) == 0) {
                pos += 5;
                setgroups_allowed = true;
        }
        else if (strncmp(pos, "deny", 4) == 0) {
                pos += 4;
                setgroups_allowed = false;
        }
        else
                goto out;

        /* Verify there is no trailing junk on the line */
        pos = skip_spaces(pos);
        if (*pos != '\0')
                goto out;

        ret = -EPERM;
        mutex_lock(&userns_state_mutex);
        if (setgroups_allowed) {
                /* Enabling setgroups after setgroups has been disabled
                 * is not allowed.
                 */
                if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
                        goto out_unlock;
        } else {
                /* Permanently disabling setgroups after setgroups has
                 * been enabled by writing the gid_map is not allowed.
                 */
                if (ns->gid_map.nr_extents != 0)
                        goto out_unlock;
                ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
        }
        mutex_unlock(&userns_state_mutex);

        /* Report a successful write */
        *ppos = count;
        ret = count;
out:
        return ret;
out_unlock:
        mutex_unlock(&userns_state_mutex);
        goto out;
}

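/*
 * userns_may_setgroups - setgroups(2) is permitted only once a gid
 * mapping exists and the namespace still has USERNS_SETGROUPS_ALLOWED
 * set; both conditions are read under userns_state_mutex.
 */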
bool userns_may_setgroups(const struct user_namespace *ns)
{
        bool allowed;

        mutex_lock(&userns_state_mutex);
        /* It is not safe to use setgroups until a gid mapping in
         * the user namespace has been established.
         */
        allowed = ns->gid_map.nr_extents != 0;
        /* Is setgroups allowed? */
        allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED);
        mutex_unlock(&userns_state_mutex);

        return allowed;
}

/*
 * Returns true if @child is the same namespace or a descendant of
 * @ancestor.
 */
bool in_userns(const struct user_namespace *ancestor,
               const struct user_namespace *child)
{
        const struct user_namespace *ns;
        for (ns = child; ns->level > ancestor->level; ns = ns->parent)
                ;
        return (ns == ancestor);
}

bool current_in_userns(const struct user_namespace *target_ns)
{
        return in_userns(target_ns, current_user_ns());
}
EXPORT_SYMBOL(current_in_userns);

static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
        return container_of(ns, struct user_namespace, ns);
}

static struct ns_common *userns_get(struct task_struct *task)
{
        struct user_namespace *user_ns;

        rcu_read_lock();
        user_ns = get_user_ns(__task_cred(task)->user_ns);
        rcu_read_unlock();

        return user_ns ? &user_ns->ns : NULL;
}

static void userns_put(struct ns_common *ns)
{
        put_user_ns(to_user_ns(ns));
}

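/*
 * userns_install - setns(2) entry point for user namespaces: refuse
 * re-entry into the current namespace, multithreaded callers and shared
 * fs_structs, require CAP_SYS_ADMIN in the target namespace, and then
 * commit new credentials bound to it.
 */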
static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
        struct user_namespace *user_ns = to_user_ns(ns);
        struct cred *cred;

        /* Don't allow gaining capabilities by reentering
         * the same user namespace.
         */
        if (user_ns == current_user_ns())
                return -EINVAL;

        /* Tasks that share a thread group must share a user namespace */
        if (!thread_group_empty(current))
                return -EINVAL;

        if (current->fs->users != 1)
                return -EINVAL;

        if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                return -EPERM;

        cred = prepare_creds();
        if (!cred)
                return -ENOMEM;

        put_user_ns(cred->user_ns);
        set_cred_user_ns(cred, get_user_ns(user_ns));

        return commit_creds(cred);
}

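/*
 * ns_get_owner - Return a reference to the user namespace that owns @ns,
 * but only if the caller's user namespace is that owner or one of its
 * ancestors; otherwise fail with -EPERM.
 */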
struct ns_common *ns_get_owner(struct ns_common *ns)
{
        struct user_namespace *my_user_ns = current_user_ns();
        struct user_namespace *owner, *p;

        /* See if the owner is in the current user namespace */
        owner = p = ns->ops->owner(ns);
        for (;;) {
                if (!p)
                        return ERR_PTR(-EPERM);
                if (p == my_user_ns)
                        break;
                p = p->parent;
        }

        return &get_user_ns(owner)->ns;
}

static struct user_namespace *userns_owner(struct ns_common *ns)
{
        return to_user_ns(ns)->parent;
}

const struct proc_ns_operations userns_operations = {
        .name		= "user",
        .type		= CLONE_NEWUSER,
        .get		= userns_get,
        .put		= userns_put,
        .install	= userns_install,
        .owner		= userns_owner,
        .get_parent	= ns_get_owner,
};

static __init int user_namespaces_init(void)
{
        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
        return 0;
}
subsys_initcall(user_namespaces_init);