forked from dagwieers/mvfs71
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmvfs_procops.c
1613 lines (1424 loc) · 54.7 KB
/
mvfs_procops.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* * (C) Copyright IBM Corporation 1991, 2012. */
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
Author: IBM Corporation
This module is part of the IBM (R) Rational (R) ClearCase (R)
Multi-version file system (MVFS).
For support, please visit http://www.ibm.com/software/support
*/
/* mvfs_procops.c */
#include "mvfs_systm.h"
#include "mvfs.h"
STATIC void mvfs_procinherit_copy(mvfs_proc_t *to,
mvfs_proc_t *from);
STATIC mvfs_proc_t* mvfs_myproc(MVFS_THREADID_T tid);
STATIC mvfs_thread_t * mvfs_threadalloc(MVFS_THREADID_T *thrid);
STATIC void mvfs_threadfree(mvfs_thread_t *thr, tbs_boolean_t needs_dequeue);
STATIC void mvfs_threadrele(mvfs_thread_t *thr);
STATIC mvfs_proc_t * mvfs_procalloc(MVFS_PROCID_T *procid,
MVFS_PROCTAG_T *proctag);
STATIC void mvfs_procfree(mvfs_proc_t *, mvfs_thread_t *);
STATIC void mvfs_procfree_int(mvfs_proc_t *);
STATIC void mvfs_procrele(mvfs_proc_t *, mvfs_thread_t *);
STATIC void mvfs_snapshot_thread(mvfs_thread_t *thr);
STATIC int mvfs_purgechain(int bucket, int purge,
mvfs_thread_t *mythread);
STATIC void mvfs_procpurge_afps(P_NONE);
STATIC unsigned int mvfs_pidhash(MVFS_PROCID_T *pidp);
/*
* Routine to manipulate the per-process MVFS state (other than current
* view which is stored as the u_rdir vnode).
*
* Here's the scheme for process and thread state auditing in the MVFS:
*
* THREADS:
*
* (a) Each thread has a private mvfs_thread_t structure which
* contains the current MVFS-specific thread state. Nobody else ever
* looks at this structure so there is no locking required. The
* thread state persists until the process exits.
* mvfs_thread_t.thr_threadid uniquely identifies the thread to which
* it belongs.
*
* (b) Each time the MVFS is entered from outside code (i.e. every
* VOP_xxx), the thread's state is found by lookup in a hash table,
* and refreshed from the process's state. [see mvfs_enter_fs()] If
* necessary, a new thread state structure is created and linked into
* the process's chains. If necessary, a new process state structure
* is created, its fields inherited, and it is linked into the process
* hash table. New thread allocation is not protected by any lock
* (there is no potential to race another thread, since the
* mvfs_thread_t is thread-specific).
*
* (c) When a thread sets any MVFS process state, it first updates the
* state in its private mvfs_thread_t, then synchronizes it to the
* process state [see mvfs_sync_procstate()]
*
* (d) mvfs_thread_t.thr_proc is always valid. Process structures never
* disappear without removing all their threads, so the thr_proc will
* always be directly usable without validation
*
* (e) All mvfs_thread_t's are chained into a hash table by
* mvfs_thread_t.thr_hashnxt. Operations on the thread hash table
* are protected by a pool of spin locks.
*
* (f) Some system threads must not block in paging contexts. In case
* they ever get into this code, their thread and process structures
* are allocated from a static table which is configured based on a
* particular port's mdep.h file.
*
* XXX/FIXME:
*
* This code used to assume that a thread never leaves its
* originating process. This isn't true on some systems. There is
* also an implicit assumption that the thread always wants to
* synchronize its state from a process, which also isn't true if a
* process is acting as an agent for other processes, with one thread
* per client process.
*
* We try to handle the first case in the mvfs_mythread() routine. If
* we detect a thread has changed processes, we treat it as a miss,
* discard its current state, and create a new thread and attach it to
* the new process.
*
* The second case is not handled at present---all threads in a given process
* share the auditing state of the process.
*
* PROCESSES:
*
* Since the MVFS does not get called on fork/exit under UNIX, these
* structures are 'loosely' consistent with the process table.
* With the exception of the view vnode pointer, these structures
* (or anything they point to) should not hold any resources (except
* memory) due to its loose consistency. Holding resources can result
* in strange user problems because they are not freed on process exit.
* Example: if you held a vnode on an NFS mount point, the NFS mount
* point would be unmountable, even though all processes using that
* mount point had exited. Very strange for administrators!
*
* (a) Each process has an mvfs_proc_t structure which contains the
* latest MVFS process state. The mvfs_proc_t contains a spinlock,
* which must be held during any reading or writing of the structure
* contents. (mvfs_proc_t.mp_procid,mvfs_proc_t.mp_proctag) uniquely
* identifies the process to which it belongs. mp_procid is immutable
* once an mvfs_proc_t is created and linked to a hash chain;
* mp_proctag may be changed (under the mvfs_proc_t's spinlock)
*
* (b) The mvfs_proc_t is created the first time a process is
* discovered to have entered the MVFS, and its state initialized from
* an ancestor process. [see mvfs_procinherit]
*
* (c) Threads initialize their state from the process's mvfs_proc_t,
* using the spinlock to protect the reading of the mvfs_proc_t.
*
* (d) All thread structures belonging to a process are linked on a
* singly-linked list, rooted at mvfs_proc_t.mp_threads and connected
* by mvfs_thread_t.thr_next.
*
* (e) All mvfs_proc_t's are chained into a hash table by
* mvfs_proc_t.mp_hashnxt. The hash table is only needed to find
* ancestor process state for inheritance and to clean up dead process
* state; most lookups of process state come directly from the
* mvfs_thread_t.thr_proc. All process hash table operations are
* protected by mvfs_proclock (a sleep lock).
*
* (f) When a process's state is created, the mvfs_proclock is held
* while inheritance searching continues, both because the lock is
* required for hash table lookups, and to ensure no processes are
* recycled while inheritance is searching. If an ancestor is found,
* its data are spinlocked while the inheritance copy proceeds.
*
* (g) Periodically [either due to a sync() on a filesystem, which calls
* mvfs_procpurge(), or due to an audit ioctl() operation, which calls
* mvfs_procpurge()], the hash table is walked in search of stale
* processes; any such have all their mvfs_thread_t's recycled and then
* the mvfs_proc_t itself recycled. This procedure requires the
* mvfs_proclock to walk the hash table, and dropping the lock while
* recycling a process. If the caller cannot wait (for instance,
* mfs_periodic_maintenace() calls mvfs_procpurge() with the NOSLEEP flag),
* conditional locking is used and the operation prematurely terminated if the
* lock cannot be acquired.
*
* (h) each thread holds one ref count on the process structure. The
* hash table link also holds one ref count.
*/
/*
* Locking summary:
*
* To look at your own thread state (mvfs_mythread()->*) you
* don't need any locks.
*
* To do anything to any process state (proc->*), you must
* MVFS_LOCK(&proc->mp_lock) around read or write access. This
* includes inheritance copies.
*
* To search or alter the thread hash table, you must hold the
* chain's spin lock. (includes inserting new entries)
*
* To search or alter the process hash table, you must hold the
* mvfs_proclock. (includes inserting new entries--which
* unfortunately means that a hash table rescan at creation will
* be required, since allocating a new entry may do page I/O and
* we must drop the lock for the duration of the allocation operation)
*
* Other older notes on process state:
* Because the MVFS does not get called on UNIX when a process dies,
* you must be careful not to store information in the thread or process
* state block which would be a problem if it were retained
* after process death. The routine "mvfs_procpurge()" is called
* from sync() every minute to garbage collect any dead process
* structures.
*
* One potentially risky area is the held vnode ptr for
* the audit file. This has to be done because of the
* restriction below about looking up pnames. In general
* it is not a problem because these files are only active
* during a build, and are files in /tmp or /usr/tmp whose
* FS's aren't unmounted except at reboot, and the reboot
* sync() calls will flush any stale process state and cleanup
* any residual vnode ptrs from dead processes.
*
* Restriction of calls:
* Because these proc ops are called from anywhere in the code
* with potentially any MVFS resource held/locked, these routines
* must be very careful about what they call. Forbidden operations
* include:
* 1) Activating/locking any MVFS vnode/mnode.
* 2) Any calls to lookup pathnames (even outside of the MVFS
* i.e. audit file pnames), because lookup can require
* (1) above.
*
* In general, try to keep this code restricted to memory
* operations (e.g. allocate/free memory and copying data around).
*/
/* Max size of mfs proctab */
extern int mvfs_maxnproc;
/*
 * mvfs_threadid_spl_pool -- splock pool -- protects:
 * all operations on hash buckets in mvfs_threadid_hashtable
 *
 * If you need both this and the mvfs_proclock, you must get the
 * mvfs_proclock (sleep lock) first.
 *
 * THREADID_SPLOCK(hash_val, _mcdp, lockpp, st) hands the pool, hash_val
 * and lockpp to SPLOCK_SELECT and then takes the lock at **lockpp with
 * SPLOCK, passing st as SPLOCK's second argument.
 * NOTE(review): SPLOCK_SELECT presumably stores the chosen lock's
 * address through lockpp -- confirm against its definition.
 *
 * _mcdp is parenthesized in the expansion so that any pointer-valued
 * expression may be passed, not just a plain identifier.
 *
 * The expansion is a bare { } block rather than do { } while (0), so it
 * must not be used as the unbraced body of an if that has an else.
 */
#define THREADID_SPLOCK(hash_val, _mcdp, lockpp, st) { \
        SPLOCK_SELECT(&((_mcdp)->proc_thr.mvfs_threadid_spl_pool), hash_val, HASH_SPLOCK_MAP, lockpp); \
        SPLOCK(**(lockpp), st); }
/*
 * THREADID_SPUNLOCK(hash_val, _mcdp, lockpp, st) repeats the
 * SPLOCK_SELECT lookup on mvfs_threadid_spl_pool for hash_val and then
 * releases the lock at **lockpp with SPUNLOCK, passing st as SPUNLOCK's
 * second argument.
 *
 * _mcdp is parenthesized in the expansion so that any pointer-valued
 * expression may be passed, not just a plain identifier.
 *
 * The expansion is a bare { } block rather than do { } while (0), so it
 * must not be used as the unbraced body of an if that has an else.
 */
#define THREADID_SPUNLOCK(hash_val, _mcdp, lockpp, st) { \
        SPLOCK_SELECT(&((_mcdp)->proc_thr.mvfs_threadid_spl_pool), hash_val, HASH_SPLOCK_MAP, lockpp); \
        SPUNLOCK(**(lockpp), st); }
/*
 * These two slab list pointers are only defined when MVFS_SYSTEM_KMEM is
 * not set.
 * NOTE(review): presumably they back a private allocator for the
 * mvfs_thread_t and mvfs_proc_t structures on ports that do not use the
 * system kmem allocator -- confirm against the allocation routines.
 */
#ifndef MVFS_SYSTEM_KMEM
struct mvfs_slab_list *mvfs_thread_slabs;
struct mvfs_slab_list *mvfs_proc_slabs;
#endif
/*
 * MVFS_PROCINIT - set up the thread and process hash tables and the
 * locks that protect them.
 *
 * mma_sizes is handed to MVFS_SIZE_DEFLOAD_NONZERO, together with
 * MVFS_THREADHASH_SZ_DEFAULT, to settle the thread hash table size.
 *
 * Returns 0 on success, ENOMEM if an allocation or the spinlock pool
 * setup fails.  On failure everything set up so far is torn down again:
 * the thread hash table is freed and its pointer reset to NULL
 * (mvfs_procdata_free() tests that pointer to detect the "not inited"
 * state), the spinlock pool is released if it had been created, and
 * mvfs_threadhash_sz is put back to the value it had on entry.
 */
int
mvfs_procinit(mvfs_cache_sizes_t *mma_sizes)
{
    u_long len;
    int sp_poolsize, error = 0;
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    /* Remember the incoming size so it can be restored on failure. */
    mcdp->mvfs_init_sizes.size[MVFS_SETCACHE_THREADHASHTAB_SZ] = mcdp->mvfs_threadhash_sz;
    MVFS_SIZE_DEFLOAD_NONZERO(mcdp->mvfs_threadhash_sz, mma_sizes, THREADHASHTAB_SZ,
                              MVFS_THREADHASH_SZ_DEFAULT);

    /* Thread hash table. */
    len = sizeof(mvfs_thread_t *) * MVFS_THREADHASH_SZ(mcdp);
    mcdp->proc_thr.mvfs_threadid_hashtable = (mvfs_thread_t **)KMEM_ALLOC(len, KM_SLEEP);
    if (mcdp->proc_thr.mvfs_threadid_hashtable == (mvfs_thread_t **)NULL) {
        mvfs_log(MFS_LOG_ERR,"mvfs_procinit: no memory (threadhash)");
        error = ENOMEM;
    }

    /* Spinlock pool guarding the thread hash buckets. */
    if (error == 0) {
        BZERO((caddr_t) mcdp->proc_thr.mvfs_threadid_hashtable, len);
        HASH_SPLOCK_SET_POOLSIZE(sp_poolsize, MVFS_THREADHASH_SZ(mcdp));
        if (mvfs_splock_pool_init(&(mcdp->proc_thr.mvfs_threadid_spl_pool), sp_poolsize, NULL,
                                  "mvfs_thr_hash_spl") != 0)
        {
            mvfs_log(MFS_LOG_ERR,"mvfs_procinit: no memory (threadhash locks)");
            KMEM_FREE(mcdp->proc_thr.mvfs_threadid_hashtable,
                      sizeof(mvfs_thread_t *)*MVFS_THREADHASH_SZ(mcdp));
            /* don't leave a dangling pointer that looks "inited" */
            mcdp->proc_thr.mvfs_threadid_hashtable = NULL;
            error = ENOMEM;
        }
    }

    /* Process hash table. */
    if (error == 0) {
        len = sizeof(mvfs_proc_t *) * MVFS_PROCHASH_SZ;
        mcdp->proc_thr.mvfs_procid_hashtable = (mvfs_proc_t **)KMEM_ALLOC(len, KM_SLEEP);
        if (mcdp->proc_thr.mvfs_procid_hashtable == (mvfs_proc_t **)NULL) {
            mvfs_log(MFS_LOG_ERR,"mvfs_procinit: no memory (prochash)");
            /* undo the spinlock pool created above as well */
            mvfs_splock_pool_free(&(mcdp->proc_thr.mvfs_threadid_spl_pool));
            KMEM_FREE(mcdp->proc_thr.mvfs_threadid_hashtable,
                      sizeof(mvfs_thread_t *)*MVFS_THREADHASH_SZ(mcdp));
            mcdp->proc_thr.mvfs_threadid_hashtable = NULL;
            error = ENOMEM;
        }
    }

    /* Remaining locks and counters. */
    if (error == 0) {
        BZERO((caddr_t) mcdp->proc_thr.mvfs_procid_hashtable, len);
        INITLOCK(&(mcdp->proc_thr.mvfs_proclock), "mvfsproc");
        INITSPLOCK(mcdp->proc_thr.mvfs_proc_alloclock, "mvfs_procalloc_spl");
        ASSERT(sizeof(mvfs_thread_t) >= sizeof(void *));
        ASSERT(sizeof(mvfs_proc_t) >= sizeof(void *));
        mcdp->proc_thr.mvfs_nproc_alloced = 0;
        mcdp->proc_thr.mvfs_nthr_alloced = 0;
    }

    if (error != 0) {
        /* put the table size back the way we found it */
        mcdp->mvfs_threadhash_sz = mcdp->mvfs_init_sizes.size[MVFS_SETCACHE_THREADHASHTAB_SZ];
    }
    return error;
}
/*
 * MVFS_PROCDATA_FREE - dispose of allocated proc state
 *
 * Does nothing if the thread hash table pointer is NULL (not inited).
 * Otherwise it flushes audit file pointers, purges every process
 * structure, releases the calling thread's own process, and then frees
 * both hash tables and the locks.  The table pointers are reset to NULL
 * once freed so that the "not inited" test above stays truthful if this
 * routine is entered again, and mvfs_threadhash_sz is restored to the
 * value saved in mvfs_init_sizes.
 */
void
mvfs_procdata_free()
{
    mvfs_thread_t *mythread;
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    if (mcdp->proc_thr.mvfs_threadid_hashtable == NULL) return; /* Not inited yet */

    mvfs_procpurge_afps();
    mythread = mvfs_mythread(NULL);
    mvfs_procpurge(MVFS_PROCPURGE_FLUSH);

    /*
     * only our process remains now; It has no auditing stuff. Safe to
     * discard it.
     */
    MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
    mvfs_procrele(mythread->thr_proc, mythread); /* unlocks it for us */

    ASSERT(mcdp->proc_thr.mvfs_nproc_alloced == 0 && mcdp->proc_thr.mvfs_nthr_alloced == 0);

    KMEM_FREE(mcdp->proc_thr.mvfs_threadid_hashtable,
              sizeof(mvfs_thread_t *)*MVFS_THREADHASH_SZ(mcdp));
    mcdp->proc_thr.mvfs_threadid_hashtable = NULL;  /* back to "not inited" */
    KMEM_FREE(mcdp->proc_thr.mvfs_procid_hashtable,
              sizeof(mvfs_proc_t *)*MVFS_PROCHASH_SZ);
    mcdp->proc_thr.mvfs_procid_hashtable = NULL;

    FREELOCK(&(mcdp->proc_thr.mvfs_proclock));
    mvfs_splock_pool_free(&(mcdp->proc_thr.mvfs_threadid_spl_pool));
    FREESPLOCK(mcdp->proc_thr.mvfs_proc_alloclock);

    mcdp->mvfs_threadhash_sz = mcdp->mvfs_init_sizes.size[MVFS_SETCACHE_THREADHASHTAB_SZ];
    return;
}
/*
 * MVFS_PROC_GETCACHES - report the thread and process hash table sizes
 * currently in effect by storing them into szp->size[].
 * Always returns 0.
 */
int
mvfs_proc_getcaches(mvfs_cache_sizes_t *szp)
{
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    szp->size[MVFS_SETCACHE_PROCHASHTAB_SZ] = MVFS_PROCHASH_SZ;
    szp->size[MVFS_SETCACHE_THREADHASHTAB_SZ] = MVFS_THREADHASH_SZ(mcdp);

    return 0;
}
/*
 * MVFS_PROC_COMPUTE_CACHES - fill in the hash table sizes in szp.
 *
 * The thread hash table size is only set (to the default) when the
 * caller has not already marked it in szp->mask; the process hash table
 * size is always overwritten.  Both mask bits are set on return.
 * scale_factor is not consulted here.  Always returns 0.
 */
int
mvfs_proc_compute_caches(
    ks_int32_t scale_factor,
    mvfs_cache_sizes_t *szp
)
{
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    /* leave a caller-supplied thread hash size alone */
    if (!(szp->mask & MVFS_CACHEBIT(THREADHASHTAB_SZ))) {
        szp->size[MVFS_SETCACHE_THREADHASHTAB_SZ] = MVFS_THREADHASH_SZ_DEFAULT;
        szp->mask |= MVFS_CACHEBIT(THREADHASHTAB_SZ);
    }

    /* not tunable */
    szp->size[MVFS_SETCACHE_PROCHASHTAB_SZ] = MVFS_PROCHASH_SZ;
    szp->mask |= MVFS_CACHEBIT(PROCHASHTAB_SZ);

    return 0;
}
/*
 * Zap a particular process: unlink it from its process hash chain,
 * release every thread still attached to it, and free the process
 * structure itself.
 *
 * REQUIRES: caller holds the mvfs_proclock, and has made sure that any
 * audit file stuff is gone before calling here (proc->mp_afp must
 * already be NULL).
 *
 * Panics if the process is not found on its hash bucket.
 */
void
mvfs_zap_proc(proc)
mvfs_proc_t *proc;
{
    register mvfs_proc_t **linkp;
    register mvfs_thread_t *thr;
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();
    unsigned int bucket;

    ASSERT(ISLOCKEDBYME(&(mcdp->proc_thr.mvfs_proclock)));

    bucket = mvfs_pidhash(&proc->mp_procid);
    linkp = &(mcdp->proc_thr.mvfs_procid_hashtable[bucket]);
    MDB_XLOG((MDB_PROCOPS,"zapping proc %"KS_FMT_PTR_T" bucket %d\n", proc, bucket));
    ASSERT(*linkp != NULL);

    /*
     * linkp always addresses the cell that holds the pointer to the
     * element under inspection: first the bucket head, then each
     * mp_hashnxt in turn.  Stop on our process or at the end of chain.
     */
    while (*linkp != NULL && *linkp != proc)
        linkp = &(*linkp)->mp_hashnxt;

    if (*linkp == NULL) {
        /* We'd better have dropped it */
        MDKI_PANIC("zapped proc not on hash bucket");
        return;
    }

    /* unlink the process from the hash queue */
    *linkp = proc->mp_hashnxt;
    proc->mp_hashnxt = 0;
    MDB_XLOG((MDB_PROCOPS,"zapping proc %"KS_FMT_PTR_T"\n", proc));

    /* release threads until the process has none left */
    while ((thr = proc->mp_threads) != NULL)
        mvfs_threadrele(thr);

    ASSERT(proc->mp_threads == NULL);   /* no more threads left */
    ASSERT(proc->mp_afp == NULL);       /* caller must have dumped afp */

    /* use internal form to avoid afp flushing */
    mvfs_procfree_int(proc);
}
/*
 * Zap all the invalid processes on a given process hash chain (or all
 * processes on it when purge is MVFS_PROCPURGE_FLUSH).
 *
 * Called with the mvfs_proclock held.
 *
 * If it dropped the lock and reacquired it, returns 1.
 * If it dropped the lock and couldn't reacquire it, returns -1.
 * Otherwise returns 0.
 */
STATIC int
mvfs_purgechain(bucket, purge, mythread)
int bucket;
int purge;
mvfs_thread_t *mythread;
{
    register mvfs_proc_t *cur, **linkp;
    register mvfs_proc_t *self = mythread->thr_proc;
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();
    int dropped_lock = 0;
#ifdef MVFS_DEBUG
    mvfs_thread_t *thr;
#endif

    ASSERT(ISLOCKEDBYME(&(mcdp->proc_thr.mvfs_proclock)));

  restart:
    linkp = &(mcdp->proc_thr.mvfs_procid_hashtable[bucket]);
    MDB_XLOG((MDB_PROCOPS2,"purging bucket %d, %"KS_FMT_PTR_T"\n", bucket, *linkp));

    /*
     * linkp addresses the cell whose contents must be replaced to take
     * cur off this singly-linked queue: the bucket head to begin with,
     * then the mp_hashnxt of each element already stepped over.
     */
    for (cur = *linkp;
         cur != NULL;
         linkp = &cur->mp_hashnxt, cur = cur->mp_hashnxt) {
#ifdef MVFS_DEBUG
        MVFS_LOCK(&cur->mp_lock);
        for (thr = cur->mp_threads; thr; thr = thr->thr_next)
            ASSERT(thr->thr_proc == cur);
        MVFS_UNLOCK(&cur->mp_lock);
#endif
        /* keep still-valid processes unless everything is being flushed */
        if (purge != MVFS_PROCPURGE_FLUSH &&
            MVFS_PROCVALID(cur, mythread)) {
            continue;
        }

        MDB_XLOG((MDB_PROCOPS,
                  "purge: (pid not found) mp=%"KS_FMT_PTR_T", pid=%"MVFS_FMT_PROCID_T_D",tag=%d\n",
                  cur,cur->mp_procid, cur->mp_proctag));

        /* take this proc off the hash queue */
        *linkp = cur->mp_hashnxt;
        cur->mp_hashnxt = 0;

        /*
         * mvfs_procrele drops the mvfs_proclock in case it ends
         * up doing audit file I/O while releasing the process and
         * its threads. It needs the lock held to prevent races
         * to diddle the process's threads; normally it would
         * hold the spinlock but it calls things which expect to
         * diddle the spinlock themselves, so it relies on
         * the table lock to prevent this race.
         */

        /* Unless we are purging everything, we must never purge ourselves. */
        ASSERT(purge == MVFS_PROCPURGE_FLUSH || cur != self);
        if (purge != MVFS_PROCPURGE_FLUSH || cur != self) {
            /* procrele may take thread hash chain spinlock */
            mvfs_procrele(cur, mythread);       /* drops lock */
            if (purge == MVFS_PROCPURGE_NOSLEEP) {
                /* only conditionally reacquire the lock in this case: */
                if (!CONDITIONAL_LOCK(&(mcdp->proc_thr.mvfs_proclock)))
                    return -1;
            } else {
                MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
            }
            dropped_lock = 1;
        }
        goto restart;   /* the hash chain may have changed */
    }

    /* must clean up completely if flushing */
    ASSERT((purge != MVFS_PROCPURGE_FLUSH) || (*linkp == NULL));
    return dropped_lock;
}
/*
 * MVFS_PROCPURGE - call procrele on all proc structs for which the
 * process is no longer running. This is called periodically under UNIX
 * since the proc stuff doesn't get called at process exit.
 * It is also called when the filesystem is about to be unloaded.
 *
 * slp selects the locking behaviour:
 *   MVFS_PROCPURGE_SLEEP   - block for the mvfs_proclock, one pass.
 *   MVFS_PROCPURGE_FLUSH   - block for the mvfs_proclock and keep
 *                            rescanning the table until a whole pass
 *                            completes without the lock being dropped.
 *   MVFS_PROCPURGE_NOSLEEP - take the lock only conditionally; give up
 *                            silently if it cannot be had, either at the
 *                            start or after a purge dropped it.
 *
 * The bucket index is range-checked before the table is read, so the
 * scan never touches the slot one past the end of the hash table.
 */
void
mvfs_procpurge(slp)
int slp;
{
    int i;
    int rescan, relocked;
    mvfs_thread_t *mythread = mvfs_mythread(NULL);
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    switch (slp) {
      case MVFS_PROCPURGE_SLEEP:
      case MVFS_PROCPURGE_FLUSH:
        MDB_XLOG((MDB_PROCOPS,"mvfs_procpurge\n"));
        MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
        break;
      case MVFS_PROCPURGE_NOSLEEP:
        if (!CONDITIONAL_LOCK(&(mcdp->proc_thr.mvfs_proclock))) return;
        break;
    }

    rescan = 1;
    while (rescan != 0) {
        rescan = 0;
        relocked = 0;
        for (i = 0; i < MVFS_PROCHASH_SZ; i++) {
            if (mcdp->proc_thr.mvfs_procid_hashtable[i] == (mvfs_proc_t *)NULL)
                continue;               /* none on this bucket */
            /*
             * we must drop the mvfs_proclock before calling purge,
             * because when we purge a process we may end up pushing
             * auditing information to a file, and that requires that
             * we get our own thread info (in order to inhibit
             * auditing while we write the auditing information). The
             * result may be a new thread/proc which need to be added
             * to the chains, which requires getting the locks. Now,
             * if we're not sleeping, we just go on to the next chain
             * pointer until we're done (not caring about new
             * additions); if we are flushing we keep cycling until
             * we've scanned the entire list without dropping the
             * lock.
             */
            relocked = mvfs_purgechain(i, slp, mythread);
            if (relocked == -1)
                return;                 /* couldn't get lock back. */
            if (relocked == 1) switch (slp) {
              case MVFS_PROCPURGE_FLUSH:
                /*
                 * Mark flag so we run through the table again, in
                 * case someone slipped in.
                 */
                rescan = 1;
                /* FALLTHROUGH */
              case MVFS_PROCPURGE_SLEEP:
              case MVFS_PROCPURGE_NOSLEEP:
                /* don't care that it unlocked/relocked */
                break;
            }
        }
    }
    MVFS_UNLOCK(&(mcdp->proc_thr.mvfs_proclock));
}
/*
 * MVFS_PROCPURGE_AFPS - run down all the proc and thread structures,
 * flushing all the audit file pointers.
 *
 * We're called by the process which wishes to unload the module.
 * This won't happen if any MVFS file systems are still mounted, so we
 * can safely assume we're the only active process in this filesystem.
 * (I think?)
 *
 * The mvfs_proclock is taken here and is dropped around every audit
 * file release.  After each such release the current bucket is scanned
 * again from its head, and the head is re-read from the hash table at
 * that point rather than reused from before the lock was dropped.
 * The bucket index is range-checked before the table is read, so the
 * scan never touches the slot one past the end of the hash table.
 */
STATIC void
mvfs_procpurge_afps()
{
    int i;
    register mvfs_proc_t *mpchase;
    register mvfs_thread_t *thrchase;
    mvfs_thread_t *mythread;
    extern LOCK_T mfs_unload_lock;
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    /*
     * we must only be called by the unloading thread.
     */
    ASSERT(ISLOCKEDBYME(&mfs_unload_lock));

    mythread = mvfs_mythread(NULL);     /* force an allocate now,
                                           in case we need it */

    MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
    for (i = 0; i < MVFS_PROCHASH_SZ; i++) {
      rescan_chain:
        for (mpchase = mcdp->proc_thr.mvfs_procid_hashtable[i];
             mpchase;                   /* handles case of empty bucket */
             mpchase = mpchase->mp_hashnxt)
        {
            if (mpchase->mp_afp != NULL) {
                /* This call drops the proc lock. */
                mvfs_afprele_proc(mpchase, mythread);
                MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
                goto rescan_chain;
            }
            for (thrchase = mpchase->mp_threads;
                 thrchase;
                 thrchase = thrchase->thr_next)
            {
                if (thrchase->thr_afp != NULL) {
                    mvfs_log(MFS_LOG_ERR,
                             "thread %"KS_FMT_PTR_T" still has audit ptr when "
                             "files unmounted?", thrchase);
                    MVFS_UNLOCK(&(mcdp->proc_thr.mvfs_proclock));
                    /*
                     * may do some blocking on writes, etc.
                     * these two lines are essentially mvfs_afprele_thr(),
                     * but for some other thread, not us.
                     * We're also depending on the statement above that we're
                     * the last thread running so it's OK to access somebody
                     * else's thread without a lock.  Normally, we don't use
                     * locking to access thread data because the owning thread
                     * is the only one that is supposed to access it (see the
                     * big comment above).
                     */
                    mvfs_afprele(thrchase->thr_afp, mythread);
                    thrchase->thr_afp = NULL;
                    MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
                    goto rescan_chain;
                }
            }
        }
    }
    MVFS_UNLOCK(&(mcdp->proc_thr.mvfs_proclock));
}
/* MFS_PROCINHERIT - inherit/initialize proc info for current process */
#define MAXPARENT 1024
/*
 * MVFS_PROCINHERIT_FROM - walk up the current process's ancestry looking
 * for the nearest ancestor that already has an mvfs_proc_t.
 *
 * mp is the process structure being initialized; it is only used for
 * debug logging here.
 *
 * REQUIRES: caller holds the mvfs_proclock for the whole search.
 *
 * Returns the ancestor's mvfs_proc_t as found by mvfs_findproc(), or
 * NULL when no ancestor with state is found.  The search stops at an
 * inactive parent, at a parent whose process id equals the one just
 * examined (self loop), at the top of the process chain, or after
 * MAXPARENT steps.
 *
 * Per the comments below, MDKI_PARENT_PROC() hands back its result
 * locked; each one is released with MDKI_PRUNLOCK() before returning.
 */
mvfs_proc_t *
mvfs_procinherit_from(mp)
mvfs_proc_t *mp;
{
    int i = 0;
    MVFS_PROCESS_T *parent, *nextparent;
    mvfs_proc_t *pmp;
    MVFS_PROCID_T procid, pprocid;
    MVFS_PROCTAG_T proctag;
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();

    ASSERT(ISLOCKEDBYME(&(mcdp->proc_thr.mvfs_proclock))); /* caller holds lock while we work */

    /* As long as there is a parent pid (non-zero) then continue
       looking for a parent with state to inherit from.  Limit
       this loop to a max just for extra safety. */
    parent = MDKI_PARENT_PROC(MDKI_CURPROC());  /* Start with parent (locked) */
    MDKI_MYPROCID(&procid, (MVFS_THREADID_T *)NULL);
    MDB_XLOG((MDB_PROCOPS,"inherit for %"MVFS_FMT_PROCID_T_D": parent %"KS_FMT_PTR_T"\n", MDKI_CURPID(), parent));

    while (parent) {
        if (++i >= MAXPARENT)
            break;                      /* recursed too far, give up */
        if (!MDKI_PRISACTIVE(parent)) {
            MDB_XLOG((MDB_PROCOPS,"inherit for %"MVFS_FMT_PROCID_T_D": parent dead\n",
                      MDKI_CURPID()));
            break;                      /* end of the chain */
        }
        MDKI_PROCID(&pprocid,parent);
        if (MDKI_PROCID_EQ(&procid, &pprocid)) {
            /* do this early test, to avoid any case where the
             * MDKI_PARENT_PROC() would try to acquire a lock held by
             * the previous call to MDKI_PARENT_PROC() that hasn't
             * been released yet by MDKI_PRUNLOCK().
             */
            mvfs_log(MFS_LOG_DEBUG,
                     "procinherit: terminating search at self loop, %"MVFS_FMT_PROCID_T_D"\n",
                     procid);
            break;                      /* parent == current, give up */
        }
        procid = pprocid;               /* remembered for the next self-loop test */
        MDKI_PROCTAG(&proctag,parent);
        pmp = mvfs_findproc(&pprocid, &proctag, NULL);
        if (pmp) {
            MDKI_PRUNLOCK(parent);
            MDB_XLOG((MDB_PROCOPS,"procinherit %"KS_FMT_PTR_T" from %"KS_FMT_PTR_T" (%"MVFS_FMT_PROCID_T_D"/%x)\n",
                      mp, pmp, pmp->mp_procid, pmp->mp_proctag));
            return pmp;
        }
        MDKI_PRUNLOCK(parent);          /* done with parent */
        nextparent = MDKI_PARENT_PROC(parent);
        if (!nextparent) {
            mvfs_log(MFS_LOG_DEBUG,
                     "procinherit: can't find parent proc for %"MVFS_FMT_PROCID_T_D"\n",
                     MDKI_PRPID(parent));
            parent = NULL;
            break;
        }
        if (parent == nextparent) {
            /* top of chain, leave */
            parent = NULL;
            MDKI_PRUNLOCK(nextparent);
            break;
        }
        parent = nextparent;
        MDB_XLOG((MDB_PROCOPS,"inherit for %"MVFS_FMT_PROCID_T_D": nextparent %"KS_FMT_PTR_T" (%"MVFS_FMT_PROCID_T_D", %d)\n",
                  MDKI_CURPID(), parent, MDKI_PRPID(parent), MDKI_PRSTATE(parent)));
    }

    if (parent)                         /* haven't unlocked yet: */
        MDKI_PRUNLOCK(parent);

    /* Print a message if we messed up and looked at too many parents */
    if (i >= MAXPARENT) {
        mvfs_log(MFS_LOG_DEBUG, "procinherit: pid %"MVFS_FMT_PROCID_T_D": too many parents!\n",
                 MDKI_CURPID());
    }

    /* No suitable parent state found, leave it as initialized by caller. */
    mvfs_log(MFS_LOG_DEBUG, "procinherit: no inheritance: pid=%"MVFS_FMT_PROCID_T_D"\n",
             MDKI_CURPID());
    MDB_XLOG((MDB_PROCOPS, "procinherit: no inheritance: pid=%"MVFS_FMT_PROCID_T_D"\n",
              MDKI_CURPID()));
    return NULL;
}
/*
 * MVFS_PROCINHERIT_COPY - copy the inheritable state of one process
 * structure into another.
 *
 * from->mp_inherit is assigned to to->mp_inherit while from->mp_lock is
 * held.  If `to` ends up with a non-NULL audit file pointer, a hold is
 * taken on it before the lock is released.
 * NOTE(review): this relies on to->mp_afp being set by the mp_inherit
 * assignment (i.e. mp_afp living inside mp_inherit) -- confirm in mvfs.h.
 */
STATIC void
mvfs_procinherit_copy(to, from)
mvfs_proc_t *to;
mvfs_proc_t *from;
{
    mfs_auditfile_t *held_afp = NULL;

    /* Found valid state to use */
    MVFS_LOCK(&from->mp_lock);
    to->mp_inherit = from->mp_inherit;
    if (to->mp_afp) {
        held_afp = to->mp_afp;
        mfs_afphold(held_afp);
    }
    MVFS_UNLOCK(&from->mp_lock);

    /* logging is deferred to here 'cuz we can't print with spinlock held */
    if (held_afp) {
        MDB_XLOG((MDB_AUDITF, "afphold: afp=%"KS_FMT_PTR_T" refcnt=%d pid=%"MVFS_FMT_PROCID_T_D"\n",
                  held_afp, held_afp->refcnt, MDKI_CURPID()));
    }
    MDB_XLOG((MDB_PROCOPS, "inherit: mp=0x%"KS_FMT_PTR_T", pid=%"MVFS_FMT_PROCID_T_D", from=%"KS_FMT_PTR_T"\n",
              to, MDKI_CURPID(), from));
}
/*
 * MVFS_PROCINHERIT - initialize mp from an ancestor process, if
 * MVFS_PROCINHERIT_FROM() finds one to inherit from; otherwise mp is
 * left exactly as the caller set it up.
 *
 * REQUIRES: caller holds the mvfs_proclock.
 */
void
mvfs_procinherit(mp)
mvfs_proc_t *mp;
{
    mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();
    register mvfs_proc_t *ancestor;

    ASSERT(ISLOCKEDBYME(&(mcdp->proc_thr.mvfs_proclock))); /* caller holds lock while we work */

    ancestor = MVFS_PROCINHERIT_FROM(mp);
    if (ancestor) {
        MVFS_PROCINHERIT_COPY(mp, ancestor);
    }
}
/* MVFS_MYTHREAD - find/create valid thread info for this process.
 * This routine always returns a valid thread info state (even if
 * all reset) for this process.
 *
 * This routine MUST NOT DISTURB the u-area in any of
 * its activities, since it is called from page-fault
 * code (or its equivalent).
 *
 * Parameters:
 *   threadid_in - thread ID to look up, or NULL to use the ID
 *                 reported by MDKI_MYTHREADID().
 *
 * Returns:
 *   The hashed mvfs_thread_t whose thread ID matches and whose process
 *   matches the caller's procid/proctag; otherwise the structure
 *   returned by mvfs_threadalloc().
 *
 * Locking:
 *   The caller must not hold mvfs_proclock (asserted).  The thread ID
 *   hash bucket spinlock and mvfs_proclock are each taken and dropped
 *   inside this routine; neither is held on return.
 *
 * NOTE(review): the local `mproc` is declared but never referenced in
 * this function.
 */
mvfs_thread_t *
mvfs_mythread(MVFS_THREADID_T *threadid_in)
{
register mvfs_thread_t *mth;
mvfs_proc_t *mproc;
unsigned int hashindex;
MVFS_THREADID_T threadid;
SPL_T s;
MVFS_PROCTAG_T metag;
MVFS_PROCID_T mepid;
SPLOCK_T *lockp;
tbs_boolean_t foundrealthr;
MVFS_PROCTAG_T mthproctag;
MVFS_PROCID_T mthpid;
/* Non-NULL mthproc later means "a stale proc may need a delayed release". */
mvfs_proc_t *mthproc = NULL;
register mvfs_proc_t *mpchase, **mpp;
mvfs_common_data_t *mcdp = MDKI_COMMON_GET_DATAP();
/*
 * We must not hold the mvfs_proclock upon calling this function.
 * If we do, then we block allocating a thread structure; doing so
 * while holding that lock may result in a deadlock if, for example,
 * the current thread is a VM page push operation.
 */
ASSERT(NOTLOCKEDBYME(&(mcdp->proc_thr.mvfs_proclock)));
/*
 * If no thread ID was passed as a parameter, consider the current one.
 */
if (threadid_in == NULL) {
BZERO(&threadid,sizeof(MVFS_THREADID_T));
MDKI_MYTHREADID(&threadid);
} else {
threadid = *threadid_in;
}
/* Derive the process id/tag for this thread ID and pick its hash bucket. */
MDKI_MYPROCID(&mepid, &threadid);
MDKI_MYPROCTAG(&metag, &mepid, &threadid);
hashindex = MDKI_THREADHASH(&threadid, mcdp);
/* Walk the bucket's chain under the hash spinlock looking for our thread ID. */
THREADID_SPLOCK(hashindex, mcdp, &lockp, s);
for (mth = mcdp->proc_thr.mvfs_threadid_hashtable[hashindex];
mth;
mth = mth->thr_hashnxt) {
ASSERT(mth->thr_hashbucket == hashindex);
if (MDKI_THREADID_EQ(&threadid, &mth->thr_threadid)) {
/*
 * Capture the match result and the entry's proc identity while the
 * spinlock is still held; they are used after it is dropped.
 */
foundrealthr = (MDKI_PROC_EQ(mth->thr_proc, &mepid, &metag));
mthproctag = mth->thr_proc->mp_proctag;
mthpid = mth->thr_proc->mp_procid;
THREADID_SPUNLOCK(hashindex, mcdp, &lockp, s);
/* Same thread ID and same process: this is the entry we want. */
if (foundrealthr)
return(mth);
mvfs_log(MFS_LOG_DEBUG,
"mvfs_mythread: changing allegiance, mthr %"KS_FMT_PTR_T" procid %"MVFS_FMT_PROCID_T_D" proctag %d\n",
mth, mepid, metag);
/*
 * This thread has apparently changed allegiance and is
 * now in a different process.  Let's grab the
 * mvfs_proclock and see if its mvfs_proc is on the hash queue.
 * If it is, release the thread.  If it isn't, the procpurge
 * has cleaned up or is in the process of cleaning up,
 * so just allocate a new one.
 */
MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
if ((mthproc = mvfs_findproc(&mthpid, &mthproctag, NULL))
!= NULL)
{
/*
 * If the process is dead, we also want to release the
 * proc, but we can't do it until we get our own new
 * thread created.  Use mthproc != NULL as an
 * indicator for later.
 */
if (MVFS_PROCVALID(mthproc, mth))
mthproc = NULL;
mvfs_threadrele(mth);
}
MVFS_UNLOCK(&(mcdp->proc_thr.mvfs_proclock));
goto realloc; /* we don't hold spinlock */
}
}
/* none there yet, get a new one */
/*
 * nobody else could be allocating a thread struct for us (by
 * definition it's thread-specific), so we don't need to worry
 * about racing ourselves for a thread.  Therefore it's safe to
 * drop and reacquire the hash table lock around this call (the
 * lock just keeps the hash table pointers intact, and we don't
 * care where in the hash chain we end up).  There are other
 * things that happen in the process part of allocating a new
 * thread that are easier to not worry about if we don't hold this
 * lock.
 */
THREADID_SPUNLOCK(hashindex, mcdp, &lockp, s);
/* Reached either by falling through (no entry) or via goto (stale entry). */
realloc:
mth = mvfs_threadalloc(&threadid);
MDB_XLOG((MDB_PROCOPS,"pid %"MVFS_FMT_PROCID_T_D": alloc new thread %"KS_FMT_PTR_T"\n",
mth->thr_proc->mp_procid, mth));
#ifdef MVFS_DEBUG
/*
 * Debug-only sanity check: re-derive our proc identity and warn if the
 * freshly allocated thread is attached to a different proc.
 * NOTE(review): mth is a pointer but is formatted with 0x%x here, unlike
 * the KS_FMT_PTR_T used elsewhere in this function -- confirm.
 */
MDKI_MYPROCID(&mepid, &threadid);
MDKI_MYPROCTAG(&metag, &mepid, &threadid);
if (!(MDKI_PROC_EQ(mth->thr_proc, &mepid, &metag)))
mvfs_log(MFS_LOG_WARN,
"mvfs_mythread: different procinfo mthr 0x%x procid 0x%x proctag 0x%x mprocid 0x%x mproctag 0x%x\n",
mth, mepid, metag, mth->thr_proc->mp_procid, mth->thr_proc->mp_proctag);
#endif
/* RATLC01308802: short lived mvfs_thread used for inactive during proc
 * exit does not go onto hash chain. (Solaris only)
 */
if (!MDKI_IS_SOL_EXITPROC(mepid,mcdp)) {
/* attach new thread to hash chains */
/* Inserted at the head of the bucket computed from the thread ID above. */
THREADID_SPLOCK(hashindex, mcdp, &lockp, s);
mth->thr_hashbucket = hashindex;
mth->thr_hashnxt = mcdp->proc_thr.mvfs_threadid_hashtable[hashindex];
mcdp->proc_thr.mvfs_threadid_hashtable[hashindex] = mth;
THREADID_SPUNLOCK(hashindex, mcdp, &lockp, s);
}
if (mthproc != NULL) {
/* delayed flush of potentially dead process.  Look it up again. */
/*
 * The proclock was dropped since the first lookup, so the earlier
 * pointer is not reused.  Note that mth now refers to the newly
 * allocated thread, not the stale entry found in the hash chain.
 */
MVFS_LOCK(&(mcdp->proc_thr.mvfs_proclock));
if ((mthproc = mvfs_findproc(&mthpid, &mthproctag, NULL)) != NULL &&
!MVFS_PROCVALID(mthproc, mth))
{
/*
 * unhash it first, then release it.  We don't use
 * mvfs_purgechain() since we don't want to spend time now
 * looking at other procs--we only care about the one we
 * identified.
 */
/* hashindex is reused here as an index into the procid hash table. */
hashindex = mvfs_pidhash(&mthpid);
mpp = &(mcdp->proc_thr.mvfs_procid_hashtable[hashindex]);
/* mpp trails mpchase, pointing at the link that refers to it. */
for (mpchase = *mpp;
mpchase;
mpp = &mpchase->mp_hashnxt, mpchase = mpchase->mp_hashnxt)
{
if (mpchase == mthproc) {
/* remove this proc from hash queue: */
*mpp = mpchase->mp_hashnxt;
mpchase->mp_hashnxt = 0;
break;
}
}
/* The proc was just found by mvfs_findproc, so it must be on the chain. */
ASSERT(mpchase == mthproc);
mvfs_procrele(mthproc, mth); /* drops proclock */
mvfs_log(MFS_LOG_DEBUG,
"mvfs_mythread: dumped stale mvfs_proc"
" procid %"MVFS_FMT_PROCID_T_D" proctag %d\n",
mthpid, mthproctag);
} else {
/* we have to drop proclock */
MVFS_UNLOCK(&(mcdp->proc_thr.mvfs_proclock));
}
}
return mth;
}
/*
 * MVFS_SNAPSHOT_THREAD - refresh a thread's inherited state from the
 * process it is attached to.
 *
 *   thr - thread structure to update; thr->thr_proc must be non-NULL.
 *
 * Any audit file reference the thread already holds is released first.
 * The process's mp_inherit is then copied under the process's mp_lock,
 * and a hold is taken on the resulting audit file pointer when it is
 * non-NULL.
 */
STATIC void
mvfs_snapshot_thread(mvfs_thread_t *thr)
{
    register mvfs_proc_t *proc = thr->thr_proc;

    ASSERT(proc != NULL);       /* just in case */

    /* Drop the old reference; the callee copes with a NULL pointer. */
    mvfs_afprele_thr(thr);

    MVFS_LOCK(&proc->mp_lock);
    thr->thr_inherit = proc->mp_inherit;
    if (thr->thr_afp != NULL) {
        mfs_afphold(thr->thr_afp);
    }
    MVFS_UNLOCK(&proc->mp_lock);

    /* Logging waits until the spinlock is released: no printing under it. */
    if (thr->thr_afp != NULL) {
        MDB_XLOG((MDB_AUDITF, "afphold: afp=%"KS_FMT_PTR_T" refcnt=%d pid=%"MVFS_FMT_PROCID_T_D"\n",
                  thr->thr_afp, thr->thr_afp->refcnt,
                  MDKI_CURPID()));
    }
    MDB_XLOG((MDB_PROCOPS2,"snapshot: thr %"KS_FMT_PTR_T" to proc %"KS_FMT_PTR_T"\n",
              thr, thr->thr_proc));
}
#ifdef MVFS_HAS_DELAYED_PROCESSING
/*
 * This routine allocates a clone and copies the data from the actual thread
 * structure into it. Cloned threads will not be on the thread hash table, and
 * they will be freed as soon as mvfs_exit_fs() is called.
 */
mvfs_thread_t*
mvfs_clone_thread(mvfs_thread_t *actual_thread)
{
mvfs_thread_t *clone_thread = NULL;
ASSERT(actual_thread != NULL);
clone_thread = MVFS_THREAD_ALLOC();