From 7aa231e7ecdab4f4e74d7559ecaf5e7bcd1941d4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 11:27:12 +0200 Subject: [PATCH 1/7] add first version of component layout which can help guide feature toggles The idea is to toggle entire components which may affect one or more crates. That way, users can tailor the experience according to their needs, including compile times. --- etc/gix-components.monopic | Bin 0 -> 7925 bytes etc/gix-components.txt | 3 +++ 2 files changed, 3 insertions(+) create mode 100644 etc/gix-components.monopic create mode 100644 etc/gix-components.txt diff --git a/etc/gix-components.monopic b/etc/gix-components.monopic new file mode 100644 index 0000000000000000000000000000000000000000..d81ae6d00c6bb7a89d7afe3d2b044b5fb07b606d GIT binary patch literal 7925 zcmV*)yRy9ys#dp8#DXR zz$~WwZMIR6ZQ9nhZFwcRx@!jo%-8IP?UyVsVo;KuHB6>L6ezuP(=IC&OeXUZaW4Ou zEUy16zge#)AOBHae#lgLwf^#wf12M;K59Rm+%9fDE%Up{$LbICWI9>Tf6pJQkF4|G z*Pm_{&+GhoJ^A?eU#9hMUKaD`vdzE$^7ZSq`k3+MpFhr@^G~<4)x)R!d3ODn7vFF< zdtBwy$>ZYY*XGWn>clh^9Mi{igUlMnOT+x(gTeDyH9UHtZ@(dXGy{wcqm zueW};`O}-l<6=4aNZ`EAl=4N>o7?zQH1T{}eqz0REk3|~VX6-oKa{_je1uJutM!-1 z;zulETh8z2MNpHX*AUBon%!4HmvI!o{;~M;E1ebp@UmFV*Ym}5*{@|cRb2$8{7ijC zlcFmh^4aaD=f!$f{%Q4vZy&|#b$K_t$%`IewYOgf-rUbSx4O#jpUQtKZuHwBtX{9{ zd*1xjFKx5MAb6U+Y=r~HPPl&;cXzA27!ngTRqb!*At!%k^iI`TgB%l zZIrtanli4N^}}-UdjG(Wb+%s5ZXU|}uWx@d?A$qxx+(a;psSwDyMxX+w|U~m`C$g# zn-F3Ko$;#opo6Z1&bTaQ&{??5b@g25f^!{Y%|wH0pt){8gs8a=;Rtgb>X>1NR1ToP zlYLs;=8vDs#d&iFq^bDYr&5amU%uOQZ^GRF>P-tbzuv$1`DL-Qm_6UWKF*e# zZ<&0w)5+bk9ILbEn}@}6t4~Rl!(|gq^_`!tm-W}~{IcA6l)v12joyxzy%*@0rd^8=KoW^2>?@^ygRxAZ*ubl_Zr>lgXQX-j#+Ph zxp@VD$X7SZ`E||#-7J>RR$#mfiaH9lD(_tEa9uQ*_-a+pmFE&HR&P zW#iv$qm1)Y^Yz7Nv+agpYe3vR&hCr*)|4( zcJZY62GiWU#=ec{twv1n)vR{!X|JZvWL_WboY*^$nDy>f*K{iyxk(9K7J6hh`H}gU z&75)e#5QxrW~1E9>47$L_w(OBte3OTc`<-GZ|IDp?F}8MV!ol{Z|Lf=PzR*=O7j)) z6`t3I&N!2}q4Q2|X;e`&kR9eYN#PmDT#l26oPp*~v*ql=?fmYp`}|{f50u7EQ4cWx 
z7)~%6&sRJrS)P;ZnH?yNO+8RTM(O)!a1=d@LDyM~P-y21QVpx!hO@v%vgnYyE1KP? zGC~2_egS1#CLRQ#8?BEDT~|CXaAJFIAT!1luQ-nlv*P`%Fw|F;uiT_fdo@?tc`bTibZQpWqu8z+NoaCIYvh9Va&3c_+Z+H2wrwbo9D=GW z+BG?(-%AdWwqk)3+Cbn`RZNotueTDNm)Hu>a8gSw=*ywEDpVzcD%YD7xPegP z;Y|oJ%Mqy~P#s7`Q5{IZ=w7n#7teR|``2YPEdKNP-^$tL zruy9v40%QXqnx0Z`RDnnjGGa5<6MYdmW%bGh^M(vB9K|hsiu6qcPz_PaK_;PZwrzZ`QX)lhbkLm|oC@72 z;Z((V9d6^jDo*jQtIkUtx*a5yE|xeLK=c8b4~WbMM2CDue063>Ds-txD(!@{6TI9a zq2ZF(tMYmn%ZkQ$IlIp{iG;=NH6svE*#l_ESfOFpyD|t{ zctEgnE4?SN;~-KM_hY_Z7ki_-qPMD#6eH{DyNCSoZoPQUzk6CdFK(B!-@dz>-xq)R zq5QM&%AQ@$ufMxp6oX+ZH=wseYh>H%ul)8{p8fRwP4V2mbTV9Q`#q!J55A)ScF*Pi|Y}TdQ z&Ch>~sJf|r(MOjJP<2yWVN+aTVZ_d1h6&)D$;ma=T@tLKVF-&6`RYf__<1 zLp?tyB{FIJ_=F&T-iVQ1eZP2mniu!WHC`1gsOZ!Yp`8#FLmR1vQOQ*ew34FI2;pQA zr5ZL;4LcBfewc+&!|0PYjd)Jh2kILnlwbz6(1wO~=cXV-$$QvxPfKJuc?8d7qpC=hI0iKPdkM7AqU$CHxzuMC1N%$Y-__W$&_xc=}%l*TsU&Njlk zVKlapm9>$TwTU@?#MDEAGt<~6O58_CE>QN2l0-PIh==bQ6$bq=L9xQ08YoHhCWI)X zLUn{B5vZsnk&8mDYkz8}b?rxqT1ObDp8a$F^l~+OzAg2Xe!ZJN=BrILdtLlP`SaBu z{_mgv+j&LQeiza1+#nV0dX&^xFlm*eTi8=`b!Lcm?UNDhS=Q?ishyM!n&}x3W`d&J zQ7Ovo)JXa0@!}x%-YomU_9qAZwpjkUUgmkl66$UwhJGhkY1}XzA)Y;vXU|f4zJ(J` zxq5nDxk{hR%-3=gQ7yXFc_OlKYPAwGO8BBPgVsv!^4a=znMV*6?JgHzyI~3@h9?}u z6Al}UAsdX9#-nFeFiGa(_Yh9+z8exWsUDtw6ogFDU$o9MsZX=v#6E37nWyzvs zA`QBeD|V|lUH4xo$KZGc{WrgCArdxU)bKplD=Ucl4 zQ&{G+r`A{xrC8o5mle*i-Spr!zu87)+*&O0gVu zuyAaBvQ85kp{RT6+8QSnkPn$I7?+&KGlHdQ%7h4v^ zymLCU#{daMnbNu_I)-+qrY5bexl{uWTn%jUtq}#m9WsVDr3l>d>wzp(rCF$|PL8%G z+s0k=%<6&3qR~-UJ*=F7PAXlA*(F_h@N^}+M|4bBR@x`+ECIvhD_M{RS&#-$#_(3P~xvK$K>cQKxolrtETR zKJ=-%{IZw)raOB@?J5`)!Mg!Y&Gk^Iuk57t2x?i8&DEKmnvF`gT9R9sZFWf)*XiP5 ziAMWu(z*~0lP=y5Aue4E$4D1PK}>6nh|UWioSZ_j_Lqh4zzg5aGB3JdJuh1BzWBk8 zPB9=&?EuN3dN|ZqtWyk3=LDv6!nx@b10}06N_lW)LZ~>fD8-!YiTMUO96G3eF_G$N zF7HELKxK*{nLP@lBt8@?;c|Zcy3SiaMFhE=i5Xun{w=@Dm-+JziwGAnvsHUd^SJ1p zXTcq`Wzk|gklgv35TbU__6Tz4+D6HpqoxBW4>%Y#(~Buy7~98krkzp1zyn(#8ci0CYGmM zDI!Vpl+i`ue9=lVgrpj-r9elw-mW*c?JV{t32?5pK%-|X^dU|Wx@15%S}D_XQiA9Y 
zsi38d?MTaURR{&!HbHynbYo3&eO;Dm$9y;o#%PlF$VQFxpwSd-`8;*m5mkIA9ppP{ znzZEMVfaq_Aw=<=Dvdg6zLU05`=B1O8=akXNv`e8ONlaq6j57F-=t=bUZxHTnkJSD zRI^kKltp|KLd?`b*%7jcl*O4Q(j|3WjyfMqT^lCq85Rd*vlh)}E!qsk-rWw%Z3P&e z;_fdz4xLln>|>)%lmt0N!9}o=Nu!sM69wmkd(NfeuDFZ4navV!=QbG4{vFa=e zrQ{}^IumxuqEK+{C(oQ5KafM#n-Jo%C`WL}BA>;r{g+W!jk;hO#i-_y8^ut0#ZY-Y z5U*w{EUmTzj4leGeDcI-W)WzJ8l71PGOa7|`84obb_jbGgrI-^mLgaWA^! zNw+vWz^(^F>iMYCMb1HJjV6+Vc9QK0ME7=t`-Z2*x&_o~q3p7$RjEeRb*%$1BP1{9 z^Qs~gJf9X^B4iFB%4TZew0FHhRJu2aoGP1DVyS5*C{w58 zoOr<~snUg#b5zC}m^eO8{PN~u{yA^O52Y}pQ8vj~iA9~#u@y2JC3Fb}f-e2X>+0d^ z`-kEI4lR@L~X*`1;OwUx-0 zLo-O(FnirktVT=m;819_A_GC=HJhB1aG}O2-RF)nkn@Mt*#%Nq6?k2^z>8czrJ;(s zdm%)XaMck!b&QJg)G;bl@S!ZZ;A22YrOc32!&16x#tZ7BMHw`cx2LA??A1l+@}O0# zDy`df<6m0$?uk1aHGsIYZ_qKs_DAHI3+I8a&dltj)Z|Iq_}f}5VN@IcpI)zvsqOW# z-S)dE0+%K_wcig7d8vJWz`E>Quda<0{=bHll;`TbCfKR)&^%jRI;3F67 zk&E?g4W-!{O4FIy)@w?&t@rA}O|A}IL2j~UZn7TA$8bS3b@317&sTrADt|1O z6gE%*E%lzDRKrG&ln-=4t5)aU;K7$1I}EykWVhag5XE73*%3I*&d0GtLly}o_awD` z-b3QQGZ(`I1FwXg5@~Tl97?RU%c}?bQa-riNviJ%ym7kyulX0@Hy@wh{yzEmVY};r zzP3j`F~E(HGa0UN&z&mHy>yupx$IU2IEQ+Cj=r zN9)jrBui#v_$W4k)+a}TSK}nX`_xqt6Uq~Hk7l7D ztOR+G zV(f7-ju=Rl8nje#q%W&YOS!wDRJpX@o$3){v&)6zWww6Uu`L57SL7tQw#_*iuNSLx zSn3}pL3D>^BnR--KoT!+LWn7vfj@%8OR3EmW5TY2blC$)GKzVM0tAI5U2@cZoO9;9 zczu6wS?nM-sbTG2cd?Ui`~EJUtzVZ_eyf|MJRwT)<9iDKfu?P*RfqkA?y`QLGb)sl zE*X2Ndv%Z!FuwT9*Fc!y>P|Le6bdsm9rH5XP&SEMVZl~_(J&b!FRNa@icN%Z@@`V{ z+Gd0**z>2^a`xeNOOw%fIZ1Vsg#^)zPqunDPR^0t7$xYzjp_i&YaQ%bm8C{#m7oQ= z^A|c{<;@+W69SjxYSY01=!E+rM9~Q=c~faRA-I@D!oy|IRSAmVpi_*5QUmQ)-h>b} z=+qHRowdTaokP`;|I}K0N$R1WARDH z;**TRhZ=={M(i|~B^C#D?a?UX3#Dx)=O;sgZFg`V{g<0$t>gj(dr68^Nf8}Rif{tN za`wUUDc{miNfEsjLK|RgQpCC=Bt@*5w3ZxE1(D0>EJr5>ofZhWYjL1c)|(Kb#vdNx zltt*`xND{#lEK>pO!g)WQcupUYZL>i_vvd(X7BB<*YpN0+-z|gP4LbCn}k90J~VFw zzYF{-a8|HoXtJ1NbHV9q=CAO3#;+L*=rk<@L$VkS#Yqm%ad3*G&2TVwiWPYnBh3(L zw$N%WnwF8(z*r27Vb_ehX3#a)Z@H?>hZjClaS!uprad%pNtK0xC(K)DAPMYdJz-TX zhV^kGv#Enk%{kD+qDX}N}`MeMxa?liu+p{{<#%vvYKtd*NBK?$;U 
zrTjF3`(|ZlR=LQsWOA$Av_>REJR&O?wD&=HXYyQZbS8&)Lcz8LCECoM{$ghwu9P68 z_WA&sOdEn<)d_;6f*{(ymg#!xTHHN~71j_!Zc7Ns)@SH@6Tk=dMAQ7@KX zu6~%WZi??*e%UBS-fn(wWg?|q@2X)xdweN&gZX+Z+p)_3`_uF5({;YA+OyR^QT5*< z5?hWO4yNqVD(kVc{;SC^3}zqS!~1FVaP{+Y_PlyoEZ41*?)c!#Kjf>M<@}o6g273V zeKV;c9)#9rl#KNNEr|7yYujA-B{euR2&Di{eL+p%7=SanA3_vow93y(b4HzxgGgWy z8d#zmA+ zD&eW&wg}usKK#JAt<5H~v-dLYAh%pLh)60i?t>WkL5%z$hJFy98FGs-6}e?}KUpw? zTCi#n%F`Hvbn3d6D~V+NI@1OvluWu@He3=s<*C316FinBn5`OUxBn)Dn4P^Ca4> zJxf()Hy8e9U35_4qC<;)wnG&i_Ckm%I@lwuNnoSaB)kl|Y9pk=8zBaZ52Zu$CWN?+ z5FKM96s1FA&xs6NZZI$~>~xR~49u$l8fe(P2_b5(qazGEpr~P2qeDBV{>(7oaSEsY zXq4!2mrW}sRl*jWrXQd`e!S?VbvgXHCje;&LavpbR@miZX8^aDR$LBixmGRNip~s3 zPLzry$4i2!maDMAvN2N=35F6**$*MA^jl$frAxnH;^Zo%xMbc5^1b>AC;-V3{Xo5ln921ZC_!>FNEs%A+$=zpNwMid+J$73m7;xL|6|<3`S^Yev8dXSJvrjD$u82Z zhlH*C)KreTd;6MQ;a$_)vtjbHVfM2*H~O>5(uG?=Y@4rb2#%2@ zlOQ!ok(&UdoU;0H0Z{e)oRp^sS`st@jglL;rv|kI<1TB~0oSaS(%W5Z-0naviwSOO z)Bs}R4B-cc@B`~g0Sil&3ODB^Hg1zu^1R#x1RB$U`GALL#00s0l|4#4g9s55TNMy{6%d;h z5W5wSJ~KiWK`Nt*s2}s0f^w?|z%Y;(q{Jflf&Wk~H_5I@MX}*pCc+_-UUQw){RvEfu13cKO#IRS1VY3qB z8T8@JnE^b!OXUb(@-k&!#su>+#(~DWZnbf?uHpdzk&Tl@ literal 0 HcmV?d00001 diff --git a/etc/gix-components.txt b/etc/gix-components.txt new file mode 100644 index 00000000000..6a107dc7bf3 --- /dev/null +++ b/etc/gix-components.txt @@ -0,0 +1,3 @@ +╔════════════════╗ +║ gix-submodule ║ +╚════════════════╝ \ No newline at end of file From 7766cf91d28fe9a602048bd15a49632752173604 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 14:44:02 +0200 Subject: [PATCH 2/7] feat: make blob-diffing a feature so it can be turned off by adding the `blob` feature (enabled by default) --- Cargo.lock | 1 + gix-diff/Cargo.toml | 12 +++++++++++- gix-diff/src/lib.rs | 7 +++++++ justfile | 1 + 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index c55c9e62035..2da15bd6faa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1497,6 +1497,7 @@ dependencies = [ name = "gix-diff" version = "0.34.0" 
dependencies = [ + "document-features", "getrandom", "gix-hash 0.11.4", "gix-object 0.34.0", diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index 43fbc0123c1..04cfeb9906c 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -11,6 +11,9 @@ rust-version = "1.65" autotests = false [features] +default = ["blob"] +## Enable diffing of blobs using imara-diff. +blob = ["dep:imara-diff"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] ## Make it possible to compile to the `wasm32-unknown-unknown` target. @@ -23,6 +26,13 @@ doctest = false gix-hash = { version = "^0.11.4", path = "../gix-hash" } gix-object = { version = "^0.34.0", path = "../gix-object" } thiserror = "1.0.32" -imara-diff = "0.1.3" +imara-diff = { version = "0.1.3", optional = true } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} getrandom = { version = "0.2.8", optional = true, default-features = false, features = ["js"] } + +document-features = { version = "0.2.0", optional = true } + +[package.metadata.docs.rs] +all-features = true +features = ["document-features"] +rustdoc-args = ["--cfg", "docsrs"] diff --git a/gix-diff/src/lib.rs b/gix-diff/src/lib.rs index a60f7bc04ee..6d94a75919f 100644 --- a/gix-diff/src/lib.rs +++ b/gix-diff/src/lib.rs @@ -1,4 +1,10 @@ //! Algorithms for diffing various git object types and for generating patches, highly optimized for performance. +//! 
## Feature Flags +#![cfg_attr( +feature = "document-features", +cfg_attr(doc, doc = ::document_features::document_features!()) +)] +#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms)] #![forbid(unsafe_code)] @@ -6,4 +12,5 @@ pub mod tree; /// +#[cfg(feature = "blob")] pub mod blob; diff --git a/justfile b/justfile index 0421dc4c995..9818ba80ed7 100755 --- a/justfile +++ b/justfile @@ -99,6 +99,7 @@ check: cargo check -p gix-config-value cargo check -p gix-config --all-features cargo check -p gix-config + cargo check -p gix-diff --no-default-features cargo check -p gix-transport cargo check -p gix-transport --features blocking-client cargo check -p gix-transport --features async-client From be9af327c75d693658a2427ee9a711e631a8da7d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 11:30:56 +0200 Subject: [PATCH 3/7] Create new `gix-status` crate to capture `git-status` like functionality This includes the various diffs git can do between different representations of the repository state, like * index and working tree * index and tree * find untracked files * check if the working tree is dirty (quickly) --- Cargo.lock | 16 + Cargo.toml | 1 + README.md | 1 + crate-status.md | 21 +- {gix-worktree => gix-status}/CHANGELOG.md | 0 gix-status/Cargo.toml | 37 ++ gix-status/LICENSE-APACHE | 1 + gix-status/LICENSE-MIT | 1 + gix-status/src/index_as_worktree/content.rs | 80 +++++ gix-status/src/index_as_worktree/function.rs | 331 ++++++++++++++++++ gix-status/src/index_as_worktree/mod.rs | 11 + gix-status/src/index_as_worktree/recorder.rs | 27 ++ gix-status/src/index_as_worktree/types.rs | 69 ++++ gix-status/src/lib.rs | 15 + gix-status/src/read.rs | 63 ++++ .../fixtures/generated-archives/.gitignore | 2 + .../generated-archives/racy_git.tar.xz | 3 + .../generated-archives/status_conflict.tar.xz | 3 + .../status_intent_to_add.tar.xz | 3 + .../generated-archives/status_removed.tar.xz | 3 + 
gix-status/tests/fixtures/racy_git.sh | 12 + gix-status/tests/fixtures/status_changed.sh | 26 ++ gix-status/tests/fixtures/status_conflict.sh | 18 + .../tests/fixtures/status_intent_to_add.sh | 9 + gix-status/tests/fixtures/status_removed.sh | 18 + gix-status/tests/fixtures/status_unchanged.sh | 20 ++ gix-status/tests/status-multi-threaded.rs | 4 + gix-status/tests/status-single-threaded.rs | 4 + gix-status/tests/status/index_as_worktree.rs | 226 ++++++++++++ gix-status/tests/status/mod.rs | 7 + justfile | 4 + 31 files changed, 1028 insertions(+), 8 deletions(-) rename {gix-worktree => gix-status}/CHANGELOG.md (100%) create mode 100644 gix-status/Cargo.toml create mode 120000 gix-status/LICENSE-APACHE create mode 120000 gix-status/LICENSE-MIT create mode 100644 gix-status/src/index_as_worktree/content.rs create mode 100644 gix-status/src/index_as_worktree/function.rs create mode 100644 gix-status/src/index_as_worktree/mod.rs create mode 100644 gix-status/src/index_as_worktree/recorder.rs create mode 100644 gix-status/src/index_as_worktree/types.rs create mode 100644 gix-status/src/lib.rs create mode 100644 gix-status/src/read.rs create mode 100644 gix-status/tests/fixtures/generated-archives/.gitignore create mode 100644 gix-status/tests/fixtures/generated-archives/racy_git.tar.xz create mode 100644 gix-status/tests/fixtures/generated-archives/status_conflict.tar.xz create mode 100644 gix-status/tests/fixtures/generated-archives/status_intent_to_add.tar.xz create mode 100644 gix-status/tests/fixtures/generated-archives/status_removed.tar.xz create mode 100755 gix-status/tests/fixtures/racy_git.sh create mode 100755 gix-status/tests/fixtures/status_changed.sh create mode 100755 gix-status/tests/fixtures/status_conflict.sh create mode 100755 gix-status/tests/fixtures/status_intent_to_add.sh create mode 100755 gix-status/tests/fixtures/status_removed.sh create mode 100755 gix-status/tests/fixtures/status_unchanged.sh create mode 100644 
gix-status/tests/status-multi-threaded.rs create mode 100644 gix-status/tests/status-single-threaded.rs create mode 100644 gix-status/tests/status/index_as_worktree.rs create mode 100644 gix-status/tests/status/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 2da15bd6faa..8194daa534e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2216,6 +2216,22 @@ dependencies = [ name = "gix-sequencer" version = "0.0.0" +[[package]] +name = "gix-status" +version = "0.1.0" +dependencies = [ + "bstr", + "filetime", + "gix-features 0.32.1", + "gix-fs 0.4.1", + "gix-hash 0.11.4", + "gix-index 0.22.0", + "gix-object 0.34.0", + "gix-path 0.8.4", + "gix-testtools", + "thiserror", +] + [[package]] name = "gix-submodule" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 9c4ab23793a..9efc81cdd41 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -235,6 +235,7 @@ members = [ "gix-index", "gix-bitmap", "gix-worktree", + "gix-status", "gix-revision", "gix-packetline", "gix-packetline-blocking", diff --git a/README.md b/README.md index 7b969ad4374..82ba97ec99b 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,7 @@ is usable to some extent. * [gix-worktree-stream](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree-stream) * [gix-archive](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-archive) * [gix-submodule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-submodule) + * [gix-status](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-status) * `gitoxide-core` * **very early** _(possibly without any documentation and many rough edges)_ * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) diff --git a/crate-status.md b/crate-status.md index 89ce8574805..94288f6ea66 100644 --- a/crate-status.md +++ b/crate-status.md @@ -453,6 +453,12 @@ Make it the best-performing implementation and the most convenient one. 
### gix-glob * [x] parse pattern * [x] a type for pattern matching of paths and non-paths, optionally case-insensitively. + +### gix-status +* [x] differences between index and worktree to turn index into worktree +* [ ] differences between tree and index to turn tree into index +* [ ] untracked files +* [ ] fast answer to 'is it dirty'. ### gix-worktree * handle the working **tree/checkout** @@ -463,18 +469,17 @@ Make it the best-performing implementation and the most convenient one. - [ ] handle sparse index - [x] linear scaling with multi-threading up to IO saturation - supported attributes to affect working tree and index contents - - [ ] eol - - [ ] working-tree-encoding + - [x] eol + - [x] working-tree-encoding - …more - **filtering** - - [ ] `text` - - [ ] `ident` - - [ ] filter processes - - [ ] single-invocation clean/smudge filters -* manage multiple worktrees + - [x] `text` + - [x] `ident` + - [x] filter processes + - [x] single-invocation clean/smudge filters * access to per-path information, like `.gitignore` and `.gitattributes` in a manner well suited for efficient lookups * [x] _exclude_ information - * [ ] attributes + * [x] attributes ### gix-revision * [x] `describe()` (similar to `git name-rev`) diff --git a/gix-worktree/CHANGELOG.md b/gix-status/CHANGELOG.md similarity index 100% rename from gix-worktree/CHANGELOG.md rename to gix-status/CHANGELOG.md diff --git a/gix-status/Cargo.toml b/gix-status/Cargo.toml new file mode 100644 index 00000000000..b89dd3fa7fc --- /dev/null +++ b/gix-status/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "gix-status" +version = "0.1.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT OR Apache-2.0" +description = "A crate of the gitoxide project dealing with 'git status'-like functionality" +authors = ["Sebastian Thiel ", "Pascal Kuthe "] +edition = "2021" +include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] +rust-version = "1.65" + +[lib] +doctest = false + +[[test]] +name = "multi-threaded" 
+path = "tests/status-multi-threaded.rs" +required-features = ["internal-testing-gix-features-parallel"] + +[features] +internal-testing-gix-features-parallel = ["gix-features/parallel"] + +[dependencies] +gix-index = { version = "^0.22.0", path = "../gix-index" } +gix-fs = { version = "^0.4.1", path = "../gix-fs" } +gix-hash = { version = "^0.11.4", path = "../gix-hash" } +gix-object = { version = "^0.34.0", path = "../gix-object" } +gix-path = { version = "^0.8.4", path = "../gix-path" } +gix-features = { version = "^0.32.1", path = "../gix-features" } + +thiserror = "1.0.26" +filetime = "0.2.15" +bstr = { version = "1.3.0", default-features = false } + +[dev-dependencies] +gix-testtools = { path = "../tests/tools" } + diff --git a/gix-status/LICENSE-APACHE b/gix-status/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/gix-status/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-status/LICENSE-MIT b/gix-status/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/gix-status/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-status/src/index_as_worktree/content.rs b/gix-status/src/index_as_worktree/content.rs new file mode 100644 index 00000000000..aa775821a7a --- /dev/null +++ b/gix-status/src/index_as_worktree/content.rs @@ -0,0 +1,80 @@ +use gix_hash::ObjectId; +use gix_index as index; +use index::Entry; + +/// Compares the content of two blobs in some way. +pub trait CompareBlobs { + /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. + type Output; + + /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` + /// and allow reading its bytes using `worktree_blob`. + /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. 
+ /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. + fn compare_blobs<'a, E>( + &mut self, + entry: &'a gix_index::Entry, + worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result, E>; +} + +/// Lazy borrowed access to blob data. +pub trait ReadDataOnce<'a, E> { + /// Returns the contents of this blob. + /// + /// This potentially performs IO and other expensive operations + /// and should only be called when necessary. + fn read_data(self) -> Result<&'a [u8], E>; +} + +/// Compares to blobs by comparing their size and oid, and only looks at the file if +/// the size matches, therefore it's very fast. +#[derive(Clone)] +pub struct FastEq; + +impl CompareBlobs for FastEq { + type Output = (); + + fn compare_blobs<'a, E>( + &mut self, + entry: &'a Entry, + worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + _entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result, E> { + // make sure to account for racily smudged entries here so that they don't always keep + // showing up as modified even after their contents have changed again, to a potentially + // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. + if entry.stat.size as usize != worktree_blob_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { + return Ok(Some(())); + } + let blob = worktree_blob.read_data()?; + let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); + Ok((entry.id != file_hash).then_some(())) + } +} + +/// Compares files to blobs by *always* comparing their hashes. +/// +/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and +/// therefore always returns an OID that can be reused later. 
+#[derive(Clone)] +pub struct HashEq; + +impl CompareBlobs for HashEq { + type Output = ObjectId; + + fn compare_blobs<'a, E>( + &mut self, + entry: &'a Entry, + _worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + _entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result, E> { + let blob = worktree_blob.read_data()?; + let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); + Ok((entry.id != file_hash).then_some(file_hash)) + } +} diff --git a/gix-status/src/index_as_worktree/function.rs b/gix-status/src/index_as_worktree/function.rs new file mode 100644 index 00000000000..be2572013c6 --- /dev/null +++ b/gix-status/src/index_as_worktree/function.rs @@ -0,0 +1,331 @@ +use std::{io, marker::PhantomData, path::Path}; + +use bstr::BStr; +use filetime::FileTime; +use gix_features::parallel::{in_parallel_if, Reduce}; + +use crate::{ + index_as_worktree::{ + content, + content::CompareBlobs, + types::{Error, Options}, + Change, VisitEntry, + }, + read, +}; + +/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them +/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes. +/// `options` are used to configure the operation. +/// +/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to +/// the current time for which it will be considered fresh. +/// +/// Note that this isn't technically quite what this function does as this also provides some additional information, +/// like whether a file has conflicts, and files that were added with `git add` are shown as a special +/// changes despite not technically requiring a change to the index since `git add` already added the file to the index. 
+pub fn index_as_worktree<'index, T, Find, E>( + index: &'index mut gix_index::State, + worktree: &Path, + collector: &mut impl VisitEntry<'index, ContentChange = T>, + compare: impl CompareBlobs + Send + Clone, + find: Find, + options: Options, +) -> Result<(), Error> +where + T: Send, + E: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, +{ + // the order is absolutely critical here we use the old timestamp to detect racy index entries + // (modified at or after the last index update) during the index update we then set those + // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp. + let timestamp = index.timestamp(); + index.set_timestamp(FileTime::now()); + let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( + 100, + index.entries().len().into(), + options.thread_limit, + None, + ); + let (entries, path_backing) = index.entries_mut_and_pathbacking(); + in_parallel_if( + || true, // TODO: heuristic: when is parallelization not worth it? 
+ entries.chunks_mut(chunk_size), + thread_limit, + { + let options = &options; + move |_| { + ( + State { + buf: Vec::new(), + odb_buf: Vec::new(), + timestamp, + path_backing, + worktree, + options, + }, + compare.clone(), + find.clone(), + ) + } + }, + |entries, (state, diff, find)| { + entries + .iter_mut() + .filter_map(|entry| state.process(entry, diff, find)) + .collect() + }, + ReduceChange { + collector, + phantom: PhantomData, + }, + ) +} + +struct State<'a, 'b> { + buf: Vec, + odb_buf: Vec, + timestamp: FileTime, + // path_cache: fs::Cache TODO path cache + path_backing: &'b [u8], + worktree: &'a Path, + options: &'a Options, +} + +type StatusResult<'index, T> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; + +impl<'index> State<'_, 'index> { + fn process( + &mut self, + entry: &'index mut gix_index::Entry, + diff: &mut impl CompareBlobs, + find: &mut Find, + ) -> Option> + where + E: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, + { + let conflict = match entry.stage() { + 0 => false, + 1 => true, + _ => return None, + }; + if entry.flags.intersects( + gix_index::entry::Flags::UPTODATE + | gix_index::entry::Flags::SKIP_WORKTREE + | gix_index::entry::Flags::ASSUME_VALID + | gix_index::entry::Flags::FSMONITOR_VALID, + ) { + return None; + } + let path = entry.path_in(self.path_backing); + let status = self.compute_status(&mut *entry, path, diff, find); + Some(status.map(move |status| (&*entry, path, status, conflict))) + } + + /// # On how racy-git is handled here + /// + /// Basically the racy detection is a safety mechanism that ensures we can always just compare the stat + /// information between index and worktree and if they match we don't need to look at the content. 
+ /// This usually just works but if a file updates quickly we could run into the following situation: + /// + /// * save file version `A` from disk into worktree (git add) + /// * file is changed so fast that the mtime doesn't change - *we only looks at seconds by default* + /// * file contents change but file-size stays the same, so `"foo" -> "bar"` has the same size but different content + /// + /// Now both `mtime` and `size`, and all other stat information, is the same but the file has actually changed. + /// This case is called *racily clean*. *The file should show up as changed but due to a data race it doesn't.* + /// This is the racy git problem. + /// + /// To solve this we do the following trick: Whenever we modify the index, which includes `git status`, we save the + /// current timestamp before the modification starts. This timestamp fundamentally represents a checkpoint of sorts. + /// We "promise" ourselves that after the modification finishes all entries modified before this timestamp have the + /// racy git problem resolved. + /// + /// So now when we modify the index we must resolve the racy git problem somehow. To do that we only need to look at + /// unchanged entries. Changed entries are not interesting since they are already showing up as changed anyway so there + /// isn't really a race-condition to worry about. This also explains why removing the `return` here doesn't have an apparent effect. + /// This entire branch here is just the optimization of "don't even look at index entries where the stat hasn't changed". + /// If we don't have this optimization the result shouldn't change, our status implementation will just be super slow :D + + /// We calculate whether this change is `racy_clean`, so if the last `timestamp` is before or the same as the `mtime` of the entry + /// which is what `new_stat.is_racy(..)` does in the branch, and only if we are sure that there is no race condition + /// do we `return` early. 
Since we don't `return` early we just do a full content comparison below, + /// which always yields the correct result, there is no race condition there. + /// + /// If a file showed up as racily clean and didn't change then we don't need to do anything. After this status check is + /// complete and the file won't show up as racily clean anymore, since it's mtime is now before the new timestamp. + /// However if the file did actually change then we really ran into one of those rare race conditions in that case we, + /// and git does the same, set the size of the file in the index to 0. This will always make the file show up as changed. + /// This adds the need to treat all files of size 0 in the index as changed. This is not quite right of course because 0 sized files + /// could be entirely valid and unchanged. Therefore this only applies if the oid doesn't match the oid of an empty file, + /// which is a constant. + /// + /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). 
+ fn compute_status( + &mut self, + entry: &mut gix_index::Entry, + git_path: &BStr, + diff: &mut impl CompareBlobs, + find: &mut Find, + ) -> Result>, Error> + where + E: std::error::Error + Send + Sync + 'static, + Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, + { + // TODO fs cache + let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; + let worktree_path = self.worktree.join(worktree_path); + let metadata = match worktree_path.symlink_metadata() { + // TODO: check if any parent directory is a symlink + // we need to use fs::Cache for that + Ok(metadata) if metadata.is_dir() => { + // index entries are normally only for files/symlinks + // if a file turned into a directory it was removed + // the only exception here are submodules which are + // part of the index despite being directories + // + // TODO: submodules: + // if entry.mode.contains(Mode::COMMIT) && + // resolve_gitlink_ref(ce->name, "HEAD", &sub)) + return Ok(Some(Change::Removed)); + } + Ok(metadata) => metadata, + Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), + Err(err) => { + return Err(err.into()); + } + }; + if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { + return Ok(Some(Change::IntentToAdd)); + } + let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; + let executable_bit_changed = + match entry + .mode + .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) + { + Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type)), + Some(gix_index::entry::mode::Change::ExecutableBit) => true, + None => false, + }; + + // Here we implement racy-git. See racy-git.txt in the git documentation for a detailed documentation. + // + // A file is racy if: + // 1. 
its `mtime` is at or after the last index timestamp and its entry stat information + // matches the on-disk file but the file contents are actually modified + // 2. it's size is 0 (set after detecting a file was racy previously) + // + // The first case is detected below by checking the timestamp if the file is marked unmodified. + // The second case is usually detected either because the on-disk file is not empty, hence + // the basic stat match fails, or by checking whether the size doesn't fit the oid. + let mut racy_clean = false; + if !executable_bit_changed + && new_stat.matches(&entry.stat, self.options.stat) + // TODO: find a test for the following line or remove it. Is this more often hit with smudge/clean filters? + && (!entry.id.is_empty_blob() || entry.stat.size == 0) + { + racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); + if !racy_clean { + return Ok(None); + } + } + + let read_file = WorktreeBlob { + buf: &mut self.buf, + path: &worktree_path, + entry, + options: self.options, + }; + let read_blob = OdbBlob { + buf: &mut self.odb_buf, + id: &entry.id, + find, + }; + let content_change = diff.compare_blobs::(entry, metadata.len() as usize, read_file, read_blob)?; + // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. + if content_change.is_some() && racy_clean { + entry.stat.size = 0; + } + if content_change.is_some() || executable_bit_changed { + Ok(Some(Change::Modification { + executable_bit_changed, + content_change, + })) + } else { + // don't diff against this file next time since we know the file is unchanged. 
+ entry.stat = new_stat; + Ok(None) + } + } +} + +struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { + collector: &'a mut T, + phantom: PhantomData, +} + +impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChange<'_, 'index, C> { + type Input = Vec>; + + type FeedProduce = (); + + type Output = (); + + type Error = Error; + + fn feed(&mut self, items: Self::Input) -> Result { + for item in items { + let (entry, path, change, conflict) = item?; + self.collector.visit_entry(entry, path, change, conflict); + } + Ok(()) + } + + fn finalize(self) -> Result { + Ok(()) + } +} + +struct WorktreeBlob<'a> { + buf: &'a mut Vec, + path: &'a Path, + entry: &'a gix_index::Entry, + options: &'a Options, +} + +struct OdbBlob<'a, Find, E> +where + E: std::error::Error + Send + Sync + 'static, + Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, +{ + buf: &'a mut Vec, + id: &'a gix_hash::oid, + find: Find, +} + +impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { + fn read_data(self) -> Result<&'a [u8], Error> { + let res = read::data_to_buf_with_meta( + self.path, + self.buf, + self.entry.mode == gix_index::entry::Mode::SYMLINK, + &self.options.fs, + )?; + Ok(res) + } +} + +impl<'a, Find, E> content::ReadDataOnce<'a, Error> for OdbBlob<'a, Find, E> +where + E: std::error::Error + Send + Sync + 'static, + Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, +{ + fn read_data(mut self) -> Result<&'a [u8], Error> { + (self.find)(self.id, self.buf) + .map(|b| b.data) + .map_err(move |err| Error::Find(Box::new(err))) + } +} diff --git a/gix-status/src/index_as_worktree/mod.rs b/gix-status/src/index_as_worktree/mod.rs new file mode 100644 index 00000000000..8294a54e8ac --- /dev/null +++ b/gix-status/src/index_as_worktree/mod.rs @@ -0,0 +1,11 @@ +//! Changes between an index and a worktree. 
+/// +mod types; +pub use types::{Change, Error, Options, VisitEntry}; + +mod recorder; +pub use recorder::Recorder; + +/// +pub mod content; +pub(crate) mod function; diff --git a/gix-status/src/index_as_worktree/recorder.rs b/gix-status/src/index_as_worktree/recorder.rs new file mode 100644 index 00000000000..48beb25a313 --- /dev/null +++ b/gix-status/src/index_as_worktree/recorder.rs @@ -0,0 +1,27 @@ +use bstr::BStr; +use gix_index as index; + +use crate::index_as_worktree::{Change, VisitEntry}; + +/// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. +#[derive(Debug, Default)] +pub struct Recorder<'index, T = ()> { + /// collected changes, index entries without conflicts or changes are excluded. + pub records: Vec<(&'index BStr, Option>, bool)>, +} + +impl<'index, T: Send> VisitEntry<'index> for Recorder<'index, T> { + type ContentChange = T; + + fn visit_entry( + &mut self, + _entry: &'index index::Entry, + rela_path: &'index BStr, + status: Option>, + conflict: bool, + ) { + if conflict || status.is_some() { + self.records.push((rela_path, status, conflict)) + } + } +} diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs new file mode 100644 index 00000000000..3d488d24ef4 --- /dev/null +++ b/gix-status/src/index_as_worktree/types.rs @@ -0,0 +1,69 @@ +use bstr::BStr; + +/// The error returned by [`status()`][crate::status()]. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not convert path to UTF8")] + IllformedUtf8, + #[error("The clock was off when reading file related metadata after updating a file on disk")] + Time(#[from] std::time::SystemTimeError), + #[error("IO error while writing blob or reading file metadata or changing filetype")] + Io(#[from] std::io::Error), + #[error("Failed to obtain blob from object database")] + Find(#[source] Box), +} + +#[derive(Clone, Default)] +/// Options that control how the index status with a worktree is computed. +pub struct Options { + /// Capabilities of the file system which affect the status computation. + pub fs: gix_fs::Capabilities, + /// If set, don't use more than this amount of threads. + /// Otherwise, usually use as many threads as there are logical cores. + /// A value of 0 is interpreted as no-limit + pub thread_limit: Option, + /// Options that control how stat comparisons are made when checking if a file is fresh. + pub stat: gix_index::entry::stat::Options, +} + +/// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Change { + /// This corresponding file does not exist in the worktree anymore. + Removed, + /// The type of file changed compared to the worktree, i.e. a symlink s now a file. + Type, + /// This worktree file was modified in some form, like a permission change or content change or both, + /// as compared to this entry. + Modification { + /// Indicates that one of the stat changes was an executable bit change + /// which is a significant change itself. + executable_bit_changed: bool, + /// The output of the [`CompareBlobs`][crate::status::content::CompareBlobs] run on this entry. + /// If there is no content change and only the executable bit + /// changed than this is `None`. 
+ content_change: Option, + }, + /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. + /// + /// This means it's not available in the object database yet or the index was created from, + /// even though now an entry exists that represents the worktree file. + IntentToAdd, +} + +/// Observe changes by comparing an index entry to the worktree or another index. +pub trait VisitEntry<'index> { + /// Data generated by comparing an entry with a file. + type ContentChange; + /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether + /// or not it has a `conflict`. + /// If `change` is `None`, there is no change. + fn visit_entry( + &mut self, + entry: &'index gix_index::Entry, + rela_path: &'index BStr, + change: Option>, + conflict: bool, + ); +} diff --git a/gix-status/src/lib.rs b/gix-status/src/lib.rs new file mode 100644 index 00000000000..843eb6a20a5 --- /dev/null +++ b/gix-status/src/lib.rs @@ -0,0 +1,15 @@ +//! This crate includes the various diffs `git` can do between different representations +//! of the repository state, like comparisons between… +//! +//! * index and working tree +//! * index and tree +//! * find untracked files +//! +//! While also being able to check check if the working tree is dirty, quickly. +#![deny(missing_docs, rust_2018_idioms, unsafe_code)] + +/// +pub mod read; + +pub mod index_as_worktree; +pub use index_as_worktree::function::index_as_worktree; diff --git a/gix-status/src/read.rs b/gix-status/src/read.rs new file mode 100644 index 00000000000..fd336817525 --- /dev/null +++ b/gix-status/src/read.rs @@ -0,0 +1,63 @@ +//! This module allows creating git blobs from worktree files. +//! +//! For the most part a blob just contains the raw on-disk data. However symlinks need to be considered properly +//! and attributes/config options need to be considered. 
+ +use std::{ + fs::{read_link, File}, + io::{self, Read}, + path::Path, +}; + +use gix_object::Blob; + +// TODO: tests + +// TODO: what to do about precompose unicode and ignore_case for symlinks + +/// Create a blob from a file or symlink. +pub fn blob(path: &Path, capabilities: &gix_fs::Capabilities) -> io::Result { + let mut data = Vec::new(); + data_to_buf(path, &mut data, capabilities)?; + Ok(Blob { data }) +} + +/// Create a blob from a file or symlink. +pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &gix_fs::Capabilities) -> io::Result { + let mut data = Vec::new(); + data_to_buf_with_meta(path, &mut data, is_symlink, capabilities)?; + Ok(Blob { data }) +} + +/// Create blob data from a file or symlink. +pub fn data_to_buf<'a>(path: &Path, buf: &'a mut Vec, capabilities: &gix_fs::Capabilities) -> io::Result<&'a [u8]> { + data_to_buf_with_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) +} + +/// Create a blob from a file or symlink. +pub fn data_to_buf_with_meta<'a>( + path: &Path, + buf: &'a mut Vec, + is_symlink: bool, + capabilities: &gix_fs::Capabilities, +) -> io::Result<&'a [u8]> { + buf.clear(); + // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just + // normal files with their content equal to the linked path (so can be read normally) + // + if is_symlink && capabilities.symlink { + // conversion to bstr can never fail because symlinks are only used + // on unix (by git) so no reason to use the try version here + let symlink_path = gix_path::into_bstr(read_link(path)?); + buf.extend_from_slice(&symlink_path); + // TODO: there is no reason this should be a clone + // std isn't great about allowing users to avoid allocations but we could + // simply write our own wrapper around libc::readlink which reuses the + // buffer. 
This would require unsafe code tough (obviously) + } else { + buf.clear(); + File::open(path)?.read_to_end(buf)?; + // TODO apply filters + } + Ok(buf.as_slice()) +} diff --git a/gix-status/tests/fixtures/generated-archives/.gitignore b/gix-status/tests/fixtures/generated-archives/.gitignore new file mode 100644 index 00000000000..ee165f3a1c5 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/.gitignore @@ -0,0 +1,2 @@ +status_unchanged.tar.xz +status_changed.tar.xz diff --git a/gix-status/tests/fixtures/generated-archives/racy_git.tar.xz b/gix-status/tests/fixtures/generated-archives/racy_git.tar.xz new file mode 100644 index 00000000000..bb2ff2cd4a1 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/racy_git.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78c306d8a19ce7f09a4cdf284e761d9c7766a3491bfb83751b9e6f6bc8fc1c9 +size 9812 diff --git a/gix-status/tests/fixtures/generated-archives/status_conflict.tar.xz b/gix-status/tests/fixtures/generated-archives/status_conflict.tar.xz new file mode 100644 index 00000000000..a871be6f358 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/status_conflict.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64431478ff4ee4613d2e1fe0766b4da8aeac8f4dee2b42726efb6d2e87bc830 +size 11012 diff --git a/gix-status/tests/fixtures/generated-archives/status_intent_to_add.tar.xz b/gix-status/tests/fixtures/generated-archives/status_intent_to_add.tar.xz new file mode 100644 index 00000000000..289a58a1947 --- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/status_intent_to_add.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8fa2e8085d464945eade89cfa9dadb89345c584b1d72a535d464859f02950a7 +size 9280 diff --git a/gix-status/tests/fixtures/generated-archives/status_removed.tar.xz b/gix-status/tests/fixtures/generated-archives/status_removed.tar.xz new file mode 100644 index 00000000000..c0179e3cd18 
--- /dev/null +++ b/gix-status/tests/fixtures/generated-archives/status_removed.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc72bb3b08e421928d4402b49100a4ac144b4cdcc84ab70e6b721ec5ce4546c +size 10568 diff --git a/gix-status/tests/fixtures/racy_git.sh b/gix-status/tests/fixtures/racy_git.sh new file mode 100755 index 00000000000..7fdef456f87 --- /dev/null +++ b/gix-status/tests/fixtures/racy_git.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +echo -n "foo" > content + +git add -A +git commit -m "Commit" + +# file size should not be changed by this +echo -n "bar" > content diff --git a/gix-status/tests/fixtures/status_changed.sh b/gix-status/tests/fixtures/status_changed.sh new file mode 100755 index 00000000000..033c6a8336f --- /dev/null +++ b/gix-status/tests/fixtures/status_changed.sh @@ -0,0 +1,26 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo -n "other content" > dir/content +echo -n "other content" > dir/content2 +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" + +chmod +x dir/content +echo "new content" > dir/content2 +chmod -x executable +echo -n "foo" > executable + +rm empty +ln -sf dir/content empty +git reset \ No newline at end of file diff --git a/gix-status/tests/fixtures/status_conflict.sh b/gix-status/tests/fixtures/status_conflict.sh new file mode 100755 index 00000000000..d78e81bfe7e --- /dev/null +++ b/gix-status/tests/fixtures/status_conflict.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +echo base > content +git add -A +git commit -m "base" + +git checkout -b feat +echo feat > content +git commit -am "feat" + +git checkout main +echo base-change > content +git commit -am "new base" + +git merge feat || : diff --git a/gix-status/tests/fixtures/status_intent_to_add.sh b/gix-status/tests/fixtures/status_intent_to_add.sh 
new file mode 100755 index 00000000000..7d1601385c0 --- /dev/null +++ b/gix-status/tests/fixtures/status_intent_to_add.sh @@ -0,0 +1,9 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch content +echo -n "content" > content + +git add --intent-to-add -A diff --git a/gix-status/tests/fixtures/status_removed.sh b/gix-status/tests/fixtures/status_removed.sh new file mode 100755 index 00000000000..30cdfb94993 --- /dev/null +++ b/gix-status/tests/fixtures/status_removed.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo -n "other content" > dir/content +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" +rm -rf ./empty ./executable ./dir/content ./dir/sub-dir/symlink +git reset \ No newline at end of file diff --git a/gix-status/tests/fixtures/status_unchanged.sh b/gix-status/tests/fixtures/status_unchanged.sh new file mode 100755 index 00000000000..67684549509 --- /dev/null +++ b/gix-status/tests/fixtures/status_unchanged.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo -n "other content" > dir/content +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" + +touch ./empty ./executable ./dir/content ./dir/sub-dir/symlink + +git reset # ensure index timestamp is large enough to not mark everything racy \ No newline at end of file diff --git a/gix-status/tests/status-multi-threaded.rs b/gix-status/tests/status-multi-threaded.rs new file mode 100644 index 00000000000..970eddcb034 --- /dev/null +++ b/gix-status/tests/status-multi-threaded.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "internal-testing-gix-features-parallel")] +mod status; +#[cfg(feature = "internal-testing-gix-features-parallel")] +use status::*; diff --git 
a/gix-status/tests/status-single-threaded.rs b/gix-status/tests/status-single-threaded.rs new file mode 100644 index 00000000000..54e0daf8929 --- /dev/null +++ b/gix-status/tests/status-single-threaded.rs @@ -0,0 +1,4 @@ +#[cfg(not(feature = "internal-testing-gix-features-parallel"))] +mod status; +#[cfg(not(feature = "internal-testing-gix-features-parallel"))] +use status::*; diff --git a/gix-status/tests/status/index_as_worktree.rs b/gix-status/tests/status/index_as_worktree.rs new file mode 100644 index 00000000000..7a5f84f63b7 --- /dev/null +++ b/gix-status/tests/status/index_as_worktree.rs @@ -0,0 +1,226 @@ +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, +}; + +use bstr::BStr; +use filetime::{set_file_mtime, FileTime}; +use gix_index as index; +use gix_index::Entry; +use gix_status::{ + index_as_worktree, + index_as_worktree::{ + content::{CompareBlobs, FastEq, ReadDataOnce}, + Change, Options, Recorder, + }, +}; + +use crate::fixture_path; + +// since tests are fixtures a bunch of stat information (like inode number) +// changes when extracting the data so we need to disable all advanced stat +// changes and only look at mtime seconds and file size to properly +// test all code paths (and to trigger racy git). 
+const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { + trust_ctime: false, + check_stat: false, + use_nsec: false, + use_stdev: false, +}; + +fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { + let worktree = fixture_path(name); + let git_dir = worktree.join(".git"); + let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); + let mut recorder = Recorder::default(); + index_as_worktree( + &mut index, + &worktree, + &mut recorder, + FastEq, + |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), + Options { + fs: gix_fs::Capabilities::probe(git_dir), + stat: TEST_OPTIONS, + ..Options::default() + }, + ) + .unwrap(); + recorder.records.sort_unstable_by_key(|(name, _, _)| *name); + assert_eq!(recorder.records, expected_status) +} + +#[test] +fn removed() { + fixture( + "status_removed", + &[ + (BStr::new(b"dir/content"), Some(Change::Removed), false), + (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), + (BStr::new(b"empty"), Some(Change::Removed), false), + (BStr::new(b"executable"), Some(Change::Removed), false), + ], + ); +} + +#[test] +fn intent_to_add() { + fixture( + "status_intent_to_add", + &[(BStr::new(b"content"), Some(Change::IntentToAdd), false)], + ); +} + +#[test] +fn conflict() { + fixture( + "status_conflict", + &[( + BStr::new(b"content"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + true, + )], + ); +} + +#[test] +fn unchanged() { + fixture("status_unchanged", &[]); +} + +#[test] +#[cfg_attr( + windows, + ignore = "needs work, on windows plenty of additional files are considered modified for some reason" +)] +fn modified() { + fixture( + "status_changed", + &[ + ( + BStr::new(b"dir/content"), + Some(Change::Modification { + executable_bit_changed: true, + content_change: None, + }), + false, + ), + ( + BStr::new(b"dir/content2"), + Some(Change::Modification { 
+ executable_bit_changed: false, + content_change: Some(()), + }), + false, + ), + (BStr::new(b"empty"), Some(Change::Type), false), + ( + BStr::new(b"executable"), + Some(Change::Modification { + executable_bit_changed: true, + content_change: Some(()), + }), + false, + ), + ], + ); +} + +#[test] +fn racy_git() { + let timestamp = 940040400; + // we need a writable fixture because we have to mess with `mtimes` manually, because touch -d + // respects the locale so the test wouldn't work depending on the timezone you + // run your test in. + let dir = gix_testtools::scripted_fixture_writable("racy_git.sh").expect("script works"); + let worktree = dir.path(); + let git_dir = worktree.join(".git"); + let fs = gix_fs::Capabilities::probe(&git_dir); + let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); + + #[derive(Clone)] + struct CountCalls(Arc, FastEq); + impl CompareBlobs for CountCalls { + type Output = (); + + fn compare_blobs<'a, E>( + &mut self, + entry: &'a Entry, + worktree_blob_size: usize, + worktree_blob: impl ReadDataOnce<'a, E>, + entry_blob: impl ReadDataOnce<'a, E>, + ) -> Result, E> { + self.0.fetch_add(1, Ordering::Relaxed); + self.1 + .compare_blobs(entry, worktree_blob_size, worktree_blob, entry_blob) + } + } + + // We artificially mess with the entry's `mtime` so that it's before the timestamp saved by git. + // This would usually mean an invalid fs/invalid index file and as a result the racy git + // mitigation doesn't work and the worktree shows up as unchanged even tough the file did + // change. + // This case doesn't happen in the realworld (except for file corruption) but + // makes sure we are actually hitting the right codepath. 
+ index.entries_mut()[0].stat.mtime.secs = timestamp; + set_file_mtime(worktree.join("content"), FileTime::from_unix_time(timestamp as i64, 0)) + .expect("changing filetime works"); + let mut recorder = Recorder::default(); + + let count = Arc::new(AtomicUsize::new(0)); + let counter = CountCalls(count.clone(), FastEq); + index_as_worktree( + &mut index, + worktree, + &mut recorder, + counter.clone(), + |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), + Options { + fs, + stat: TEST_OPTIONS, + ..Options::default() + }, + ) + .unwrap(); + assert_eq!(count.load(Ordering::Relaxed), 0, "no blob content is accessed"); + assert_eq!(recorder.records, &[], "the testcase triggers racy git"); + + // Now we also backdate the index timestamp to match the artificially created + // mtime above this is now a realistic realworld race-condition which should trigger racy git + // and cause proper output. + index.set_timestamp(FileTime::from_unix_time(timestamp as i64, 0)); + let mut recorder = Recorder::default(); + index_as_worktree( + &mut index, + worktree, + &mut recorder, + counter, + |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), + Options { + fs, + stat: TEST_OPTIONS, + ..Options::default() + }, + ) + .unwrap(); + assert_eq!( + count.load(Ordering::Relaxed), + 1, + "no we needed to access the blob content" + ); + assert_eq!( + recorder.records, + &[( + BStr::new(b"content"), + Some(Change::Modification { + executable_bit_changed: false, + content_change: Some(()), + }), + false + )], + "racy change is correctly detected" + ); +} diff --git a/gix-status/tests/status/mod.rs b/gix-status/tests/status/mod.rs new file mode 100644 index 00000000000..af50d26613a --- /dev/null +++ b/gix-status/tests/status/mod.rs @@ -0,0 +1,7 @@ +mod index_as_worktree; + +pub fn fixture_path(name: &str) -> std::path::PathBuf { + let dir = 
gix_testtools::scripted_fixture_read_only(std::path::Path::new(name).with_extension("sh")) + .expect("script works"); + dir +} diff --git a/justfile b/justfile index 9818ba80ed7..a6e1873b235 100755 --- a/justfile +++ b/justfile @@ -135,6 +135,10 @@ unit-tests: cargo test -p gix-archive --features tar cargo test -p gix-archive --features tar_gz cargo test -p gix-archive --features zip + cd gix-status; \ + set -ex; \ + cargo test; \ + cargo test --features "internal-testing-gix-features-parallel" cd gix-object; \ set -ex; \ cargo test; \ From bd961b3065ca71ac4fa59e9988a3b7e705cd4c67 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 15:29:14 +0200 Subject: [PATCH 4/7] move worktree- checkout functionality into its own crate --- Cargo.lock | 23 + Cargo.toml | 1 + README.md | 1 + crate-status.md | 21 + gix-worktree-state/Cargo.toml | 44 ++ gix-worktree-state/LICENSE-APACHE | 1 + gix-worktree-state/LICENSE-MIT | 1 + gix-worktree-state/src/checkout/chunk.rs | 403 ++++++++++++++ gix-worktree-state/src/checkout/entry.rs | 294 ++++++++++ gix-worktree-state/src/checkout/function.rs | 169 ++++++ gix-worktree-state/src/checkout/mod.rs | 101 ++++ gix-worktree-state/src/lib.rs | 6 + .../fixtures/generated-archives/.gitignore | 5 + .../make_ignorecase_collisions.tar.xz | 3 + .../tests/fixtures/make_dangerous_symlink.sh | 24 + .../fixtures/make_ignorecase_collisions.sh | 27 + .../fixtures/make_mixed_without_submodules.sh | 20 + ...e_mixed_without_submodules_and_symlinks.sh | 20 + .../tests/state-multi-threaded.rs | 4 + .../tests/state-single-threaded.rs | 4 + gix-worktree-state/tests/state/checkout.rs | 526 ++++++++++++++++++ gix-worktree-state/tests/state/mod.rs | 10 + justfile | 4 + 23 files changed, 1712 insertions(+) create mode 100644 gix-worktree-state/Cargo.toml create mode 120000 gix-worktree-state/LICENSE-APACHE create mode 120000 gix-worktree-state/LICENSE-MIT create mode 100644 gix-worktree-state/src/checkout/chunk.rs create mode 100644 
gix-worktree-state/src/checkout/entry.rs create mode 100644 gix-worktree-state/src/checkout/function.rs create mode 100644 gix-worktree-state/src/checkout/mod.rs create mode 100644 gix-worktree-state/src/lib.rs create mode 100644 gix-worktree-state/tests/fixtures/generated-archives/.gitignore create mode 100644 gix-worktree-state/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz create mode 100755 gix-worktree-state/tests/fixtures/make_dangerous_symlink.sh create mode 100755 gix-worktree-state/tests/fixtures/make_ignorecase_collisions.sh create mode 100755 gix-worktree-state/tests/fixtures/make_mixed_without_submodules.sh create mode 100755 gix-worktree-state/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh create mode 100644 gix-worktree-state/tests/state-multi-threaded.rs create mode 100644 gix-worktree-state/tests/state-single-threaded.rs create mode 100644 gix-worktree-state/tests/state/checkout.rs create mode 100644 gix-worktree-state/tests/state/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 8194daa534e..d6398414be2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2488,6 +2488,29 @@ dependencies = [ "walkdir", ] +[[package]] +name = "gix-worktree-state" +version = "0.1.0" +dependencies = [ + "bstr", + "gix-features 0.32.1", + "gix-filter", + "gix-fs 0.4.1", + "gix-glob 0.10.2", + "gix-hash 0.11.4", + "gix-index 0.22.0", + "gix-object 0.34.0", + "gix-odb", + "gix-path 0.8.4", + "gix-testtools", + "gix-worktree 0.24.0", + "io-close", + "once_cell", + "symlink", + "thiserror", + "walkdir", +] + [[package]] name = "gix-worktree-stream" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index 9efc81cdd41..6fb4b758a4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -235,6 +235,7 @@ members = [ "gix-index", "gix-bitmap", "gix-worktree", + "gix-worktree-state", "gix-status", "gix-revision", "gix-packetline", diff --git a/README.md b/README.md index 82ba97ec99b..aa3bca6523b 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,7 @@ is usable 
to some extent. * [gix-archive](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-archive) * [gix-submodule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-submodule) * [gix-status](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-status) + * [gix-worktree-state](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree-state) * `gitoxide-core` * **very early** _(possibly without any documentation and many rough edges)_ * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) diff --git a/crate-status.md b/crate-status.md index 94288f6ea66..a0ca1892fc7 100644 --- a/crate-status.md +++ b/crate-status.md @@ -459,6 +459,27 @@ Make it the best-performing implementation and the most convenient one. * [ ] differences between tree and index to turn tree into index * [ ] untracked files * [ ] fast answer to 'is it dirty'. +* +### gix-worktree-state +* handle the working **tree/checkout** + - [x] checkout an index of files, executables and symlinks just as fast as git + - [x] forbid symlinks in directories + - [ ] handle submodules + - [ ] handle sparse directories + - [ ] handle sparse index + - [x] linear scaling with multi-threading up to IO saturation + - supported attributes to affect working tree and index contents + - [x] eol + - [x] working-tree-encoding + - …more + - **filtering** + - [x] `text` + - [x] `ident` + - [x] filter processes + - [x] single-invocation clean/smudge filters +* access to per-path information, like `.gitignore` and `.gitattributes` in a manner well suited for efficient lookups + * [x] _exclude_ information + * [x] attributes ### gix-worktree * handle the working **tree/checkout** diff --git a/gix-worktree-state/Cargo.toml b/gix-worktree-state/Cargo.toml new file mode 100644 index 00000000000..0e023ff638f --- /dev/null +++ b/gix-worktree-state/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "gix-worktree-state" +version = "0.1.0" +repository = 
"https://github.com/Byron/gitoxide" +license = "MIT OR Apache-2.0" +description = "A crate of the gitoxide project implementing setting the worktree to a particular state" +authors = ["Sebastian Thiel "] +edition = "2021" +include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] +rust-version = "1.65" + +[lib] +doctest = false + +[[test]] +name = "multi-threaded" +path = "tests/state-multi-threaded.rs" +required-features = ["internal-testing-gix-features-parallel"] + +[features] +internal-testing-gix-features-parallel = ["gix-features/parallel"] + +[dependencies] +gix-worktree = { version = "^0.24.0", path = "../gix-worktree" } +gix-index = { version = "^0.22.0", path = "../gix-index" } +gix-fs = { version = "^0.4.1", path = "../gix-fs" } +gix-hash = { version = "^0.11.4", path = "../gix-hash" } +gix-object = { version = "^0.34.0", path = "../gix-object" } +gix-glob = { version = "^0.10.2", path = "../gix-glob" } +gix-path = { version = "^0.8.4", path = "../gix-path" } +gix-features = { version = "^0.32.1", path = "../gix-features" } +gix-filter = { version = "^0.3.0", path = "../gix-filter" } + +io-close = "0.3.7" +thiserror = "1.0.26" +bstr = { version = "1.3.0", default-features = false } + +[dev-dependencies] +gix-testtools = { path = "../tests/tools" } +gix-odb = { path = "../gix-odb" } +symlink = "0.1.0" +once_cell = "1.18.0" + +walkdir = "2.3.2" diff --git a/gix-worktree-state/LICENSE-APACHE b/gix-worktree-state/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/gix-worktree-state/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-worktree-state/LICENSE-MIT b/gix-worktree-state/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/gix-worktree-state/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-worktree-state/src/checkout/chunk.rs b/gix-worktree-state/src/checkout/chunk.rs new file mode 100644 index 
00000000000..445ed9a9680 --- /dev/null +++ b/gix-worktree-state/src/checkout/chunk.rs @@ -0,0 +1,403 @@ +use std::{ + collections::BTreeSet, + sync::atomic::{AtomicUsize, Ordering}, +}; + +use bstr::{BStr, BString}; +use gix_hash::oid; + +use crate::{checkout, checkout::entry}; +use gix_worktree::Cache; + +mod reduce { + use std::marker::PhantomData; + + use gix_features::progress::Progress; + + use crate::checkout; + + pub struct Reduce<'a, 'entry, P1, P2, E> { + pub files: Option<&'a mut P1>, + pub bytes: Option<&'a mut P2>, + pub aggregate: super::Outcome<'entry>, + pub marker: PhantomData, + } + + impl<'a, 'entry, P1, P2, E> gix_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> + where + P1: Progress, + P2: Progress, + E: std::error::Error + Send + Sync + 'static, + { + type Input = Result, checkout::Error>; + type FeedProduce = (); + type Output = super::Outcome<'entry>; + type Error = checkout::Error; + + fn feed(&mut self, item: Self::Input) -> Result { + let item = item?; + let super::Outcome { + bytes_written, + files, + delayed_symlinks, + errors, + collisions, + delayed_paths_unknown, + delayed_paths_unprocessed, + } = item; + self.aggregate.bytes_written += bytes_written; + self.aggregate.files += files; + self.aggregate.delayed_symlinks.extend(delayed_symlinks); + self.aggregate.errors.extend(errors); + self.aggregate.collisions.extend(collisions); + self.aggregate.delayed_paths_unknown.extend(delayed_paths_unknown); + self.aggregate + .delayed_paths_unprocessed + .extend(delayed_paths_unprocessed); + + if let Some(progress) = self.bytes.as_deref_mut() { + progress.set(self.aggregate.bytes_written as gix_features::progress::Step); + } + if let Some(progress) = self.files.as_deref_mut() { + progress.set(self.aggregate.files); + } + + Ok(()) + } + + fn finalize(self) -> Result { + Ok(self.aggregate) + } + } +} +pub use reduce::Reduce; + +use crate::checkout::entry::DelayedFilteredStream; + +#[derive(Default)] +pub struct Outcome<'a> { + pub 
collisions: Vec, + pub errors: Vec, + pub delayed_symlinks: Vec<(&'a mut gix_index::Entry, &'a BStr)>, + // all (immediately) written bytes + pub bytes_written: u64, + // the amount of files we processed + pub files: usize, + /// Relative paths that the process listed as 'delayed' even though we never passed them. + pub delayed_paths_unknown: Vec, + /// All paths that were left unprocessed, because they were never listed by the process even though we passed them. + pub delayed_paths_unprocessed: Vec, +} + +#[derive(Clone)] +pub struct Context { + pub find: Find, + pub path_cache: Cache, + pub filters: gix_filter::Pipeline, + pub buf: Vec, + pub options: Options, +} + +#[derive(Clone, Copy)] +pub struct Options { + pub fs: gix_fs::Capabilities, + pub destination_is_initially_empty: bool, + pub overwrite_existing: bool, + pub keep_going: bool, + pub filter_process_delay: gix_filter::driver::apply::Delay, +} + +impl From<&checkout::Options> for Options { + fn from(opts: &checkout::Options) -> Self { + Options { + fs: opts.fs, + destination_is_initially_empty: opts.destination_is_initially_empty, + overwrite_existing: opts.overwrite_existing, + keep_going: opts.keep_going, + filter_process_delay: opts.filter_process_delay, + } + } +} + +pub fn process<'entry, Find, E>( + entries_with_paths: impl Iterator, + files: Option<&AtomicUsize>, + bytes: Option<&AtomicUsize>, + delayed_filter_results: &mut Vec>, + ctx: &mut Context, +) -> Result, checkout::Error> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let mut delayed_symlinks = Vec::new(); + let mut collisions = Vec::new(); + let mut errors = Vec::new(); + let mut bytes_written = 0; + let mut files_in_chunk = 0; + + for (entry, entry_path) in entries_with_paths { + // TODO: write test for that + if entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { + if let Some(files) = files { + files.fetch_add(1, Ordering::SeqCst); + } + 
files_in_chunk += 1; + continue; + } + + // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. + // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is + // before creating it. + // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety + // around writing through symlinks (even though we handle this). + // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. + if entry.mode == gix_index::entry::Mode::SYMLINK { + delayed_symlinks.push((entry, entry_path)); + continue; + } + + match checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? { + entry::Outcome::Written { bytes } => { + bytes_written += bytes as u64; + files_in_chunk += 1 + } + entry::Outcome::Delayed(delayed) => delayed_filter_results.push(delayed), + } + } + + Ok(Outcome { + bytes_written, + files: files_in_chunk, + errors, + collisions, + delayed_symlinks, + delayed_paths_unknown: Vec::new(), + delayed_paths_unprocessed: Vec::new(), + }) +} + +pub fn process_delayed_filter_results( + mut delayed_filter_results: Vec>, + files: Option<&AtomicUsize>, + bytes: Option<&AtomicUsize>, + out: &mut Outcome<'_>, + ctx: &mut Context, +) -> Result<(), checkout::Error> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let Options { + destination_is_initially_empty, + overwrite_existing, + keep_going, + .. + } = ctx.options; + let mut bytes_written = 0; + let mut delayed_files = 0; + // Sort by path for fast lookups + delayed_filter_results.sort_by(|a, b| a.entry_path.cmp(b.entry_path)); + // We process each key and do as the filter process tells us, while collecting data about the overall progress. 
+ let keys: BTreeSet<_> = delayed_filter_results.iter().map(|d| d.key.clone()).collect(); + let mut unknown_paths = Vec::new(); + let mut rela_path_as_path = Default::default(); + for key in keys { + loop { + let rela_paths = ctx.filters.driver_state_mut().list_delayed_paths(&key)?; + if rela_paths.is_empty() { + break; + } + + for rela_path in rela_paths { + let delayed = match delayed_filter_results.binary_search_by(|d| d.entry_path.cmp(rela_path.as_ref())) { + Ok(idx) => &mut delayed_filter_results[idx], + Err(_) => { + if keep_going { + unknown_paths.push(rela_path); + continue; + } else { + return Err(checkout::Error::FilterPathUnknown { rela_path }); + } + } + }; + let mut read = std::io::BufReader::with_capacity( + 512 * 1024, + ctx.filters.driver_state_mut().fetch_delayed( + &key, + rela_path.as_ref(), + gix_filter::driver::Operation::Smudge, + )?, + ); + let (file, set_executable_after_creation) = match entry::open_file( + &std::mem::take(&mut delayed.validated_file_path), // mark it as seen, relevant for `unprocessed_paths` + destination_is_initially_empty, + overwrite_existing, + delayed.needs_executable_bit, + delayed.entry.mode, + ) { + Ok(res) => res, + Err(err) => { + if !is_collision(&err, delayed.entry_path, &mut out.collisions, files) { + handle_error(err, delayed.entry_path, files, &mut out.errors, ctx.options.keep_going)?; + } + std::io::copy(&mut read, &mut std::io::sink())?; + continue; + } + }; + let mut write = WriteWithProgress { + inner: std::io::BufWriter::with_capacity(512 * 1024, file), + progress: bytes, + }; + bytes_written += std::io::copy(&mut read, &mut write)?; + entry::finalize_entry( + delayed.entry, + write.inner.into_inner().map_err(std::io::IntoInnerError::into_error)?, + set_executable_after_creation.then(|| { + rela_path_as_path = gix_path::from_bstr(delayed.entry_path); + rela_path_as_path.as_ref() + }), + )?; + delayed_files += 1; + if let Some(files) = files { + files.fetch_add(1, Ordering::SeqCst); + } + } + } + } + + 
let unprocessed_paths = delayed_filter_results + .into_iter() + .filter_map(|d| (!d.validated_file_path.as_os_str().is_empty()).then(|| d.entry_path.to_owned())) + .collect(); + + if !keep_going && !unknown_paths.is_empty() { + return Err(checkout::Error::FilterPathsUnprocessed { + rela_paths: unprocessed_paths, + }); + } + + out.delayed_paths_unknown = unknown_paths; + out.delayed_paths_unprocessed = unprocessed_paths; + out.bytes_written += bytes_written; + out.files += delayed_files; + Ok(()) +} + +pub struct WriteWithProgress<'a, T> { + pub inner: T, + pub progress: Option<&'a AtomicUsize>, +} + +impl<'a, T> std::io::Write for WriteWithProgress<'a, T> +where + T: std::io::Write, +{ + fn write(&mut self, buf: &[u8]) -> std::io::Result { + let written = self.inner.write(buf)?; + if let Some(progress) = self.progress { + progress.fetch_add(written as gix_features::progress::Step, Ordering::SeqCst); + } + Ok(written) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.inner.flush() + } +} + +pub fn checkout_entry_handle_result<'entry, Find, E>( + entry: &'entry mut gix_index::Entry, + entry_path: &'entry BStr, + errors: &mut Vec, + collisions: &mut Vec, + files: Option<&AtomicUsize>, + bytes: Option<&AtomicUsize>, + Context { + find, + path_cache, + filters, + buf, + options, + }: &mut Context, +) -> Result, checkout::Error> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let res = entry::checkout( + entry, + entry_path, + entry::Context { + find, + path_cache, + filters, + buf, + }, + *options, + ); + match res { + Ok(out) => { + if let Some(num) = out.as_bytes() { + if let Some(bytes) = bytes { + bytes.fetch_add(num, Ordering::SeqCst); + } + if let Some(files) = files { + files.fetch_add(1, Ordering::SeqCst); + } + } + Ok(out) + } + Err(checkout::Error::Io(err)) if is_collision(&err, entry_path, collisions, files) => { + Ok(entry::Outcome::Written { bytes: 0 }) + } + Err(err) => 
handle_error(err, entry_path, files, errors, options.keep_going) + .map(|()| entry::Outcome::Written { bytes: 0 }), + } +} + +fn handle_error( + err: E, + entry_path: &BStr, + files: Option<&AtomicUsize>, + errors: &mut Vec, + keep_going: bool, +) -> Result<(), E> +where + E: std::error::Error + Send + Sync + 'static, +{ + if keep_going { + errors.push(checkout::ErrorRecord { + path: entry_path.into(), + error: Box::new(err), + }); + if let Some(files) = files { + files.fetch_add(1, Ordering::SeqCst); + } + Ok(()) + } else { + Err(err) + } +} + +fn is_collision( + err: &std::io::Error, + entry_path: &BStr, + collisions: &mut Vec, + files: Option<&AtomicUsize>, +) -> bool { + if !gix_fs::symlink::is_collision_error(err) { + return false; + } + // We are here because a file existed or was blocked by a directory which shouldn't be possible unless + // we are on a file insensitive file system. + gix_features::trace::error!("{entry_path}: collided ({:?})", err.kind()); + collisions.push(checkout::Collision { + path: entry_path.into(), + error_kind: err.kind(), + }); + if let Some(files) = files { + files.fetch_add(1, Ordering::SeqCst); + } + true +} diff --git a/gix-worktree-state/src/checkout/entry.rs b/gix-worktree-state/src/checkout/entry.rs new file mode 100644 index 00000000000..e699d948958 --- /dev/null +++ b/gix-worktree-state/src/checkout/entry.rs @@ -0,0 +1,294 @@ +use std::{ + fs::OpenOptions, + io::Write, + path::{Path, PathBuf}, +}; + +use bstr::BStr; +use gix_filter::{driver::apply::MaybeDelayed, pipeline::convert::ToWorktreeOutcome}; +use gix_hash::oid; +use gix_index::{entry::Stat, Entry}; +use io_close::Close; + +use gix_worktree::Cache; + +pub struct Context<'a, Find> { + pub find: &'a mut Find, + pub path_cache: &'a mut Cache, + pub filters: &'a mut gix_filter::Pipeline, + pub buf: &'a mut Vec, +} + +/// A delayed result of a long-running filter process, which is made available as stream. 
+pub struct DelayedFilteredStream<'a> { + /// The key identifying the driver program + pub key: gix_filter::driver::Key, + /// If the file is going to be an executable. + pub needs_executable_bit: bool, + /// The validated path on disk at which the file should be placed. + pub validated_file_path: PathBuf, + /// The entry to adjust with the file we will write. + pub entry: &'a mut gix_index::Entry, + /// The relative path at which the entry resides (for use when querying the delayed entry). + pub entry_path: &'a BStr, +} + +pub enum Outcome<'a> { + /// The file was written. + Written { + /// The amount of written bytes. + bytes: usize, + }, + /// The will be ready later. + Delayed(DelayedFilteredStream<'a>), +} + +impl Outcome<'_> { + /// Return ourselves as (in-memory) bytes if possible. + pub fn as_bytes(&self) -> Option { + match self { + Outcome::Written { bytes } => Some(*bytes), + Outcome::Delayed { .. } => None, + } + } +} + +#[cfg_attr(not(unix), allow(unused_variables))] +pub fn checkout<'entry, Find, E>( + entry: &'entry mut Entry, + entry_path: &'entry BStr, + Context { + find, + filters, + path_cache, + buf, + }: Context<'_, Find>, + crate::checkout::chunk::Options { + fs: gix_fs::Capabilities { + symlink, + executable_bit, + .. + }, + destination_is_initially_empty, + overwrite_existing, + filter_process_delay, + .. 
+ }: crate::checkout::chunk::Options, +) -> Result, crate::checkout::Error> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, + E: std::error::Error + Send + Sync + 'static, +{ + let dest_relative = gix_path::try_from_bstr(entry_path).map_err(|_| crate::checkout::Error::IllformedUtf8 { + path: entry_path.to_owned(), + })?; + let is_dir = Some(entry.mode == gix_index::entry::Mode::COMMIT || entry.mode == gix_index::entry::Mode::DIR); + let path_cache = path_cache.at_path(dest_relative, is_dir, &mut *find)?; + let dest = path_cache.path(); + + let object_size = match entry.mode { + gix_index::entry::Mode::FILE | gix_index::entry::Mode::FILE_EXECUTABLE => { + let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { + err, + oid: entry.id, + path: dest.to_path_buf(), + })?; + + let filtered = filters.convert_to_worktree( + obj.data, + entry_path, + |_, attrs| { + path_cache.matching_attributes(attrs); + }, + filter_process_delay, + )?; + let (num_bytes, file, set_executable_after_creation) = match filtered { + ToWorktreeOutcome::Unchanged(buf) | ToWorktreeOutcome::Buffer(buf) => { + let (mut file, flag) = open_file( + dest, + destination_is_initially_empty, + overwrite_existing, + executable_bit, + entry.mode, + )?; + file.write_all(buf)?; + (buf.len(), file, flag) + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut filtered)) => { + let (mut file, flag) = open_file( + dest, + destination_is_initially_empty, + overwrite_existing, + executable_bit, + entry.mode, + )?; + let num_bytes = std::io::copy(&mut filtered, &mut file)? as usize; + (num_bytes, file, flag) + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(key)) => { + return Ok(Outcome::Delayed(DelayedFilteredStream { + key, + needs_executable_bit: false, + validated_file_path: dest.to_owned(), + entry, + entry_path, + })) + } + }; + + // For possibly existing, overwritten files, we must change the file mode explicitly. 
+ finalize_entry(entry, file, set_executable_after_creation.then_some(dest))?; + num_bytes + } + gix_index::entry::Mode::SYMLINK => { + let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { + err, + oid: entry.id, + path: dest.to_path_buf(), + })?; + let symlink_destination = gix_path::try_from_byte_slice(obj.data) + .map_err(|_| crate::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; + + if symlink { + try_op_or_unlink(dest, overwrite_existing, |p| { + gix_fs::symlink::create(symlink_destination, p) + })?; + } else { + let mut file = try_op_or_unlink(dest, overwrite_existing, |p| { + open_options(p, destination_is_initially_empty, overwrite_existing).open(dest) + })?; + file.write_all(obj.data)?; + file.close()?; + } + + entry.stat = Stat::from_fs(&std::fs::symlink_metadata(dest)?)?; + obj.data.len() + } + gix_index::entry::Mode::DIR => { + gix_features::trace::warn!( + "Skipped sparse directory at '{entry_path}' ({id}) as it cannot yet be handled", + id = entry.id + ); + 0 + } + gix_index::entry::Mode::COMMIT => { + gix_features::trace::warn!( + "Skipped submodule at '{entry_path}' ({id}) as it cannot yet be handled", + id = entry.id + ); + 0 + } + _ => unreachable!(), + }; + Ok(Outcome::Written { bytes: object_size }) +} + +/// Note that this works only because we assume to not race ourselves when symlinks are involved, and we do this by +/// delaying symlink creation to the end and will always do that sequentially. +/// It's still possible to fall for a race if other actors create symlinks in our path, but that's nothing to defend against. 
+fn try_op_or_unlink( + path: &Path, + overwrite_existing: bool, + op: impl Fn(&Path) -> std::io::Result, +) -> std::io::Result { + if overwrite_existing { + match op(path) { + Ok(res) => Ok(res), + Err(err) if gix_fs::symlink::is_collision_error(&err) => { + try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?; + op(path) + } + Err(err) => Err(err), + } + } else { + op(path) + } +} + +fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> { + if path_meta.is_dir() { + std::fs::remove_dir_all(path) + } else if path_meta.file_type().is_symlink() { + gix_fs::symlink::remove(path) + } else { + std::fs::remove_file(path) + } +} + +#[cfg(not(debug_assertions))] +fn debug_assert_dest_is_no_symlink(_path: &Path) {} + +/// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place +#[cfg(debug_assertions)] +fn debug_assert_dest_is_no_symlink(path: &Path) { + if let Ok(meta) = path.metadata() { + debug_assert!( + !meta.file_type().is_symlink(), + "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}", + path.display() + ); + } +} + +fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions { + if overwrite_existing || !destination_is_initially_empty { + debug_assert_dest_is_no_symlink(path); + } + let mut options = gix_features::fs::open_options_no_follow(); + options + .create_new(destination_is_initially_empty && !overwrite_existing) + .create(!destination_is_initially_empty || overwrite_existing) + .write(true); + options +} + +pub(crate) fn open_file( + path: &Path, + destination_is_initially_empty: bool, + overwrite_existing: bool, + fs_supports_executable_bit: bool, + entry_mode: gix_index::entry::Mode, +) -> std::io::Result<(std::fs::File, bool)> { + #[cfg_attr(windows, allow(unused_mut))] + let mut options = open_options(path, destination_is_initially_empty, 
overwrite_existing); + let needs_executable_bit = fs_supports_executable_bit && entry_mode == gix_index::entry::Mode::FILE_EXECUTABLE; + #[cfg(unix)] + let set_executable_after_creation = if needs_executable_bit && destination_is_initially_empty { + use std::os::unix::fs::OpenOptionsExt; + // Note that these only work if the file was newly created, but won't if it's already + // existing, possibly without the executable bit set. Thus we do this only if the file is new. + options.mode(0o777); + false + } else { + needs_executable_bit + }; + // not supported on windows + #[cfg(windows)] + let set_executable_after_creation = needs_executable_bit; + try_op_or_unlink(path, overwrite_existing, |p| options.open(p)).map(|f| (f, set_executable_after_creation)) +} + +/// Close `file` and store its stats in `entry`, possibly setting `file` executable depending on `set_executable_after_creation`. +#[cfg_attr(windows, allow(unused_variables))] +pub(crate) fn finalize_entry( + entry: &mut gix_index::Entry, + file: std::fs::File, + set_executable_after_creation: Option<&Path>, +) -> Result<(), crate::checkout::Error> +where + E: std::error::Error + Send + Sync + 'static, +{ + // For possibly existing, overwritten files, we must change the file mode explicitly. + #[cfg(unix)] + if let Some(path) = set_executable_after_creation { + use std::os::unix::fs::PermissionsExt; + let mut perm = std::fs::symlink_metadata(path)?.permissions(); + perm.set_mode(0o777); + std::fs::set_permissions(path, perm)?; + } + // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. + // revisit this once there is a bug to fix. 
+ entry.stat = Stat::from_fs(&file.metadata()?)?; + file.close()?; + Ok(()) +} diff --git a/gix-worktree-state/src/checkout/function.rs b/gix-worktree-state/src/checkout/function.rs new file mode 100644 index 00000000000..ba825e8431a --- /dev/null +++ b/gix-worktree-state/src/checkout/function.rs @@ -0,0 +1,169 @@ +use std::sync::atomic::AtomicBool; + +use gix_features::{interrupt, parallel::in_parallel_with_finalize, progress::Progress}; +use gix_hash::oid; + +use crate::checkout::chunk; +use gix_worktree::{cache, Cache}; + +/// Checkout the entire `index` into `dir`, and resolve objects found in index entries with `find` to write their content to their +/// respective path in `dir`. +/// Use `files` to count each fully checked out file, and count the amount written `bytes`. If `should_interrupt` is `true`, the +/// operation will abort. +/// `options` provide a lot of context on how to perform the operation. +/// +/// ### Handling the return value +/// +/// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. 
+/// +#[allow(clippy::too_many_arguments)] +pub fn checkout( + index: &mut gix_index::State, + dir: impl Into, + find: Find, + files: &mut impl Progress, + bytes: &mut impl Progress, + should_interrupt: &AtomicBool, + options: crate::checkout::Options, +) -> Result> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let paths = index.take_path_backing(); + let res = checkout_inner(index, &paths, dir, find, files, bytes, should_interrupt, options); + index.return_path_backing(paths); + res +} + +#[allow(clippy::too_many_arguments)] +fn checkout_inner( + index: &mut gix_index::State, + paths: &gix_index::PathStorage, + dir: impl Into, + find: Find, + files: &mut impl Progress, + bytes: &mut impl Progress, + should_interrupt: &AtomicBool, + mut options: crate::checkout::Options, +) -> Result> +where + Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, + E: std::error::Error + Send + Sync + 'static, +{ + let num_files = files.counter(); + let num_bytes = bytes.counter(); + let dir = dir.into(); + let case = if options.fs.ignore_case { + gix_glob::pattern::Case::Fold + } else { + gix_glob::pattern::Case::Sensitive + }; + let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit( + 100, + index.entries().len().into(), + options.thread_limit, + None, + ); + + let state = cache::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)); + let attribute_files = state.id_mappings_from_index(index, paths, case); + let mut ctx = chunk::Context { + buf: Vec::new(), + options: (&options).into(), + path_cache: Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), + filters: options.filters, + find, + }; + + let chunk::Outcome { + mut collisions, + mut errors, + mut bytes_written, + files: files_updated, + delayed_symlinks, + delayed_paths_unknown, + delayed_paths_unprocessed, + } = if 
num_threads == 1 { + let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); + let mut delayed_filter_results = Vec::new(); + let mut out = chunk::process( + entries_with_paths, + num_files.as_deref(), + num_bytes.as_deref(), + &mut delayed_filter_results, + &mut ctx, + )?; + chunk::process_delayed_filter_results( + delayed_filter_results, + num_files.as_deref(), + num_bytes.as_deref(), + &mut out, + &mut ctx, + )?; + out + } else { + let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); + in_parallel_with_finalize( + gix_features::iter::Chunks { + inner: entries_with_paths, + size: chunk_size, + }, + thread_limit, + { + let ctx = ctx.clone(); + move |_| (Vec::new(), ctx) + }, + |chunk, (delayed_filter_results, ctx)| { + chunk::process( + chunk.into_iter(), + num_files.as_deref(), + num_bytes.as_deref(), + delayed_filter_results, + ctx, + ) + }, + |(delayed_filter_results, mut ctx)| { + let mut out = chunk::Outcome::default(); + chunk::process_delayed_filter_results( + delayed_filter_results, + num_files.as_deref(), + num_bytes.as_deref(), + &mut out, + &mut ctx, + )?; + Ok(out) + }, + chunk::Reduce { + files: num_files.is_none().then_some(files), + bytes: num_bytes.is_none().then_some(bytes), + aggregate: Default::default(), + marker: Default::default(), + }, + )? + }; + + for (entry, entry_path) in delayed_symlinks { + bytes_written += chunk::checkout_entry_handle_result( + entry, + entry_path, + &mut errors, + &mut collisions, + num_files.as_deref(), + num_bytes.as_deref(), + &mut ctx, + )? 
+ .as_bytes() + .expect("only symlinks are delayed here, they are never filtered (or delayed again)") + as u64; + } + + Ok(crate::checkout::Outcome { + files_updated, + collisions, + errors, + bytes_written, + delayed_paths_unknown, + delayed_paths_unprocessed, + }) +} diff --git a/gix-worktree-state/src/checkout/mod.rs b/gix-worktree-state/src/checkout/mod.rs new file mode 100644 index 00000000000..e01f7bff74a --- /dev/null +++ b/gix-worktree-state/src/checkout/mod.rs @@ -0,0 +1,101 @@ +use bstr::BString; +use gix_index::entry::stat; + +/// Information about a path that failed to checkout as something else was already present. +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Collision { + /// the path that collided with something already present on disk. + pub path: BString, + /// The io error we encountered when checking out `path`. + pub error_kind: std::io::ErrorKind, +} + +/// A path that encountered an IO error. +pub struct ErrorRecord { + /// the path that encountered the error. + pub path: BString, + /// The error + pub error: Box, +} + +/// The outcome of checking out an entire index. +#[derive(Default)] +pub struct Outcome { + /// The amount of files updated, or created. + pub files_updated: usize, + /// The amount of bytes written to disk, + pub bytes_written: u64, + /// The encountered collisions, which can happen on a case-insensitive filesystem. + pub collisions: Vec, + /// Other errors that happened during checkout. + pub errors: Vec, + /// Relative paths that the process listed as 'delayed' even though we never passed them. + pub delayed_paths_unknown: Vec, + /// All paths that were left unprocessed, because they were never listed by the process even though we passed them. + pub delayed_paths_unprocessed: Vec, +} + +/// Options to further configure the checkout operation. 
+#[derive(Clone, Default)] +pub struct Options { + /// capabilities of the file system + pub fs: gix_fs::Capabilities, + /// If set, don't use more than this amount of threads. + /// Otherwise, usually use as many threads as there are logical cores. + /// A value of 0 is interpreted as no-limit + pub thread_limit: Option, + /// If true, we assume no file to exist in the target directory, and want exclusive access to it. + /// This should be enabled when cloning to avoid checks for freshness of files. This also enables + /// detection of collisions based on whether or not exclusive file creation succeeds or fails. + pub destination_is_initially_empty: bool, + /// If true, default false, worktree entries on disk will be overwritten with content from the index + /// even if they appear to be changed. When creating directories that clash with existing worktree entries, + /// these will try to delete the existing entry. + /// This is similar in behaviour as `git checkout --force`. + pub overwrite_existing: bool, + /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't + /// due to a conflict. + /// The checkout operation will never fail, but count the encountered errors instead along with their paths. + pub keep_going: bool, + /// Control how stat comparisons are made when checking if a file is fresh. + pub stat_options: stat::Options, + /// A stack of attributes to use with the filesystem cache to use as driver for filters. + pub attributes: gix_worktree::cache::state::Attributes, + /// The filter pipeline to use for applying mandatory filters before writing to the worktree. + pub filters: gix_filter::Pipeline, + /// Control how long-running processes may use the 'delay' capability. + pub filter_process_delay: gix_filter::driver::apply::Delay, +} + +/// The error returned by the [checkout()][crate::checkout()] function. 
+#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("Could not convert path to UTF8: {}", .path)] + IllformedUtf8 { path: BString }, + #[error("The clock was off when reading file related metadata after updating a file on disk")] + Time(#[from] std::time::SystemTimeError), + #[error("IO error while writing blob or reading file metadata or changing filetype")] + Io(#[from] std::io::Error), + #[error("object {} for checkout at {} could not be retrieved from object database", .oid.to_hex(), .path.display())] + Find { + #[source] + err: E, + oid: gix_hash::ObjectId, + path: std::path::PathBuf, + }, + #[error(transparent)] + Filter(#[from] gix_filter::pipeline::convert::to_worktree::Error), + #[error(transparent)] + FilterListDelayed(#[from] gix_filter::driver::delayed::list::Error), + #[error(transparent)] + FilterFetchDelayed(#[from] gix_filter::driver::delayed::fetch::Error), + #[error("The entry at path '{rela_path}' was listed as delayed by the filter process, but we never passed it")] + FilterPathUnknown { rela_path: BString }, + #[error("The following paths were delayed and apparently forgotten to be processed by the filter driver: ")] + FilterPathsUnprocessed { rela_paths: Vec }, +} + +mod chunk; +mod entry; +pub(crate) mod function; diff --git a/gix-worktree-state/src/lib.rs b/gix-worktree-state/src/lib.rs new file mode 100644 index 00000000000..2c2cf67f64d --- /dev/null +++ b/gix-worktree-state/src/lib.rs @@ -0,0 +1,6 @@ +//! A crate to help setting the worktree to a particular state. 
+#![deny(missing_docs, rust_2018_idioms, unsafe_code)] + +/// +pub mod checkout; +pub use checkout::function::checkout; diff --git a/gix-worktree-state/tests/fixtures/generated-archives/.gitignore b/gix-worktree-state/tests/fixtures/generated-archives/.gitignore new file mode 100644 index 00000000000..ea20ce61c44 --- /dev/null +++ b/gix-worktree-state/tests/fixtures/generated-archives/.gitignore @@ -0,0 +1,5 @@ +make_ignore_and_attributes_setup.tar.xz +make_mixed_without_submodules.tar.xz +make_mixed_without_submodules_and_symlinks.tar.xz +make_attributes_baseline.tar.xz +make_dangerous_symlink.tar.xz diff --git a/gix-worktree-state/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz b/gix-worktree-state/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz new file mode 100644 index 00000000000..6e4ed4be056 --- /dev/null +++ b/gix-worktree-state/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2984e2e61b28635014165351cd872ea7e4f09c28b8b4bbe50692a465ef648033 +size 10616 diff --git a/gix-worktree-state/tests/fixtures/make_dangerous_symlink.sh b/gix-worktree-state/tests/fixtures/make_dangerous_symlink.sh new file mode 100755 index 00000000000..31437285a37 --- /dev/null +++ b/gix-worktree-state/tests/fixtures/make_dangerous_symlink.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +# Every symlink is dangerous as it might either link to another directory and thus redirect +# all writes in the path, or it might point to a file and opening the symlink actually opens +# the target. 
+# We handle this by either validating symlinks specifically or create symlinks +empty_oid=$(git hash-object -w --stdin .gitattributes +git add -A + +git update-index --index-info <<-EOF +100644 $content_oid FILE_X +100644 $content_oid FILE_x +100644 $content_oid file_X +100644 $content_oid file_x +100644 $empty_oid D/B +100644 $empty_oid D/C +100644 $empty_oid d +100644 $empty_oid X +120000 $symlink_target x +120000 $symlink_target link-to-X +EOF + +git commit -m "init" +git checkout -f HEAD; diff --git a/gix-worktree-state/tests/fixtures/make_mixed_without_submodules.sh b/gix-worktree-state/tests/fixtures/make_mixed_without_submodules.sh new file mode 100755 index 00000000000..43fafbad944 --- /dev/null +++ b/gix-worktree-state/tests/fixtures/make_mixed_without_submodules.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo "other content" > dir/content +echo "* filter=arrow" > .gitattributes +echo "executable -filter" >> .gitattributes +echo ".gitattributes -filter" >> .gitattributes + +mkdir dir/sub-dir +(cd dir/sub-dir && ln -sf ../content symlink) + +git add -A +git commit -m "Commit" diff --git a/gix-worktree-state/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh b/gix-worktree-state/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh new file mode 100755 index 00000000000..0e0e95ae301 --- /dev/null +++ b/gix-worktree-state/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh @@ -0,0 +1,20 @@ +#!/bin/bash +set -eu -o pipefail + +git init -q + +touch empty +echo -n "content" > executable +chmod +x executable + +mkdir dir +echo "other content" > dir/content +echo "* filter=arrow" > .gitattributes +echo "executable -filter" >> .gitattributes +echo ".gitattributes -filter" >> .gitattributes + +mkdir dir/sub-dir +echo "even other content" > dir/sub-dir/file + +git add -A +git commit -m "Commit" diff --git 
a/gix-worktree-state/tests/state-multi-threaded.rs b/gix-worktree-state/tests/state-multi-threaded.rs new file mode 100644 index 00000000000..41474d7ef08 --- /dev/null +++ b/gix-worktree-state/tests/state-multi-threaded.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "internal-testing-gix-features-parallel")] +mod state; +#[cfg(feature = "internal-testing-gix-features-parallel")] +use state::*; diff --git a/gix-worktree-state/tests/state-single-threaded.rs b/gix-worktree-state/tests/state-single-threaded.rs new file mode 100644 index 00000000000..3f4e78811e2 --- /dev/null +++ b/gix-worktree-state/tests/state-single-threaded.rs @@ -0,0 +1,4 @@ +#[cfg(not(feature = "internal-testing-gix-features-parallel"))] +mod state; +#[cfg(not(feature = "internal-testing-gix-features-parallel"))] +use state::*; diff --git a/gix-worktree-state/tests/state/checkout.rs b/gix-worktree-state/tests/state/checkout.rs new file mode 100644 index 00000000000..52e453dd23e --- /dev/null +++ b/gix-worktree-state/tests/state/checkout.rs @@ -0,0 +1,526 @@ +#[cfg(unix)] +use std::os::unix::prelude::MetadataExt; +use std::{ + fs, + io::{ErrorKind, ErrorKind::AlreadyExists}, + path::{Path, PathBuf}, + sync::atomic::{AtomicBool, AtomicUsize, Ordering}, +}; + +use gix_features::progress; +use gix_object::bstr::ByteSlice; +use gix_odb::FindExt; +use gix_testtools::tempfile::TempDir; +use gix_worktree::checkout::Collision; +use once_cell::sync::Lazy; + +use crate::fixture_path; + +static DRIVER: Lazy = Lazy::new(|| { + let mut cargo = std::process::Command::new(env!("CARGO")); + let res = cargo + .args(["build", "-p=gix-filter", "--example", "arrow"]) + .status() + .expect("cargo should run fine"); + assert!(res.success(), "cargo invocation should be successful"); + + let path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")) + .ancestors() + .nth(1) + .expect("first parent in target dir") + .join("debug") + .join("examples") + .join(if cfg!(windows) { "arrow.exe" } else { "arrow" }); + assert!(path.is_file(), 
"Expecting driver to be located at {path:?}"); + path +}); + +fn driver_exe() -> String { + let mut exe = DRIVER.to_string_lossy().into_owned(); + if cfg!(windows) { + exe = exe.replace('\\', "/"); + } + exe +} + +#[test] +fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { + let mut opts = opts_from_probe(); + // without overwrite mode, everything is safe. + opts.overwrite_existing = false; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink").unwrap(); + + let source_files = dir_structure(&source_tree); + let worktree_files = dir_structure(&destination); + + if opts.fs.ignore_case { + assert_eq!( + stripped_prefix(&source_tree, &source_files), + stripped_prefix(&destination, &worktree_files), + ); + if multi_threaded() { + assert_eq!(outcome.collisions.len(), 2); + } else { + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "FAKE-DIR".into(), + error_kind: AlreadyExists + }, + Collision { + path: "FAKE-FILE".into(), + error_kind: AlreadyExists + } + ] + ); + } + } else { + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert!(outcome.collisions.is_empty()); + }; +} + +#[test] +fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { + let mut opts = opts_from_probe(); + // with overwrite mode + opts.overwrite_existing = true; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink").unwrap(); + + let source_files = dir_structure(&source_tree); + let worktree_files = dir_structure(&destination); + + if opts.fs.ignore_case { + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]), + ); + assert_eq!( + 
stripped_prefix(&destination, &worktree_files), + paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]), + ); + assert!(outcome.collisions.is_empty()); + } else { + let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; + assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); + assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); + assert!(outcome.collisions.is_empty()); + }; +} + +#[test] +fn delayed_driver_process() -> crate::Result { + let mut opts = opts_from_probe(); + opts.overwrite_existing = true; + opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow; + opts.destination_is_initially_empty = false; + setup_filter_pipeline(opts.filters.options_mut()); + let (_source, destination, _index, outcome) = + checkout_index_in_tmp_dir_opts(opts, "make_mixed_without_submodules_and_symlinks", |_| true, |_| Ok(()))?; + assert_eq!(outcome.collisions.len(), 0); + assert_eq!(outcome.errors.len(), 0); + assert_eq!(outcome.files_updated, 5); + + let dest = destination.path(); + assert_eq!( + std::fs::read(dest.join("executable"))?.as_bstr(), + "content", + "unfiltered" + ); + assert_eq!( + std::fs::read(dest.join("dir").join("content"))?.as_bstr(), + "➡other content\r\n" + ); + assert_eq!( + std::fs::read(dest.join("dir").join("sub-dir").join("file"))?.as_bstr(), + "➡even other content\r\n" + ); + Ok(()) +} + +#[test] +#[cfg_attr( + windows, + ignore = "on windows, the symlink to a directory doesn't seem to work and we really want to test with symlinks" +)] +fn overwriting_files_and_lone_directories_works() -> crate::Result { + for delay in [ + gix_filter::driver::apply::Delay::Allow, + gix_filter::driver::apply::Delay::Forbid, + ] { + let mut opts = opts_from_probe(); + opts.overwrite_existing = true; + opts.filter_process_delay = delay; + opts.destination_is_initially_empty = false; + setup_filter_pipeline(opts.filters.options_mut()); + let (source, destination, _index, outcome) = 
checkout_index_in_tmp_dir_opts( + opts.clone(), + "make_mixed_without_submodules", + |_| true, + |d| { + let empty = d.join("empty"); + symlink::symlink_dir(d.join(".."), &empty)?; // empty is symlink to the directory above + std::fs::write(d.join("executable"), b"foo")?; // executable is regular file and has different content + let dir = d.join("dir"); + std::fs::create_dir(&dir)?; + std::fs::create_dir(dir.join("content"))?; // 'content' is a directory now + + let dir = dir.join("sub-dir"); + std::fs::create_dir(&dir)?; + + symlink::symlink_dir(empty, dir.join("symlink"))?; // 'symlink' is a symlink to another file + Ok(()) + }, + )?; + + assert!(outcome.collisions.is_empty()); + + assert_eq!( + stripped_prefix(&destination, &dir_structure(&destination)), + paths(["dir/content", "dir/sub-dir/symlink", "empty", "executable"]) + ); + let meta = std::fs::symlink_metadata(destination.path().join("empty"))?; + assert!(meta.is_file(), "'empty' is now a file"); + assert_eq!(meta.len(), 0, "'empty' is indeed empty"); + + let exe = destination.path().join("executable"); + assert_eq!(std::fs::read(&exe)?, b"content", "'exe' has the correct content"); + + let meta = std::fs::symlink_metadata(exe)?; + assert!(meta.is_file()); + if opts.fs.executable_bit { + #[cfg(unix)] + assert_eq!(meta.mode() & 0o700, 0o700, "the executable bit is set where supported"); + } + + assert_eq!( + std::fs::read(source.join("dir/content"))?.as_bstr(), + "other content\n", + "in the worktree, we have LF" + ); + assert_eq!( + std::fs::read(destination.path().join("dir/content"))?.as_bstr(), + "➡other content\r\n", + "autocrlf is enabled, so we get CRLF when checking out as the pipeline is active, and we have a filter" + ); + + let symlink = destination.path().join("dir/sub-dir/symlink"); + // on windows, git won't create symlinks as its probe won't detect the capability, even though we do. 
+ assert_eq!(std::fs::symlink_metadata(&symlink)?.is_symlink(), cfg!(unix)); + assert_eq!( + std::fs::read(symlink)?.as_bstr(), + "➡other content\r\n", + "autocrlf is enabled" + ); + } + Ok(()) +} + +#[test] +fn symlinks_become_files_if_disabled() -> crate::Result { + let mut opts = opts_from_probe(); + opts.fs.symlink = false; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules")?; + + assert_equality(&source_tree, &destination, opts.fs.symlink)?; + assert!(outcome.collisions.is_empty()); + Ok(()) +} + +#[test] +fn allow_or_disallow_symlinks() -> crate::Result { + let mut opts = opts_from_probe(); + for allowed in &[false, true] { + opts.fs.symlink = *allowed; + let (source_tree, destination, _index, outcome) = + checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules")?; + + assert_equality(&source_tree, &destination, opts.fs.symlink)?; + assert!(outcome.collisions.is_empty()); + } + Ok(()) +} + +#[test] +fn keep_going_collects_results() { + let mut opts = opts_from_probe(); + opts.keep_going = true; + let count = AtomicUsize::default(); + let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( + opts, + "make_mixed_without_submodules", + |_id| { + !matches!( + count.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { + (current < 2).then_some(current + 1) + }), + Ok(_) + ) + }, + |_| Ok(()), + ) + .unwrap(); + + if multi_threaded() { + assert_eq!( + outcome.errors.len(), + 2, + "content changes due to non-deterministic nature of racy threads" + ) + } else { + assert_eq!( + outcome + .errors + .iter() + .map(|r| r.path.to_path_lossy().into_owned()) + .collect::>(), + paths(if cfg!(unix) { + [".gitattributes", "dir/content"] + } else { + // not actually a symlink anymore, even though symlinks are supported but git think differently. 
+ ["dir/content", "dir/sub-dir/symlink"] + }) + ); + } + + if multi_threaded() { + let actual = dir_structure(&destination); + assert!( + (2..=3).contains(&actual.len()), + "it's 3 most of the time, but can be 2 of the 'empty' file is missing as the object couldn't be accessed.\ + It's unclear why there isn't more, as it would keep going" + ); + } else { + assert_eq!( + stripped_prefix(&destination, &dir_structure(&destination)), + paths(if cfg!(unix) { + Box::new(["dir/sub-dir/symlink", "empty", "executable"].into_iter()) as Box> + } else { + Box::new(["empty", "executable"].into_iter()) + }), + "some files could not be created" + ); + } + + assert!(outcome.collisions.is_empty()); +} + +#[test] +fn no_case_related_collisions_on_case_sensitive_filesystem() { + let opts = opts_from_probe(); + if opts.fs.ignore_case { + eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system"); + return; + } + let (source_tree, destination, index, outcome) = + checkout_index_in_tmp_dir(opts.clone(), "make_ignorecase_collisions").unwrap(); + + assert!(outcome.collisions.is_empty()); + let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); + assert_eq!( + num_files, + index.entries().len() - 1, + "it checks out all files (minus 1 to account for .gitattributes which is skipped in the worktree in our tests)" + ); + assert!( + destination.path().join(".gitattributes").is_file(), + "we do have attributes even though, dot files are ignored in `assert_equality`" + ); +} + +#[test] +fn collisions_are_detected_on_a_case_insensitive_filesystem_even_with_delayed_filters() { + let mut opts = opts_from_probe(); + if !opts.fs.ignore_case { + eprintln!("Skipping case-insensitive testing on what would be a case-sensitive file system"); + return; + } + setup_filter_pipeline(opts.filters.options_mut()); + opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow; + let (source_tree, destination, _index, outcome) = + 
checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); + + let source_files = dir_structure(&source_tree); + assert_eq!( + stripped_prefix(&source_tree, &source_files), + paths(["d", "file_x", "link-to-X", "x"]), + "plenty of collisions prevent a checkout" + ); + + let dest_files = dir_structure(&destination); + if multi_threaded() { + assert!( + (4..=6).contains(&dest_files.len()), + "due to the clash happening at nearly any time, and keep-going is false, we get a variance of files" + ); + } else { + assert_eq!( + stripped_prefix(&destination, &dest_files), + paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), + "we checkout files in order and generally handle collision detection differently, hence the difference" + ); + } + + let error_kind = ErrorKind::AlreadyExists; + #[cfg(windows)] + let error_kind_dir = ErrorKind::PermissionDenied; + #[cfg(not(windows))] + let error_kind_dir = error_kind; + + if multi_threaded() { + assert!( + (5..=6).contains(&outcome.collisions.len()), + "can only assert on number as it's racily creating files so unclear which one clashes, and due to keep-going = false there is variance" + ); + } else { + assert_eq!( + outcome.collisions, + vec![ + Collision { + path: "d".into(), + error_kind: error_kind_dir, + }, + Collision { + path: "FILE_x".into(), + error_kind, + }, + Collision { + path: "file_X".into(), + error_kind, + }, + Collision { + path: "file_x".into(), + error_kind, + }, + Collision { + path: "x".into(), + error_kind, + }, + ], + "these files couldn't be checked out" + ); + } +} + +fn multi_threaded() -> bool { + gix_features::parallel::num_threads(None) > 1 +} + +fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { + let source_files = dir_structure(source_tree); + let worktree_files = dir_structure(destination); + + assert_eq!( + stripped_prefix(source_tree, &source_files), + stripped_prefix(destination, &worktree_files), + ); + + let mut count = 0; + for 
(source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { + count += 1; + if !allow_symlinks && source_file.is_symlink() { + assert!(!worktree_file.is_symlink()); + assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); + } else { + assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); + #[cfg(unix)] + assert_eq!( + fs::symlink_metadata(source_file)?.mode() & 0o700, + fs::symlink_metadata(worktree_file)?.mode() & 0o700, + "permissions of source and checked out file are comparable" + ); + } + } + Ok(count) +} + +pub fn dir_structure>(path: P) -> Vec { + let path = path.as_ref(); + let mut files: Vec<_> = walkdir::WalkDir::new(path) + .follow_links(false) + .into_iter() + .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) + .flatten() + .filter_map(|e| (!e.path().symlink_metadata().map_or(true, |m| m.is_dir())).then(|| e.path().to_path_buf())) + .collect(); + files.sort(); + files +} + +fn checkout_index_in_tmp_dir( + opts: gix_worktree::checkout::Options, + name: &str, +) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { + checkout_index_in_tmp_dir_opts(opts, name, |_d| true, |_| Ok(())) +} + +fn checkout_index_in_tmp_dir_opts( + opts: gix_worktree::checkout::Options, + name: &str, + mut allow_return_object: impl FnMut(&gix_hash::oid) -> bool + Send + Clone, + prep_dest: impl Fn(&Path) -> std::io::Result<()>, +) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { + let source_tree = fixture_path(name); + let git_dir = source_tree.join(".git"); + let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default())?; + let odb = gix_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?; + let destination = gix_testtools::tempfile::tempdir_in(std::env::current_dir()?)?; + prep_dest(destination.path()).expect("preparation must succeed"); + + let outcome = 
gix_worktree::checkout( + &mut index, + destination.path(), + move |oid, buf| { + if allow_return_object(oid) { + odb.find_blob(oid, buf) + } else { + Err(gix_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) + } + }, + &mut progress::Discard, + &mut progress::Discard, + &AtomicBool::default(), + opts, + )?; + Ok((source_tree, destination, index, outcome)) +} + +fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { + source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() +} + +fn probe_gitoxide_dir() -> crate::Result { + Ok(gix_fs::Capabilities::probe( + std::env::current_dir()?.join("..").join(".git"), + )) +} + +fn opts_from_probe() -> gix_worktree::checkout::Options { + gix_worktree::checkout::Options { + fs: probe_gitoxide_dir().unwrap(), + destination_is_initially_empty: true, + thread_limit: gix_features::parallel::num_threads(None).into(), + ..Default::default() + } +} + +fn paths<'a>(p: impl IntoIterator) -> Vec { + p.into_iter().map(PathBuf::from).collect() +} + +fn setup_filter_pipeline(opts: &mut gix_filter::pipeline::Options) { + opts.eol_config.auto_crlf = gix_filter::eol::AutoCrlf::Enabled; + opts.drivers = vec![gix_filter::Driver { + name: "arrow".into(), + clean: None, + smudge: None, + process: Some((driver_exe() + " process").into()), + required: true, + }]; +} diff --git a/gix-worktree-state/tests/state/mod.rs b/gix-worktree-state/tests/state/mod.rs new file mode 100644 index 00000000000..82da2b9adf1 --- /dev/null +++ b/gix-worktree-state/tests/state/mod.rs @@ -0,0 +1,10 @@ +mod checkout; + +use std::path::{Path, PathBuf}; + +pub type Result = std::result::Result>; + +pub fn fixture_path(name: &str) -> PathBuf { + let dir = gix_testtools::scripted_fixture_read_only(Path::new(name).with_extension("sh")).expect("script works"); + dir +} diff --git a/justfile b/justfile index a6e1873b235..961bfd9f482 100755 --- a/justfile +++ b/justfile @@ -139,6 +139,10 @@ unit-tests: set -ex; \ cargo test; 
\ cargo test --features "internal-testing-gix-features-parallel" + cd gix-worktree-state; \ + set -ex; \ + cargo test; \ + cargo test --features "internal-testing-gix-features-parallel" cd gix-object; \ set -ex; \ cargo test; \ From 1958dffc164e7b60ddc2eb308ed6da74a80559df Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 16:07:21 +0200 Subject: [PATCH 5/7] chore!: remove `checkout` and `status` functionality so only `Cache` remains. `gix-worktree` is now a base-crate for use by derived crates that provide additional functionality. Shared types or utilities go here. --- Cargo.lock | 7 - crate-status.md | 20 +- gix-worktree/Cargo.toml | 10 +- gix-worktree/src/checkout/chunk.rs | 402 ------------- gix-worktree/src/checkout/entry.rs | 294 ---------- gix-worktree/src/checkout/function.rs | 168 ------ gix-worktree/src/checkout/mod.rs | 101 ---- gix-worktree/src/lib.rs | 11 +- gix-worktree/src/read.rs | 64 --- gix-worktree/src/status/content.rs | 80 --- gix-worktree/src/status/function.rs | 331 ----------- gix-worktree/src/status/mod.rs | 11 - gix-worktree/src/status/recorder.rs | 27 - gix-worktree/src/status/types.rs | 69 --- gix-worktree/src/untracked.rs | 1 - .../fixtures/generated-archives/.gitignore | 5 - .../make_ignorecase_collisions.tar.xz | 3 - .../make_special_exclude_case.tar.xz | 4 +- .../generated-archives/racy_git.tar.xz | 3 - .../generated-archives/status_conflict.tar.xz | 3 - .../status_intent_to_add.tar.xz | 3 - .../generated-archives/status_removed.tar.xz | 3 - .../tests/fixtures/make_dangerous_symlink.sh | 24 - .../tests/fixtures/make_ignore_setup.sh | 18 - .../fixtures/make_ignorecase_collisions.sh | 27 - .../fixtures/make_mixed_without_submodules.sh | 20 - ...e_mixed_without_submodules_and_symlinks.sh | 20 - gix-worktree/tests/fixtures/racy_git.sh | 12 - gix-worktree/tests/fixtures/status_changed.sh | 26 - .../tests/fixtures/status_conflict.sh | 18 - .../tests/fixtures/status_intent_to_add.sh | 9 - 
gix-worktree/tests/fixtures/status_removed.sh | 18 - .../tests/fixtures/status_unchanged.sh | 20 - .../tests/worktree/cache/create_directory.rs | 2 +- gix-worktree/tests/worktree/checkout.rs | 526 ------------------ gix-worktree/tests/worktree/mod.rs | 9 - gix-worktree/tests/worktree/status.rs | 226 -------- justfile | 4 - 38 files changed, 6 insertions(+), 2593 deletions(-) delete mode 100644 gix-worktree/src/checkout/chunk.rs delete mode 100644 gix-worktree/src/checkout/entry.rs delete mode 100644 gix-worktree/src/checkout/function.rs delete mode 100644 gix-worktree/src/checkout/mod.rs delete mode 100644 gix-worktree/src/read.rs delete mode 100644 gix-worktree/src/status/content.rs delete mode 100644 gix-worktree/src/status/function.rs delete mode 100644 gix-worktree/src/status/mod.rs delete mode 100644 gix-worktree/src/status/recorder.rs delete mode 100644 gix-worktree/src/status/types.rs delete mode 100644 gix-worktree/src/untracked.rs delete mode 100644 gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz delete mode 100644 gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz delete mode 100644 gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz delete mode 100644 gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz delete mode 100644 gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz delete mode 100755 gix-worktree/tests/fixtures/make_dangerous_symlink.sh delete mode 100755 gix-worktree/tests/fixtures/make_ignore_setup.sh delete mode 100755 gix-worktree/tests/fixtures/make_ignorecase_collisions.sh delete mode 100755 gix-worktree/tests/fixtures/make_mixed_without_submodules.sh delete mode 100755 gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh delete mode 100755 gix-worktree/tests/fixtures/racy_git.sh delete mode 100755 gix-worktree/tests/fixtures/status_changed.sh delete mode 100755 gix-worktree/tests/fixtures/status_conflict.sh delete mode 
100755 gix-worktree/tests/fixtures/status_intent_to_add.sh delete mode 100755 gix-worktree/tests/fixtures/status_removed.sh delete mode 100755 gix-worktree/tests/fixtures/status_unchanged.sh delete mode 100644 gix-worktree/tests/worktree/checkout.rs delete mode 100644 gix-worktree/tests/worktree/status.rs diff --git a/Cargo.lock b/Cargo.lock index d6398414be2..bc8cb1be423 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2466,10 +2466,8 @@ version = "0.24.0" dependencies = [ "bstr", "document-features", - "filetime", "gix-attributes 0.16.0", "gix-features 0.32.1", - "gix-filter", "gix-fs 0.4.1", "gix-glob 0.10.2", "gix-hash 0.11.4", @@ -2479,13 +2477,8 @@ dependencies = [ "gix-odb", "gix-path 0.8.4", "gix-testtools", - "io-close", - "once_cell", "serde", "symlink", - "tempfile", - "thiserror", - "walkdir", ] [[package]] diff --git a/crate-status.md b/crate-status.md index a0ca1892fc7..3079e063507 100644 --- a/crate-status.md +++ b/crate-status.md @@ -482,25 +482,7 @@ Make it the best-performing implementation and the most convenient one. * [x] attributes ### gix-worktree -* handle the working **tree/checkout** - - [x] checkout an index of files, executables and symlinks just as fast as git - - [x] forbid symlinks in directories - - [ ] handle submodules - - [ ] handle sparse directories - - [ ] handle sparse index - - [x] linear scaling with multi-threading up to IO saturation - - supported attributes to affect working tree and index contents - - [x] eol - - [x] working-tree-encoding - - …more - - **filtering** - - [x] `text` - - [x] `ident` - - [x] filter processes - - [x] single-invocation clean/smudge filters -* access to per-path information, like `.gitignore` and `.gitattributes` in a manner well suited for efficient lookups - * [x] _exclude_ information - * [x] attributes +* [x] A stack to efficiently generate attribute lists for matching paths against. 
### gix-revision * [x] `describe()` (similar to `git name-rev`) diff --git a/gix-worktree/Cargo.toml b/gix-worktree/Cargo.toml index fbcb08fa1c5..2796485c024 100644 --- a/gix-worktree/Cargo.toml +++ b/gix-worktree/Cargo.toml @@ -3,7 +3,7 @@ name = "gix-worktree" version = "0.24.0" repository = "https://github.com/Byron/gitoxide" license = "MIT OR Apache-2.0" -description = "A crate of the gitoxide project dedicated implementing everything around working trees and git excludes" +description = "A crate of the gitoxide project for shared worktree related types and utilities." authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] @@ -33,25 +33,17 @@ gix-path = { version = "^0.8.4", path = "../gix-path" } gix-attributes = { version = "^0.16.0", path = "../gix-attributes" } gix-ignore = { version = "^0.5.1", path = "../gix-ignore" } gix-features = { version = "^0.32.1", path = "../gix-features" } -gix-filter = { version = "^0.3.0", path = "../gix-filter" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} -thiserror = "1.0.26" -filetime = "0.2.15" bstr = { version = "1.3.0", default-features = false } document-features = { version = "0.2.0", optional = true } -io-close = "0.3.7" [dev-dependencies] gix-testtools = { path = "../tests/tools" } gix-odb = { path = "../gix-odb" } symlink = "0.1.0" -once_cell = "1.18.0" - -walkdir = "2.3.2" -tempfile = "3.2.0" [package.metadata.docs.rs] features = ["document-features", "serde"] diff --git a/gix-worktree/src/checkout/chunk.rs b/gix-worktree/src/checkout/chunk.rs deleted file mode 100644 index 13110e90004..00000000000 --- a/gix-worktree/src/checkout/chunk.rs +++ /dev/null @@ -1,402 +0,0 @@ -use std::{ - collections::BTreeSet, - sync::atomic::{AtomicUsize, Ordering}, -}; - -use bstr::{BStr, BString}; -use gix_hash::oid; - -use crate::{checkout, checkout::entry, Cache}; - -mod reduce { - use std::marker::PhantomData; - - use 
gix_features::progress::Progress; - - use crate::checkout; - - pub struct Reduce<'a, 'entry, P1, P2, E> { - pub files: Option<&'a mut P1>, - pub bytes: Option<&'a mut P2>, - pub aggregate: super::Outcome<'entry>, - pub marker: PhantomData, - } - - impl<'a, 'entry, P1, P2, E> gix_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> - where - P1: Progress, - P2: Progress, - E: std::error::Error + Send + Sync + 'static, - { - type Input = Result, checkout::Error>; - type FeedProduce = (); - type Output = super::Outcome<'entry>; - type Error = checkout::Error; - - fn feed(&mut self, item: Self::Input) -> Result { - let item = item?; - let super::Outcome { - bytes_written, - files, - delayed_symlinks, - errors, - collisions, - delayed_paths_unknown, - delayed_paths_unprocessed, - } = item; - self.aggregate.bytes_written += bytes_written; - self.aggregate.files += files; - self.aggregate.delayed_symlinks.extend(delayed_symlinks); - self.aggregate.errors.extend(errors); - self.aggregate.collisions.extend(collisions); - self.aggregate.delayed_paths_unknown.extend(delayed_paths_unknown); - self.aggregate - .delayed_paths_unprocessed - .extend(delayed_paths_unprocessed); - - if let Some(progress) = self.bytes.as_deref_mut() { - progress.set(self.aggregate.bytes_written as gix_features::progress::Step); - } - if let Some(progress) = self.files.as_deref_mut() { - progress.set(self.aggregate.files); - } - - Ok(()) - } - - fn finalize(self) -> Result { - Ok(self.aggregate) - } - } -} -pub use reduce::Reduce; - -use crate::checkout::entry::DelayedFilteredStream; - -#[derive(Default)] -pub struct Outcome<'a> { - pub collisions: Vec, - pub errors: Vec, - pub delayed_symlinks: Vec<(&'a mut gix_index::Entry, &'a BStr)>, - // all (immediately) written bytes - pub bytes_written: u64, - // the amount of files we processed - pub files: usize, - /// Relative paths that the process listed as 'delayed' even though we never passed them. 
- pub delayed_paths_unknown: Vec, - /// All paths that were left unprocessed, because they were never listed by the process even though we passed them. - pub delayed_paths_unprocessed: Vec, -} - -#[derive(Clone)] -pub struct Context { - pub find: Find, - pub path_cache: Cache, - pub filters: gix_filter::Pipeline, - pub buf: Vec, - pub options: Options, -} - -#[derive(Clone, Copy)] -pub struct Options { - pub fs: gix_fs::Capabilities, - pub destination_is_initially_empty: bool, - pub overwrite_existing: bool, - pub keep_going: bool, - pub filter_process_delay: gix_filter::driver::apply::Delay, -} - -impl From<&checkout::Options> for Options { - fn from(opts: &checkout::Options) -> Self { - Options { - fs: opts.fs, - destination_is_initially_empty: opts.destination_is_initially_empty, - overwrite_existing: opts.overwrite_existing, - keep_going: opts.keep_going, - filter_process_delay: opts.filter_process_delay, - } - } -} - -pub fn process<'entry, Find, E>( - entries_with_paths: impl Iterator, - files: Option<&AtomicUsize>, - bytes: Option<&AtomicUsize>, - delayed_filter_results: &mut Vec>, - ctx: &mut Context, -) -> Result, checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let mut delayed_symlinks = Vec::new(); - let mut collisions = Vec::new(); - let mut errors = Vec::new(); - let mut bytes_written = 0; - let mut files_in_chunk = 0; - - for (entry, entry_path) in entries_with_paths { - // TODO: write test for that - if entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - files_in_chunk += 1; - continue; - } - - // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. - // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is - // before creating it. 
- // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety - // around writing through symlinks (even though we handle this). - // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. - if entry.mode == gix_index::entry::Mode::SYMLINK { - delayed_symlinks.push((entry, entry_path)); - continue; - } - - match checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? { - entry::Outcome::Written { bytes } => { - bytes_written += bytes as u64; - files_in_chunk += 1 - } - entry::Outcome::Delayed(delayed) => delayed_filter_results.push(delayed), - } - } - - Ok(Outcome { - bytes_written, - files: files_in_chunk, - errors, - collisions, - delayed_symlinks, - delayed_paths_unknown: Vec::new(), - delayed_paths_unprocessed: Vec::new(), - }) -} - -pub fn process_delayed_filter_results( - mut delayed_filter_results: Vec>, - files: Option<&AtomicUsize>, - bytes: Option<&AtomicUsize>, - out: &mut Outcome<'_>, - ctx: &mut Context, -) -> Result<(), checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let Options { - destination_is_initially_empty, - overwrite_existing, - keep_going, - .. - } = ctx.options; - let mut bytes_written = 0; - let mut delayed_files = 0; - // Sort by path for fast lookups - delayed_filter_results.sort_by(|a, b| a.entry_path.cmp(b.entry_path)); - // We process each key and do as the filter process tells us, while collecting data about the overall progress. 
- let keys: BTreeSet<_> = delayed_filter_results.iter().map(|d| d.key.clone()).collect(); - let mut unknown_paths = Vec::new(); - let mut rela_path_as_path = Default::default(); - for key in keys { - loop { - let rela_paths = ctx.filters.driver_state_mut().list_delayed_paths(&key)?; - if rela_paths.is_empty() { - break; - } - - for rela_path in rela_paths { - let delayed = match delayed_filter_results.binary_search_by(|d| d.entry_path.cmp(rela_path.as_ref())) { - Ok(idx) => &mut delayed_filter_results[idx], - Err(_) => { - if keep_going { - unknown_paths.push(rela_path); - continue; - } else { - return Err(checkout::Error::FilterPathUnknown { rela_path }); - } - } - }; - let mut read = std::io::BufReader::with_capacity( - 512 * 1024, - ctx.filters.driver_state_mut().fetch_delayed( - &key, - rela_path.as_ref(), - gix_filter::driver::Operation::Smudge, - )?, - ); - let (file, set_executable_after_creation) = match entry::open_file( - &std::mem::take(&mut delayed.validated_file_path), // mark it as seen, relevant for `unprocessed_paths` - destination_is_initially_empty, - overwrite_existing, - delayed.needs_executable_bit, - delayed.entry.mode, - ) { - Ok(res) => res, - Err(err) => { - if !is_collision(&err, delayed.entry_path, &mut out.collisions, files) { - handle_error(err, delayed.entry_path, files, &mut out.errors, ctx.options.keep_going)?; - } - std::io::copy(&mut read, &mut std::io::sink())?; - continue; - } - }; - let mut write = WriteWithProgress { - inner: std::io::BufWriter::with_capacity(512 * 1024, file), - progress: bytes, - }; - bytes_written += std::io::copy(&mut read, &mut write)?; - entry::finalize_entry( - delayed.entry, - write.inner.into_inner().map_err(std::io::IntoInnerError::into_error)?, - set_executable_after_creation.then(|| { - rela_path_as_path = gix_path::from_bstr(delayed.entry_path); - rela_path_as_path.as_ref() - }), - )?; - delayed_files += 1; - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - } - } - } - - 
let unprocessed_paths = delayed_filter_results - .into_iter() - .filter_map(|d| (!d.validated_file_path.as_os_str().is_empty()).then(|| d.entry_path.to_owned())) - .collect(); - - if !keep_going && !unknown_paths.is_empty() { - return Err(checkout::Error::FilterPathsUnprocessed { - rela_paths: unprocessed_paths, - }); - } - - out.delayed_paths_unknown = unknown_paths; - out.delayed_paths_unprocessed = unprocessed_paths; - out.bytes_written += bytes_written; - out.files += delayed_files; - Ok(()) -} - -pub struct WriteWithProgress<'a, T> { - pub inner: T, - pub progress: Option<&'a AtomicUsize>, -} - -impl<'a, T> std::io::Write for WriteWithProgress<'a, T> -where - T: std::io::Write, -{ - fn write(&mut self, buf: &[u8]) -> std::io::Result { - let written = self.inner.write(buf)?; - if let Some(progress) = self.progress { - progress.fetch_add(written as gix_features::progress::Step, Ordering::SeqCst); - } - Ok(written) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.inner.flush() - } -} - -pub fn checkout_entry_handle_result<'entry, Find, E>( - entry: &'entry mut gix_index::Entry, - entry_path: &'entry BStr, - errors: &mut Vec, - collisions: &mut Vec, - files: Option<&AtomicUsize>, - bytes: Option<&AtomicUsize>, - Context { - find, - path_cache, - filters, - buf, - options, - }: &mut Context, -) -> Result, checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let res = entry::checkout( - entry, - entry_path, - entry::Context { - find, - path_cache, - filters, - buf, - }, - *options, - ); - match res { - Ok(out) => { - if let Some(num) = out.as_bytes() { - if let Some(bytes) = bytes { - bytes.fetch_add(num, Ordering::SeqCst); - } - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - } - Ok(out) - } - Err(checkout::Error::Io(err)) if is_collision(&err, entry_path, collisions, files) => { - Ok(entry::Outcome::Written { bytes: 0 }) - } - Err(err) => 
handle_error(err, entry_path, files, errors, options.keep_going) - .map(|()| entry::Outcome::Written { bytes: 0 }), - } -} - -fn handle_error( - err: E, - entry_path: &BStr, - files: Option<&AtomicUsize>, - errors: &mut Vec, - keep_going: bool, -) -> Result<(), E> -where - E: std::error::Error + Send + Sync + 'static, -{ - if keep_going { - errors.push(checkout::ErrorRecord { - path: entry_path.into(), - error: Box::new(err), - }); - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - Ok(()) - } else { - Err(err) - } -} - -fn is_collision( - err: &std::io::Error, - entry_path: &BStr, - collisions: &mut Vec, - files: Option<&AtomicUsize>, -) -> bool { - if !gix_fs::symlink::is_collision_error(err) { - return false; - } - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. - gix_features::trace::error!("{entry_path}: collided ({:?})", err.kind()); - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - true -} diff --git a/gix-worktree/src/checkout/entry.rs b/gix-worktree/src/checkout/entry.rs deleted file mode 100644 index 4744f5e16ce..00000000000 --- a/gix-worktree/src/checkout/entry.rs +++ /dev/null @@ -1,294 +0,0 @@ -use std::{ - fs::OpenOptions, - io::Write, - path::{Path, PathBuf}, -}; - -use bstr::BStr; -use gix_filter::{driver::apply::MaybeDelayed, pipeline::convert::ToWorktreeOutcome}; -use gix_hash::oid; -use gix_index::{entry::Stat, Entry}; -use io_close::Close; - -use crate::Cache; - -pub struct Context<'a, Find> { - pub find: &'a mut Find, - pub path_cache: &'a mut Cache, - pub filters: &'a mut gix_filter::Pipeline, - pub buf: &'a mut Vec, -} - -/// A delayed result of a long-running filter process, which is made available as stream. 
-pub struct DelayedFilteredStream<'a> { - /// The key identifying the driver program - pub key: gix_filter::driver::Key, - /// If the file is going to be an executable. - pub needs_executable_bit: bool, - /// The validated path on disk at which the file should be placed. - pub validated_file_path: PathBuf, - /// The entry to adjust with the file we will write. - pub entry: &'a mut gix_index::Entry, - /// The relative path at which the entry resides (for use when querying the delayed entry). - pub entry_path: &'a BStr, -} - -pub enum Outcome<'a> { - /// The file was written. - Written { - /// The amount of written bytes. - bytes: usize, - }, - /// The will be ready later. - Delayed(DelayedFilteredStream<'a>), -} - -impl Outcome<'_> { - /// Return ourselves as (in-memory) bytes if possible. - pub fn as_bytes(&self) -> Option { - match self { - Outcome::Written { bytes } => Some(*bytes), - Outcome::Delayed { .. } => None, - } - } -} - -#[cfg_attr(not(unix), allow(unused_variables))] -pub fn checkout<'entry, Find, E>( - entry: &'entry mut Entry, - entry_path: &'entry BStr, - Context { - find, - filters, - path_cache, - buf, - }: Context<'_, Find>, - crate::checkout::chunk::Options { - fs: gix_fs::Capabilities { - symlink, - executable_bit, - .. - }, - destination_is_initially_empty, - overwrite_existing, - filter_process_delay, - .. 
- }: crate::checkout::chunk::Options, -) -> Result, crate::checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, - E: std::error::Error + Send + Sync + 'static, -{ - let dest_relative = gix_path::try_from_bstr(entry_path).map_err(|_| crate::checkout::Error::IllformedUtf8 { - path: entry_path.to_owned(), - })?; - let is_dir = Some(entry.mode == gix_index::entry::Mode::COMMIT || entry.mode == gix_index::entry::Mode::DIR); - let path_cache = path_cache.at_path(dest_relative, is_dir, &mut *find)?; - let dest = path_cache.path(); - - let object_size = match entry.mode { - gix_index::entry::Mode::FILE | gix_index::entry::Mode::FILE_EXECUTABLE => { - let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { - err, - oid: entry.id, - path: dest.to_path_buf(), - })?; - - let filtered = filters.convert_to_worktree( - obj.data, - entry_path, - |_, attrs| { - path_cache.matching_attributes(attrs); - }, - filter_process_delay, - )?; - let (num_bytes, file, set_executable_after_creation) = match filtered { - ToWorktreeOutcome::Unchanged(buf) | ToWorktreeOutcome::Buffer(buf) => { - let (mut file, flag) = open_file( - dest, - destination_is_initially_empty, - overwrite_existing, - executable_bit, - entry.mode, - )?; - file.write_all(buf)?; - (buf.len(), file, flag) - } - ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut filtered)) => { - let (mut file, flag) = open_file( - dest, - destination_is_initially_empty, - overwrite_existing, - executable_bit, - entry.mode, - )?; - let num_bytes = std::io::copy(&mut filtered, &mut file)? as usize; - (num_bytes, file, flag) - } - ToWorktreeOutcome::Process(MaybeDelayed::Delayed(key)) => { - return Ok(Outcome::Delayed(DelayedFilteredStream { - key, - needs_executable_bit: false, - validated_file_path: dest.to_owned(), - entry, - entry_path, - })) - } - }; - - // For possibly existing, overwritten files, we must change the file mode explicitly. 
- finalize_entry(entry, file, set_executable_after_creation.then_some(dest))?; - num_bytes - } - gix_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { - err, - oid: entry.id, - path: dest.to_path_buf(), - })?; - let symlink_destination = gix_path::try_from_byte_slice(obj.data) - .map_err(|_| crate::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; - - if symlink { - try_op_or_unlink(dest, overwrite_existing, |p| { - gix_fs::symlink::create(symlink_destination, p) - })?; - } else { - let mut file = try_op_or_unlink(dest, overwrite_existing, |p| { - open_options(p, destination_is_initially_empty, overwrite_existing).open(dest) - })?; - file.write_all(obj.data)?; - file.close()?; - } - - entry.stat = Stat::from_fs(&std::fs::symlink_metadata(dest)?)?; - obj.data.len() - } - gix_index::entry::Mode::DIR => { - gix_features::trace::warn!( - "Skipped sparse directory at '{entry_path}' ({id}) as it cannot yet be handled", - id = entry.id - ); - 0 - } - gix_index::entry::Mode::COMMIT => { - gix_features::trace::warn!( - "Skipped submodule at '{entry_path}' ({id}) as it cannot yet be handled", - id = entry.id - ); - 0 - } - _ => unreachable!(), - }; - Ok(Outcome::Written { bytes: object_size }) -} - -/// Note that this works only because we assume to not race ourselves when symlinks are involved, and we do this by -/// delaying symlink creation to the end and will always do that sequentially. -/// It's still possible to fall for a race if other actors create symlinks in our path, but that's nothing to defend against. 
-fn try_op_or_unlink( - path: &Path, - overwrite_existing: bool, - op: impl Fn(&Path) -> std::io::Result, -) -> std::io::Result { - if overwrite_existing { - match op(path) { - Ok(res) => Ok(res), - Err(err) if gix_fs::symlink::is_collision_error(&err) => { - try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?; - op(path) - } - Err(err) => Err(err), - } - } else { - op(path) - } -} - -fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> { - if path_meta.is_dir() { - std::fs::remove_dir_all(path) - } else if path_meta.file_type().is_symlink() { - gix_fs::symlink::remove(path) - } else { - std::fs::remove_file(path) - } -} - -#[cfg(not(debug_assertions))] -fn debug_assert_dest_is_no_symlink(_path: &Path) {} - -/// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place -#[cfg(debug_assertions)] -fn debug_assert_dest_is_no_symlink(path: &Path) { - if let Ok(meta) = path.metadata() { - debug_assert!( - !meta.file_type().is_symlink(), - "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}", - path.display() - ); - } -} - -fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions { - if overwrite_existing || !destination_is_initially_empty { - debug_assert_dest_is_no_symlink(path); - } - let mut options = gix_features::fs::open_options_no_follow(); - options - .create_new(destination_is_initially_empty && !overwrite_existing) - .create(!destination_is_initially_empty || overwrite_existing) - .write(true); - options -} - -pub(crate) fn open_file( - path: &Path, - destination_is_initially_empty: bool, - overwrite_existing: bool, - fs_supports_executable_bit: bool, - entry_mode: gix_index::entry::Mode, -) -> std::io::Result<(std::fs::File, bool)> { - #[cfg_attr(windows, allow(unused_mut))] - let mut options = open_options(path, destination_is_initially_empty, 
overwrite_existing); - let needs_executable_bit = fs_supports_executable_bit && entry_mode == gix_index::entry::Mode::FILE_EXECUTABLE; - #[cfg(unix)] - let set_executable_after_creation = if needs_executable_bit && destination_is_initially_empty { - use std::os::unix::fs::OpenOptionsExt; - // Note that these only work if the file was newly created, but won't if it's already - // existing, possibly without the executable bit set. Thus we do this only if the file is new. - options.mode(0o777); - false - } else { - needs_executable_bit - }; - // not supported on windows - #[cfg(windows)] - let set_executable_after_creation = needs_executable_bit; - try_op_or_unlink(path, overwrite_existing, |p| options.open(p)).map(|f| (f, set_executable_after_creation)) -} - -/// Close `file` and store its stats in `entry`, possibly setting `file` executable depending on `set_executable_after_creation`. -#[cfg_attr(windows, allow(unused_variables))] -pub(crate) fn finalize_entry( - entry: &mut gix_index::Entry, - file: std::fs::File, - set_executable_after_creation: Option<&Path>, -) -> Result<(), crate::checkout::Error> -where - E: std::error::Error + Send + Sync + 'static, -{ - // For possibly existing, overwritten files, we must change the file mode explicitly. - #[cfg(unix)] - if let Some(path) = set_executable_after_creation { - use std::os::unix::fs::PermissionsExt; - let mut perm = std::fs::symlink_metadata(path)?.permissions(); - perm.set_mode(0o777); - std::fs::set_permissions(path, perm)?; - } - // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. - // revisit this once there is a bug to fix. 
- entry.stat = Stat::from_fs(&file.metadata()?)?; - file.close()?; - Ok(()) -} diff --git a/gix-worktree/src/checkout/function.rs b/gix-worktree/src/checkout/function.rs deleted file mode 100644 index e52299e3972..00000000000 --- a/gix-worktree/src/checkout/function.rs +++ /dev/null @@ -1,168 +0,0 @@ -use std::sync::atomic::AtomicBool; - -use gix_features::{interrupt, parallel::in_parallel_with_finalize, progress::Progress}; -use gix_hash::oid; - -use crate::{cache, checkout::chunk, Cache}; - -/// Checkout the entire `index` into `dir`, and resolve objects found in index entries with `find` to write their content to their -/// respective path in `dir`. -/// Use `files` to count each fully checked out file, and count the amount written `bytes`. If `should_interrupt` is `true`, the -/// operation will abort. -/// `options` provide a lot of context on how to perform the operation. -/// -/// ### Handling the return value -/// -/// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. 
-/// -#[allow(clippy::too_many_arguments)] -pub fn checkout( - index: &mut gix_index::State, - dir: impl Into, - find: Find, - files: &mut impl Progress, - bytes: &mut impl Progress, - should_interrupt: &AtomicBool, - options: crate::checkout::Options, -) -> Result> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let paths = index.take_path_backing(); - let res = checkout_inner(index, &paths, dir, find, files, bytes, should_interrupt, options); - index.return_path_backing(paths); - res -} - -#[allow(clippy::too_many_arguments)] -fn checkout_inner( - index: &mut gix_index::State, - paths: &gix_index::PathStorage, - dir: impl Into, - find: Find, - files: &mut impl Progress, - bytes: &mut impl Progress, - should_interrupt: &AtomicBool, - mut options: crate::checkout::Options, -) -> Result> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let num_files = files.counter(); - let num_bytes = bytes.counter(); - let dir = dir.into(); - let case = if options.fs.ignore_case { - gix_glob::pattern::Case::Fold - } else { - gix_glob::pattern::Case::Sensitive - }; - let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit( - 100, - index.entries().len().into(), - options.thread_limit, - None, - ); - - let state = cache::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)); - let attribute_files = state.id_mappings_from_index(index, paths, case); - let mut ctx = chunk::Context { - buf: Vec::new(), - options: (&options).into(), - path_cache: Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), - filters: options.filters, - find, - }; - - let chunk::Outcome { - mut collisions, - mut errors, - mut bytes_written, - files: files_updated, - delayed_symlinks, - delayed_paths_unknown, - delayed_paths_unprocessed, - } = if 
num_threads == 1 { - let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); - let mut delayed_filter_results = Vec::new(); - let mut out = chunk::process( - entries_with_paths, - num_files.as_deref(), - num_bytes.as_deref(), - &mut delayed_filter_results, - &mut ctx, - )?; - chunk::process_delayed_filter_results( - delayed_filter_results, - num_files.as_deref(), - num_bytes.as_deref(), - &mut out, - &mut ctx, - )?; - out - } else { - let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); - in_parallel_with_finalize( - gix_features::iter::Chunks { - inner: entries_with_paths, - size: chunk_size, - }, - thread_limit, - { - let ctx = ctx.clone(); - move |_| (Vec::new(), ctx) - }, - |chunk, (delayed_filter_results, ctx)| { - chunk::process( - chunk.into_iter(), - num_files.as_deref(), - num_bytes.as_deref(), - delayed_filter_results, - ctx, - ) - }, - |(delayed_filter_results, mut ctx)| { - let mut out = chunk::Outcome::default(); - chunk::process_delayed_filter_results( - delayed_filter_results, - num_files.as_deref(), - num_bytes.as_deref(), - &mut out, - &mut ctx, - )?; - Ok(out) - }, - chunk::Reduce { - files: num_files.is_none().then_some(files), - bytes: num_bytes.is_none().then_some(bytes), - aggregate: Default::default(), - marker: Default::default(), - }, - )? - }; - - for (entry, entry_path) in delayed_symlinks { - bytes_written += chunk::checkout_entry_handle_result( - entry, - entry_path, - &mut errors, - &mut collisions, - num_files.as_deref(), - num_bytes.as_deref(), - &mut ctx, - )? 
- .as_bytes() - .expect("only symlinks are delayed here, they are never filtered (or delayed again)") - as u64; - } - - Ok(crate::checkout::Outcome { - files_updated, - collisions, - errors, - bytes_written, - delayed_paths_unknown, - delayed_paths_unprocessed, - }) -} diff --git a/gix-worktree/src/checkout/mod.rs b/gix-worktree/src/checkout/mod.rs deleted file mode 100644 index 4590b1f0373..00000000000 --- a/gix-worktree/src/checkout/mod.rs +++ /dev/null @@ -1,101 +0,0 @@ -use bstr::BString; -use gix_index::entry::stat; - -/// Information about a path that failed to checkout as something else was already present. -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct Collision { - /// the path that collided with something already present on disk. - pub path: BString, - /// The io error we encountered when checking out `path`. - pub error_kind: std::io::ErrorKind, -} - -/// A path that encountered an IO error. -pub struct ErrorRecord { - /// the path that encountered the error. - pub path: BString, - /// The error - pub error: Box, -} - -/// The outcome of checking out an entire index. -#[derive(Default)] -pub struct Outcome { - /// The amount of files updated, or created. - pub files_updated: usize, - /// The amount of bytes written to disk, - pub bytes_written: u64, - /// The encountered collisions, which can happen on a case-insensitive filesystem. - pub collisions: Vec, - /// Other errors that happened during checkout. - pub errors: Vec, - /// Relative paths that the process listed as 'delayed' even though we never passed them. - pub delayed_paths_unknown: Vec, - /// All paths that were left unprocessed, because they were never listed by the process even though we passed them. - pub delayed_paths_unprocessed: Vec, -} - -/// Options to further configure the checkout operation. 
-#[derive(Clone, Default)] -pub struct Options { - /// capabilities of the file system - pub fs: gix_fs::Capabilities, - /// If set, don't use more than this amount of threads. - /// Otherwise, usually use as many threads as there are logical cores. - /// A value of 0 is interpreted as no-limit - pub thread_limit: Option, - /// If true, we assume no file to exist in the target directory, and want exclusive access to it. - /// This should be enabled when cloning to avoid checks for freshness of files. This also enables - /// detection of collisions based on whether or not exclusive file creation succeeds or fails. - pub destination_is_initially_empty: bool, - /// If true, default false, worktree entries on disk will be overwritten with content from the index - /// even if they appear to be changed. When creating directories that clash with existing worktree entries, - /// these will try to delete the existing entry. - /// This is similar in behaviour as `git checkout --force`. - pub overwrite_existing: bool, - /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't - /// due to a conflict. - /// The checkout operation will never fail, but count the encountered errors instead along with their paths. - pub keep_going: bool, - /// Control how stat comparisons are made when checking if a file is fresh. - pub stat_options: stat::Options, - /// A stack of attributes to use with the filesystem cache to use as driver for filters. - pub attributes: crate::cache::state::Attributes, - /// The filter pipeline to use for applying mandatory filters before writing to the worktree. - pub filters: gix_filter::Pipeline, - /// Control how long-running processes may use the 'delay' capability. - pub filter_process_delay: gix_filter::driver::apply::Delay, -} - -/// The error returned by the [checkout()][crate::checkout()] function. 
-#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error("Could not convert path to UTF8: {}", .path)] - IllformedUtf8 { path: BString }, - #[error("The clock was off when reading file related metadata after updating a file on disk")] - Time(#[from] std::time::SystemTimeError), - #[error("IO error while writing blob or reading file metadata or changing filetype")] - Io(#[from] std::io::Error), - #[error("object {} for checkout at {} could not be retrieved from object database", .oid.to_hex(), .path.display())] - Find { - #[source] - err: E, - oid: gix_hash::ObjectId, - path: std::path::PathBuf, - }, - #[error(transparent)] - Filter(#[from] gix_filter::pipeline::convert::to_worktree::Error), - #[error(transparent)] - FilterListDelayed(#[from] gix_filter::driver::delayed::list::Error), - #[error(transparent)] - FilterFetchDelayed(#[from] gix_filter::driver::delayed::fetch::Error), - #[error("The entry at path '{rela_path}' was listed as delayed by the filter process, but we never passed it")] - FilterPathUnknown { rela_path: BString }, - #[error("The following paths were delayed and apparently forgotten to be processed by the filter driver: ")] - FilterPathsUnprocessed { rela_paths: Vec }, -} - -mod chunk; -mod entry; -pub(crate) mod function; diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 20ae186877b..9a0c32641fc 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -1,4 +1,4 @@ -//! A crate with all index-centric functionality that is interacting with a worktree. +//! A crate with utility types for use by other crates that implement specifics. //! //! Unless specified differently, all operations need an index file (e.g. `.git/index`) as driver. //! @@ -11,9 +11,6 @@ #![deny(missing_docs, rust_2018_idioms, unsafe_code)] use bstr::BString; -/// -pub mod read; - /// A cache for efficiently executing operations on directories and files which are encountered in sorted order. 
/// That way, these operations can be re-used for subsequent invocations in the same directory. /// @@ -52,9 +49,3 @@ pub(crate) type PathIdMapping = (BString, gix_hash::ObjectId); /// pub mod cache; -/// -pub mod checkout; -pub use checkout::function::checkout; - -pub mod status; -pub use status::function::status; diff --git a/gix-worktree/src/read.rs b/gix-worktree/src/read.rs deleted file mode 100644 index a54fc2c7611..00000000000 --- a/gix-worktree/src/read.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! This module allows creating git blobs from worktree files. -//! -//! For the most part a blob just contains the raw on-disk data. However symlinks need to be considered properly -//! and attributes/config options need to be considered. - -use std::{ - fs::{read_link, File}, - io::{self, Read}, - path::Path, -}; - -use gix_object::Blob; -use gix_path as path; - -// TODO: tests - -// TODO: what to do about precompose unicode and ignore_case for symlinks - -/// Create a blob from a file or symlink. -pub fn blob(path: &Path, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf(path, &mut data, capabilities)?; - Ok(Blob { data }) -} - -/// Create a blob from a file or symlink. -pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf_with_meta(path, &mut data, is_symlink, capabilities)?; - Ok(Blob { data }) -} - -/// Create blob data from a file or symlink. -pub fn data_to_buf<'a>(path: &Path, buf: &'a mut Vec, capabilities: &gix_fs::Capabilities) -> io::Result<&'a [u8]> { - data_to_buf_with_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) -} - -/// Create a blob from a file or symlink. 
-pub fn data_to_buf_with_meta<'a>( - path: &Path, - buf: &'a mut Vec, - is_symlink: bool, - capabilities: &gix_fs::Capabilities, -) -> io::Result<&'a [u8]> { - buf.clear(); - // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just - // normal files with their content equal to the linked path (so can be read normally) - // - if is_symlink && capabilities.symlink { - // conversion to bstr can never fail because symlinks are only used - // on unix (by git) so no reason to use the try version here - let symlink_path = path::into_bstr(read_link(path)?); - buf.extend_from_slice(&symlink_path); - // TODO: there is no reason this should be a clone - // std isn't great about allowing users to avoid allocations but we could - // simply write our own wrapper around libc::readlink which reuses the - // buffer. This would require unsafe code tough (obviously) - } else { - buf.clear(); - File::open(path)?.read_to_end(buf)?; - // TODO apply filters - } - Ok(buf.as_slice()) -} diff --git a/gix-worktree/src/status/content.rs b/gix-worktree/src/status/content.rs deleted file mode 100644 index aa775821a7a..00000000000 --- a/gix-worktree/src/status/content.rs +++ /dev/null @@ -1,80 +0,0 @@ -use gix_hash::ObjectId; -use gix_index as index; -use index::Entry; - -/// Compares the content of two blobs in some way. -pub trait CompareBlobs { - /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. - type Output; - - /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` - /// and allow reading its bytes using `worktree_blob`. - /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. - /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. 
- fn compare_blobs<'a, E>( - &mut self, - entry: &'a gix_index::Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E>; -} - -/// Lazy borrowed access to blob data. -pub trait ReadDataOnce<'a, E> { - /// Returns the contents of this blob. - /// - /// This potentially performs IO and other expensive operations - /// and should only be called when necessary. - fn read_data(self) -> Result<&'a [u8], E>; -} - -/// Compares to blobs by comparing their size and oid, and only looks at the file if -/// the size matches, therefore it's very fast. -#[derive(Clone)] -pub struct FastEq; - -impl CompareBlobs for FastEq { - type Output = (); - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { - // make sure to account for racily smudged entries here so that they don't always keep - // showing up as modified even after their contents have changed again, to a potentially - // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. - if entry.stat.size as usize != worktree_blob_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { - return Ok(Some(())); - } - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(())) - } -} - -/// Compares files to blobs by *always* comparing their hashes. -/// -/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and -/// therefore always returns an OID that can be reused later. 
-#[derive(Clone)] -pub struct HashEq; - -impl CompareBlobs for HashEq { - type Output = ObjectId; - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - _worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(file_hash)) - } -} diff --git a/gix-worktree/src/status/function.rs b/gix-worktree/src/status/function.rs deleted file mode 100644 index 5e01628b416..00000000000 --- a/gix-worktree/src/status/function.rs +++ /dev/null @@ -1,331 +0,0 @@ -use std::{io, marker::PhantomData, path::Path}; - -use bstr::BStr; -use filetime::FileTime; -use gix_features::parallel::{in_parallel_if, Reduce}; - -use crate::{ - read, - status::{ - content, - content::CompareBlobs, - types::{Error, Options}, - Change, VisitEntry, - }, -}; - -/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them -/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes. -/// `options` are used to configure the operation. -/// -/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to -/// the current time for which it will be considered fresh. -/// -/// Note that this isn't technically quite what this function does as this also provides some additional information, -/// like whether a file has conflicts, and files that were added with `git add` are shown as a special -/// changes despite not technically requiring a change to the index since `git add` already added the file to the index. 
-pub fn status<'index, T, Find, E>( - index: &'index mut gix_index::State, - worktree: &Path, - collector: &mut impl VisitEntry<'index, ContentChange = T>, - compare: impl CompareBlobs + Send + Clone, - find: Find, - options: Options, -) -> Result<(), Error> -where - T: Send, - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, -{ - // the order is absolutely critical here we use the old timestamp to detect racy index entries - // (modified at or after the last index update) during the index update we then set those - // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp. - let timestamp = index.timestamp(); - index.set_timestamp(FileTime::now()); - let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( - 100, - index.entries().len().into(), - options.thread_limit, - None, - ); - let (entries, path_backing) = index.entries_mut_and_pathbacking(); - in_parallel_if( - || true, // TODO: heuristic: when is parallelization not worth it? 
- entries.chunks_mut(chunk_size), - thread_limit, - { - let options = &options; - move |_| { - ( - State { - buf: Vec::new(), - odb_buf: Vec::new(), - timestamp, - path_backing, - worktree, - options, - }, - compare.clone(), - find.clone(), - ) - } - }, - |entries, (state, diff, find)| { - entries - .iter_mut() - .filter_map(|entry| state.process(entry, diff, find)) - .collect() - }, - ReduceChange { - collector, - phantom: PhantomData, - }, - ) -} - -struct State<'a, 'b> { - buf: Vec, - odb_buf: Vec, - timestamp: FileTime, - // path_cache: fs::Cache TODO path cache - path_backing: &'b [u8], - worktree: &'a Path, - options: &'a Options, -} - -type StatusResult<'index, T> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; - -impl<'index> State<'_, 'index> { - fn process( - &mut self, - entry: &'index mut gix_index::Entry, - diff: &mut impl CompareBlobs, - find: &mut Find, - ) -> Option> - where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, - { - let conflict = match entry.stage() { - 0 => false, - 1 => true, - _ => return None, - }; - if entry.flags.intersects( - gix_index::entry::Flags::UPTODATE - | gix_index::entry::Flags::SKIP_WORKTREE - | gix_index::entry::Flags::ASSUME_VALID - | gix_index::entry::Flags::FSMONITOR_VALID, - ) { - return None; - } - let path = entry.path_in(self.path_backing); - let status = self.compute_status(&mut *entry, path, diff, find); - Some(status.map(move |status| (&*entry, path, status, conflict))) - } - - /// # On how racy-git is handled here - /// - /// Basically the racy detection is a safety mechanism that ensures we can always just compare the stat - /// information between index and worktree and if they match we don't need to look at the content. 
- /// This usually just works but if a file updates quickly we could run into the following situation: - /// - /// * save file version `A` from disk into worktree (git add) - /// * file is changed so fast that the mtime doesn't change - *we only looks at seconds by default* - /// * file contents change but file-size stays the same, so `"foo" -> "bar"` has the same size but different content - /// - /// Now both `mtime` and `size`, and all other stat information, is the same but the file has actually changed. - /// This case is called *racily clean*. *The file should show up as changed but due to a data race it doesn't.* - /// This is the racy git problem. - /// - /// To solve this we do the following trick: Whenever we modify the index, which includes `git status`, we save the - /// current timestamp before the modification starts. This timestamp fundamentally represents a checkpoint of sorts. - /// We "promise" ourselves that after the modification finishes all entries modified before this timestamp have the - /// racy git problem resolved. - /// - /// So now when we modify the index we must resolve the racy git problem somehow. To do that we only need to look at - /// unchanged entries. Changed entries are not interesting since they are already showing up as changed anyway so there - /// isn't really a race-condition to worry about. This also explains why removing the `return` here doesn't have an apparent effect. - /// This entire branch here is just the optimization of "don't even look at index entries where the stat hasn't changed". - /// If we don't have this optimization the result shouldn't change, our status implementation will just be super slow :D - - /// We calculate whether this change is `racy_clean`, so if the last `timestamp` is before or the same as the `mtime` of the entry - /// which is what `new_stat.is_racy(..)` does in the branch, and only if we are sure that there is no race condition - /// do we `return` early. 
Since we don't `return` early we just do a full content comparison below, - /// which always yields the correct result, there is no race condition there. - /// - /// If a file showed up as racily clean and didn't change then we don't need to do anything. After this status check is - /// complete and the file won't show up as racily clean anymore, since it's mtime is now before the new timestamp. - /// However if the file did actually change then we really ran into one of those rare race conditions in that case we, - /// and git does the same, set the size of the file in the index to 0. This will always make the file show up as changed. - /// This adds the need to treat all files of size 0 in the index as changed. This is not quite right of course because 0 sized files - /// could be entirely valid and unchanged. Therefore this only applies if the oid doesn't match the oid of an empty file, - /// which is a constant. - /// - /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). 
- fn compute_status( - &mut self, - entry: &mut gix_index::Entry, - git_path: &BStr, - diff: &mut impl CompareBlobs, - find: &mut Find, - ) -> Result>, Error> - where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, - { - // TODO fs cache - let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; - let worktree_path = self.worktree.join(worktree_path); - let metadata = match worktree_path.symlink_metadata() { - // TODO: check if any parent directory is a symlink - // we need to use fs::Cache for that - Ok(metadata) if metadata.is_dir() => { - // index entries are normally only for files/symlinks - // if a file turned into a directory it was removed - // the only exception here are submodules which are - // part of the index despite being directories - // - // TODO: submodules: - // if entry.mode.contains(Mode::COMMIT) && - // resolve_gitlink_ref(ce->name, "HEAD", &sub)) - return Ok(Some(Change::Removed)); - } - Ok(metadata) => metadata, - Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), - Err(err) => { - return Err(err.into()); - } - }; - if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { - return Ok(Some(Change::IntentToAdd)); - } - let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; - let executable_bit_changed = - match entry - .mode - .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) - { - Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type)), - Some(gix_index::entry::mode::Change::ExecutableBit) => true, - None => false, - }; - - // Here we implement racy-git. See racy-git.txt in the git documentation for a detailed documentation. - // - // A file is racy if: - // 1. 
its `mtime` is at or after the last index timestamp and its entry stat information - // matches the on-disk file but the file contents are actually modified - // 2. it's size is 0 (set after detecting a file was racy previously) - // - // The first case is detected below by checking the timestamp if the file is marked unmodified. - // The second case is usually detected either because the on-disk file is not empty, hence - // the basic stat match fails, or by checking whether the size doesn't fit the oid. - let mut racy_clean = false; - if !executable_bit_changed - && new_stat.matches(&entry.stat, self.options.stat) - // TODO: find a test for the following line or remove it. Is this more often hit with smudge/clean filters? - && (!entry.id.is_empty_blob() || entry.stat.size == 0) - { - racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); - if !racy_clean { - return Ok(None); - } - } - - let read_file = WorktreeBlob { - buf: &mut self.buf, - path: &worktree_path, - entry, - options: self.options, - }; - let read_blob = OdbBlob { - buf: &mut self.odb_buf, - id: &entry.id, - find, - }; - let content_change = diff.compare_blobs::(entry, metadata.len() as usize, read_file, read_blob)?; - // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. - if content_change.is_some() && racy_clean { - entry.stat.size = 0; - } - if content_change.is_some() || executable_bit_changed { - Ok(Some(Change::Modification { - executable_bit_changed, - content_change, - })) - } else { - // don't diff against this file next time since we know the file is unchanged. 
- entry.stat = new_stat; - Ok(None) - } - } -} - -struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { - collector: &'a mut T, - phantom: PhantomData, -} - -impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChange<'_, 'index, C> { - type Input = Vec>; - - type FeedProduce = (); - - type Output = (); - - type Error = Error; - - fn feed(&mut self, items: Self::Input) -> Result { - for item in items { - let (entry, path, change, conflict) = item?; - self.collector.visit_entry(entry, path, change, conflict); - } - Ok(()) - } - - fn finalize(self) -> Result { - Ok(()) - } -} - -struct WorktreeBlob<'a> { - buf: &'a mut Vec, - path: &'a Path, - entry: &'a gix_index::Entry, - options: &'a Options, -} - -struct OdbBlob<'a, Find, E> -where - E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, -{ - buf: &'a mut Vec, - id: &'a gix_hash::oid, - find: Find, -} - -impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { - fn read_data(self) -> Result<&'a [u8], Error> { - let res = read::data_to_buf_with_meta( - self.path, - self.buf, - self.entry.mode == gix_index::entry::Mode::SYMLINK, - &self.options.fs, - )?; - Ok(res) - } -} - -impl<'a, Find, E> content::ReadDataOnce<'a, Error> for OdbBlob<'a, Find, E> -where - E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, -{ - fn read_data(mut self) -> Result<&'a [u8], Error> { - (self.find)(self.id, self.buf) - .map(|b| b.data) - .map_err(move |err| Error::Find(Box::new(err))) - } -} diff --git a/gix-worktree/src/status/mod.rs b/gix-worktree/src/status/mod.rs deleted file mode 100644 index 8294a54e8ac..00000000000 --- a/gix-worktree/src/status/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Changes between an index and a worktree. 
-/// -mod types; -pub use types::{Change, Error, Options, VisitEntry}; - -mod recorder; -pub use recorder::Recorder; - -/// -pub mod content; -pub(crate) mod function; diff --git a/gix-worktree/src/status/recorder.rs b/gix-worktree/src/status/recorder.rs deleted file mode 100644 index ea10303ae60..00000000000 --- a/gix-worktree/src/status/recorder.rs +++ /dev/null @@ -1,27 +0,0 @@ -use bstr::BStr; -use gix_index as index; - -use crate::status::{Change, VisitEntry}; - -/// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. -#[derive(Debug, Default)] -pub struct Recorder<'index, T = ()> { - /// collected changes, index entries without conflicts or changes are excluded. - pub records: Vec<(&'index BStr, Option>, bool)>, -} - -impl<'index, T: Send> VisitEntry<'index> for Recorder<'index, T> { - type ContentChange = T; - - fn visit_entry( - &mut self, - _entry: &'index index::Entry, - rela_path: &'index BStr, - status: Option>, - conflict: bool, - ) { - if conflict || status.is_some() { - self.records.push((rela_path, status, conflict)) - } - } -} diff --git a/gix-worktree/src/status/types.rs b/gix-worktree/src/status/types.rs deleted file mode 100644 index 3d488d24ef4..00000000000 --- a/gix-worktree/src/status/types.rs +++ /dev/null @@ -1,69 +0,0 @@ -use bstr::BStr; - -/// The error returned by [`status()`][crate::status()]. -#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error("Could not convert path to UTF8")] - IllformedUtf8, - #[error("The clock was off when reading file related metadata after updating a file on disk")] - Time(#[from] std::time::SystemTimeError), - #[error("IO error while writing blob or reading file metadata or changing filetype")] - Io(#[from] std::io::Error), - #[error("Failed to obtain blob from object database")] - Find(#[source] Box), -} - -#[derive(Clone, Default)] -/// Options that control how the index status with a worktree is computed. 
-pub struct Options { - /// Capabilities of the file system which affect the status computation. - pub fs: gix_fs::Capabilities, - /// If set, don't use more than this amount of threads. - /// Otherwise, usually use as many threads as there are logical cores. - /// A value of 0 is interpreted as no-limit - pub thread_limit: Option, - /// Options that control how stat comparisons are made when checking if a file is fresh. - pub stat: gix_index::entry::stat::Options, -} - -/// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum Change { - /// This corresponding file does not exist in the worktree anymore. - Removed, - /// The type of file changed compared to the worktree, i.e. a symlink s now a file. - Type, - /// This worktree file was modified in some form, like a permission change or content change or both, - /// as compared to this entry. - Modification { - /// Indicates that one of the stat changes was an executable bit change - /// which is a significant change itself. - executable_bit_changed: bool, - /// The output of the [`CompareBlobs`][crate::status::content::CompareBlobs] run on this entry. - /// If there is no content change and only the executable bit - /// changed than this is `None`. - content_change: Option, - }, - /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. - /// - /// This means it's not available in the object database yet or the index was created from, - /// even though now an entry exists that represents the worktree file. - IntentToAdd, -} - -/// Observe changes by comparing an index entry to the worktree or another index. -pub trait VisitEntry<'index> { - /// Data generated by comparing an entry with a file. 
- type ContentChange; - /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether - /// or not it has a `conflict`. - /// If `change` is `None`, there is no change. - fn visit_entry( - &mut self, - entry: &'index gix_index::Entry, - rela_path: &'index BStr, - change: Option>, - conflict: bool, - ); -} diff --git a/gix-worktree/src/untracked.rs b/gix-worktree/src/untracked.rs deleted file mode 100644 index 6e77d7fa3ba..00000000000 --- a/gix-worktree/src/untracked.rs +++ /dev/null @@ -1 +0,0 @@ -// TODO: untracked file detection, needs fs::Cache diff --git a/gix-worktree/tests/fixtures/generated-archives/.gitignore b/gix-worktree/tests/fixtures/generated-archives/.gitignore index e8d0fd48dce..6f631797de0 100644 --- a/gix-worktree/tests/fixtures/generated-archives/.gitignore +++ b/gix-worktree/tests/fixtures/generated-archives/.gitignore @@ -1,7 +1,2 @@ make_ignore_and_attributes_setup.tar.xz -make_mixed_without_submodules.tar.xz -make_mixed_without_submodules_and_symlinks.tar.xz make_attributes_baseline.tar.xz -make_dangerous_symlink.tar.xz -status_unchanged.tar.xz -status_changed.tar.xz diff --git a/gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz b/gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz deleted file mode 100644 index 6e4ed4be056..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2984e2e61b28635014165351cd872ea7e4f09c28b8b4bbe50692a465ef648033 -size 10616 diff --git a/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz b/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz index 56edd71ff4a..dc93af2130b 100644 --- a/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz +++ 
b/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24f605623efc49819d1b30c52fe22da8f94f2d267e8030ec9bc3b9b845801f76 -size 9220 +oid sha256:1804dc740055b8a5afe65a2db14f29c8ae4691896e67342a8dcb11530fd448c6 +size 9240 diff --git a/gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz b/gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz deleted file mode 100644 index 2d045b26aab..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35b728a127f3b6170bac44469ff36d5ad0be2a4247a8926f1aaffb97b5973efc -size 1596 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz deleted file mode 100644 index dbe191fbe1f..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd6d32ab7a1e372d80a617926cac2463f6620baedf74642d78fe7f8c956fd031 -size 11036 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz deleted file mode 100644 index 76feea7dc82..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:876670d74c01249d361aa73d83ab20d846db7c922a3ca825f778b5f9d746c401 -size 9304 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz deleted file mode 100644 index 7b1462fc83e..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:e5fe85a65e3689e9e60598130be60761dc4ea129e04d7d5501320f7ebad1eb2b -size 10520 diff --git a/gix-worktree/tests/fixtures/make_dangerous_symlink.sh b/gix-worktree/tests/fixtures/make_dangerous_symlink.sh deleted file mode 100755 index 31437285a37..00000000000 --- a/gix-worktree/tests/fixtures/make_dangerous_symlink.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -# Every symlink is dangerous as it might either link to another directory and thus redirect -# all writes in the path, or it might point to a file and opening the symlink actually opens -# the target. -# We handle this by either validating symlinks specifically or create symlinks -empty_oid=$(git hash-object -w --stdin .git/info/exclude -# a sample .git/info/exclude -file-anywhere -/file-from-top - -dir-anywhere/ -/dir-from-top - -subdir-anywhere/file -subdir-anywhere/dir/ -EOF - -git commit --allow-empty -m "init" diff --git a/gix-worktree/tests/fixtures/make_ignorecase_collisions.sh b/gix-worktree/tests/fixtures/make_ignorecase_collisions.sh deleted file mode 100755 index d91bd542588..00000000000 --- a/gix-worktree/tests/fixtures/make_ignorecase_collisions.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -empty_oid=$(git hash-object -w --stdin .gitattributes -git add -A - -git update-index --index-info <<-EOF -100644 $content_oid FILE_X -100644 $content_oid FILE_x -100644 $content_oid file_X -100644 $content_oid file_x -100644 $empty_oid D/B -100644 $empty_oid D/C -100644 $empty_oid d -100644 $empty_oid X -120000 $symlink_target x -120000 $symlink_target link-to-X -EOF - -git commit -m "init" -git checkout -f HEAD; diff --git a/gix-worktree/tests/fixtures/make_mixed_without_submodules.sh b/gix-worktree/tests/fixtures/make_mixed_without_submodules.sh deleted file mode 100755 index 43fafbad944..00000000000 --- a/gix-worktree/tests/fixtures/make_mixed_without_submodules.sh +++ /dev/null @@ 
-1,20 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo "other content" > dir/content -echo "* filter=arrow" > .gitattributes -echo "executable -filter" >> .gitattributes -echo ".gitattributes -filter" >> .gitattributes - -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" diff --git a/gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh b/gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh deleted file mode 100755 index 0e0e95ae301..00000000000 --- a/gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo "other content" > dir/content -echo "* filter=arrow" > .gitattributes -echo "executable -filter" >> .gitattributes -echo ".gitattributes -filter" >> .gitattributes - -mkdir dir/sub-dir -echo "even other content" > dir/sub-dir/file - -git add -A -git commit -m "Commit" diff --git a/gix-worktree/tests/fixtures/racy_git.sh b/gix-worktree/tests/fixtures/racy_git.sh deleted file mode 100755 index 7fdef456f87..00000000000 --- a/gix-worktree/tests/fixtures/racy_git.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -echo -n "foo" > content - -git add -A -git commit -m "Commit" - -# file size should not be changed by this -echo -n "bar" > content diff --git a/gix-worktree/tests/fixtures/status_changed.sh b/gix-worktree/tests/fixtures/status_changed.sh deleted file mode 100755 index 033c6a8336f..00000000000 --- a/gix-worktree/tests/fixtures/status_changed.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo -n "other content" > dir/content -echo -n "other 
content" > dir/content2 -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" - -chmod +x dir/content -echo "new content" > dir/content2 -chmod -x executable -echo -n "foo" > executable - -rm empty -ln -sf dir/content empty -git reset \ No newline at end of file diff --git a/gix-worktree/tests/fixtures/status_conflict.sh b/gix-worktree/tests/fixtures/status_conflict.sh deleted file mode 100755 index d78e81bfe7e..00000000000 --- a/gix-worktree/tests/fixtures/status_conflict.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -echo base > content -git add -A -git commit -m "base" - -git checkout -b feat -echo feat > content -git commit -am "feat" - -git checkout main -echo base-change > content -git commit -am "new base" - -git merge feat || : diff --git a/gix-worktree/tests/fixtures/status_intent_to_add.sh b/gix-worktree/tests/fixtures/status_intent_to_add.sh deleted file mode 100755 index 7d1601385c0..00000000000 --- a/gix-worktree/tests/fixtures/status_intent_to_add.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch content -echo -n "content" > content - -git add --intent-to-add -A diff --git a/gix-worktree/tests/fixtures/status_removed.sh b/gix-worktree/tests/fixtures/status_removed.sh deleted file mode 100755 index 30cdfb94993..00000000000 --- a/gix-worktree/tests/fixtures/status_removed.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo -n "other content" > dir/content -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" -rm -rf ./empty ./executable ./dir/content ./dir/sub-dir/symlink -git reset \ No newline at end of file diff --git a/gix-worktree/tests/fixtures/status_unchanged.sh b/gix-worktree/tests/fixtures/status_unchanged.sh deleted file mode 100755 index 
67684549509..00000000000 --- a/gix-worktree/tests/fixtures/status_unchanged.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo -n "other content" > dir/content -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" - -touch ./empty ./executable ./dir/content ./dir/sub-dir/symlink - -git reset # ensure index timestamp is large enough to not mark everything racy \ No newline at end of file diff --git a/gix-worktree/tests/worktree/cache/create_directory.rs b/gix-worktree/tests/worktree/cache/create_directory.rs index 48bfcd862e2..f0f16b83172 100644 --- a/gix-worktree/tests/worktree/cache/create_directory.rs +++ b/gix-worktree/tests/worktree/cache/create_directory.rs @@ -1,7 +1,7 @@ use std::path::Path; +use gix_testtools::tempfile::{tempdir, TempDir}; use gix_worktree::{cache, Cache}; -use tempfile::{tempdir, TempDir}; #[allow(clippy::ptr_arg)] fn panic_on_find<'buf>(_oid: &gix_hash::oid, _buf: &'buf mut Vec) -> std::io::Result> { diff --git a/gix-worktree/tests/worktree/checkout.rs b/gix-worktree/tests/worktree/checkout.rs deleted file mode 100644 index db12d1fb4cc..00000000000 --- a/gix-worktree/tests/worktree/checkout.rs +++ /dev/null @@ -1,526 +0,0 @@ -#[cfg(unix)] -use std::os::unix::prelude::MetadataExt; -use std::{ - fs, - io::{ErrorKind, ErrorKind::AlreadyExists}, - path::{Path, PathBuf}, - sync::atomic::{AtomicBool, AtomicUsize, Ordering}, -}; - -use gix_features::progress; -use gix_object::bstr::ByteSlice; -use gix_odb::FindExt; -use gix_worktree::checkout::Collision; -use once_cell::sync::Lazy; -use tempfile::TempDir; - -use crate::fixture_path; - -static DRIVER: Lazy = Lazy::new(|| { - let mut cargo = std::process::Command::new(env!("CARGO")); - let res = cargo - .args(["build", "-p=gix-filter", "--example", "arrow"]) - .status() - .expect("cargo should run fine"); - assert!(res.success(), 
"cargo invocation should be successful"); - - let path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")) - .ancestors() - .nth(1) - .expect("first parent in target dir") - .join("debug") - .join("examples") - .join(if cfg!(windows) { "arrow.exe" } else { "arrow" }); - assert!(path.is_file(), "Expecting driver to be located at {path:?}"); - path -}); - -fn driver_exe() -> String { - let mut exe = DRIVER.to_string_lossy().into_owned(); - if cfg!(windows) { - exe = exe.replace('\\', "/"); - } - exe -} - -#[test] -fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { - let mut opts = opts_from_probe(); - // without overwrite mode, everything is safe. - opts.overwrite_existing = false; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink").unwrap(); - - let source_files = dir_structure(&source_tree); - let worktree_files = dir_structure(&destination); - - if opts.fs.ignore_case { - assert_eq!( - stripped_prefix(&source_tree, &source_files), - stripped_prefix(&destination, &worktree_files), - ); - if multi_threaded() { - assert_eq!(outcome.collisions.len(), 2); - } else { - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "FAKE-DIR".into(), - error_kind: AlreadyExists - }, - Collision { - path: "FAKE-FILE".into(), - error_kind: AlreadyExists - } - ] - ); - } - } else { - let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; - assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); - assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); - assert!(outcome.collisions.is_empty()); - }; -} - -#[test] -fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { - let mut opts = opts_from_probe(); - // with overwrite mode - opts.overwrite_existing = true; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink").unwrap(); 
- - let source_files = dir_structure(&source_tree); - let worktree_files = dir_structure(&destination); - - if opts.fs.ignore_case { - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]), - ); - assert_eq!( - stripped_prefix(&destination, &worktree_files), - paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]), - ); - assert!(outcome.collisions.is_empty()); - } else { - let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; - assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); - assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); - assert!(outcome.collisions.is_empty()); - }; -} - -#[test] -fn delayed_driver_process() -> crate::Result { - let mut opts = opts_from_probe(); - opts.overwrite_existing = true; - opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow; - opts.destination_is_initially_empty = false; - setup_filter_pipeline(opts.filters.options_mut()); - let (_source, destination, _index, outcome) = - checkout_index_in_tmp_dir_opts(opts, "make_mixed_without_submodules_and_symlinks", |_| true, |_| Ok(()))?; - assert_eq!(outcome.collisions.len(), 0); - assert_eq!(outcome.errors.len(), 0); - assert_eq!(outcome.files_updated, 5); - - let dest = destination.path(); - assert_eq!( - std::fs::read(dest.join("executable"))?.as_bstr(), - "content", - "unfiltered" - ); - assert_eq!( - std::fs::read(dest.join("dir").join("content"))?.as_bstr(), - "➡other content\r\n" - ); - assert_eq!( - std::fs::read(dest.join("dir").join("sub-dir").join("file"))?.as_bstr(), - "➡even other content\r\n" - ); - Ok(()) -} - -#[test] -#[cfg_attr( - windows, - ignore = "on windows, the symlink to a directory doesn't seem to work and we really want to test with symlinks" -)] -fn overwriting_files_and_lone_directories_works() -> crate::Result { - for delay in [ - gix_filter::driver::apply::Delay::Allow, - 
gix_filter::driver::apply::Delay::Forbid, - ] { - let mut opts = opts_from_probe(); - opts.overwrite_existing = true; - opts.filter_process_delay = delay; - opts.destination_is_initially_empty = false; - setup_filter_pipeline(opts.filters.options_mut()); - let (source, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( - opts.clone(), - "make_mixed_without_submodules", - |_| true, - |d| { - let empty = d.join("empty"); - symlink::symlink_dir(d.join(".."), &empty)?; // empty is symlink to the directory above - std::fs::write(d.join("executable"), b"foo")?; // executable is regular file and has different content - let dir = d.join("dir"); - std::fs::create_dir(&dir)?; - std::fs::create_dir(dir.join("content"))?; // 'content' is a directory now - - let dir = dir.join("sub-dir"); - std::fs::create_dir(&dir)?; - - symlink::symlink_dir(empty, dir.join("symlink"))?; // 'symlink' is a symlink to another file - Ok(()) - }, - )?; - - assert!(outcome.collisions.is_empty()); - - assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(["dir/content", "dir/sub-dir/symlink", "empty", "executable"]) - ); - let meta = std::fs::symlink_metadata(destination.path().join("empty"))?; - assert!(meta.is_file(), "'empty' is now a file"); - assert_eq!(meta.len(), 0, "'empty' is indeed empty"); - - let exe = destination.path().join("executable"); - assert_eq!(std::fs::read(&exe)?, b"content", "'exe' has the correct content"); - - let meta = std::fs::symlink_metadata(exe)?; - assert!(meta.is_file()); - if opts.fs.executable_bit { - #[cfg(unix)] - assert_eq!(meta.mode() & 0o700, 0o700, "the executable bit is set where supported"); - } - - assert_eq!( - std::fs::read(source.join("dir/content"))?.as_bstr(), - "other content\n", - "in the worktree, we have LF" - ); - assert_eq!( - std::fs::read(destination.path().join("dir/content"))?.as_bstr(), - "➡other content\r\n", - "autocrlf is enabled, so we get CRLF when checking out as the pipeline is active, and 
we have a filter" - ); - - let symlink = destination.path().join("dir/sub-dir/symlink"); - // on windows, git won't create symlinks as its probe won't detect the capability, even though we do. - assert_eq!(std::fs::symlink_metadata(&symlink)?.is_symlink(), cfg!(unix)); - assert_eq!( - std::fs::read(symlink)?.as_bstr(), - "➡other content\r\n", - "autocrlf is enabled" - ); - } - Ok(()) -} - -#[test] -fn symlinks_become_files_if_disabled() -> crate::Result { - let mut opts = opts_from_probe(); - opts.fs.symlink = false; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - Ok(()) -} - -#[test] -fn allow_or_disallow_symlinks() -> crate::Result { - let mut opts = opts_from_probe(); - for allowed in &[false, true] { - opts.fs.symlink = *allowed; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - } - Ok(()) -} - -#[test] -fn keep_going_collects_results() { - let mut opts = opts_from_probe(); - opts.keep_going = true; - let count = AtomicUsize::default(); - let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( - opts, - "make_mixed_without_submodules", - |_id| { - !matches!( - count.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { - (current < 2).then_some(current + 1) - }), - Ok(_) - ) - }, - |_| Ok(()), - ) - .unwrap(); - - if multi_threaded() { - assert_eq!( - outcome.errors.len(), - 2, - "content changes due to non-deterministic nature of racy threads" - ) - } else { - assert_eq!( - outcome - .errors - .iter() - .map(|r| r.path.to_path_lossy().into_owned()) - .collect::>(), - paths(if cfg!(unix) { - [".gitattributes", "dir/content"] - } else { - // not actually 
a symlink anymore, even though symlinks are supported but git think differently. - ["dir/content", "dir/sub-dir/symlink"] - }) - ); - } - - if multi_threaded() { - let actual = dir_structure(&destination); - assert!( - (2..=3).contains(&actual.len()), - "it's 3 most of the time, but can be 2 of the 'empty' file is missing as the object couldn't be accessed.\ - It's unclear why there isn't more, as it would keep going" - ); - } else { - assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(if cfg!(unix) { - Box::new(["dir/sub-dir/symlink", "empty", "executable"].into_iter()) as Box> - } else { - Box::new(["empty", "executable"].into_iter()) - }), - "some files could not be created" - ); - } - - assert!(outcome.collisions.is_empty()); -} - -#[test] -fn no_case_related_collisions_on_case_sensitive_filesystem() { - let opts = opts_from_probe(); - if opts.fs.ignore_case { - eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system"); - return; - } - let (source_tree, destination, index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_ignorecase_collisions").unwrap(); - - assert!(outcome.collisions.is_empty()); - let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); - assert_eq!( - num_files, - index.entries().len() - 1, - "it checks out all files (minus 1 to account for .gitattributes which is skipped in the worktree in our tests)" - ); - assert!( - destination.path().join(".gitattributes").is_file(), - "we do have attributes even though, dot files are ignored in `assert_equality`" - ); -} - -#[test] -fn collisions_are_detected_on_a_case_insensitive_filesystem_even_with_delayed_filters() { - let mut opts = opts_from_probe(); - if !opts.fs.ignore_case { - eprintln!("Skipping case-insensitive testing on what would be a case-sensitive file system"); - return; - } - setup_filter_pipeline(opts.filters.options_mut()); - opts.filter_process_delay = 
gix_filter::driver::apply::Delay::Allow; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); - - let source_files = dir_structure(&source_tree); - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["d", "file_x", "link-to-X", "x"]), - "plenty of collisions prevent a checkout" - ); - - let dest_files = dir_structure(&destination); - if multi_threaded() { - assert!( - (4..=6).contains(&dest_files.len()), - "due to the clash happening at nearly any time, and keep-going is false, we get a variance of files" - ); - } else { - assert_eq!( - stripped_prefix(&destination, &dest_files), - paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); - } - - let error_kind = ErrorKind::AlreadyExists; - #[cfg(windows)] - let error_kind_dir = ErrorKind::PermissionDenied; - #[cfg(not(windows))] - let error_kind_dir = error_kind; - - if multi_threaded() { - assert!( - (5..=6).contains(&outcome.collisions.len()), - "can only assert on number as it's racily creating files so unclear which one clashes, and due to keep-going = false there is variance" - ); - } else { - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "d".into(), - error_kind: error_kind_dir, - }, - Collision { - path: "FILE_x".into(), - error_kind, - }, - Collision { - path: "file_X".into(), - error_kind, - }, - Collision { - path: "file_x".into(), - error_kind, - }, - Collision { - path: "x".into(), - error_kind, - }, - ], - "these files couldn't be checked out" - ); - } -} - -fn multi_threaded() -> bool { - gix_features::parallel::num_threads(None) > 1 -} - -fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { - let source_files = dir_structure(source_tree); - let worktree_files = dir_structure(destination); - - assert_eq!( - stripped_prefix(source_tree, 
&source_files), - stripped_prefix(destination, &worktree_files), - ); - - let mut count = 0; - for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { - count += 1; - if !allow_symlinks && source_file.is_symlink() { - assert!(!worktree_file.is_symlink()); - assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); - } else { - assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); - #[cfg(unix)] - assert_eq!( - fs::symlink_metadata(source_file)?.mode() & 0o700, - fs::symlink_metadata(worktree_file)?.mode() & 0o700, - "permissions of source and checked out file are comparable" - ); - } - } - Ok(count) -} - -pub fn dir_structure>(path: P) -> Vec { - let path = path.as_ref(); - let mut files: Vec<_> = walkdir::WalkDir::new(path) - .follow_links(false) - .into_iter() - .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) - .flatten() - .filter_map(|e| (!e.path().symlink_metadata().map_or(true, |m| m.is_dir())).then(|| e.path().to_path_buf())) - .collect(); - files.sort(); - files -} - -fn checkout_index_in_tmp_dir( - opts: gix_worktree::checkout::Options, - name: &str, -) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { - checkout_index_in_tmp_dir_opts(opts, name, |_d| true, |_| Ok(())) -} - -fn checkout_index_in_tmp_dir_opts( - opts: gix_worktree::checkout::Options, - name: &str, - mut allow_return_object: impl FnMut(&gix_hash::oid) -> bool + Send + Clone, - prep_dest: impl Fn(&Path) -> std::io::Result<()>, -) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { - let source_tree = fixture_path(name); - let git_dir = source_tree.join(".git"); - let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default())?; - let odb = gix_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?; - let destination = tempfile::tempdir_in(std::env::current_dir()?)?; - 
prep_dest(destination.path()).expect("preparation must succeed"); - - let outcome = gix_worktree::checkout( - &mut index, - destination.path(), - move |oid, buf| { - if allow_return_object(oid) { - odb.find_blob(oid, buf) - } else { - Err(gix_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) - } - }, - &mut progress::Discard, - &mut progress::Discard, - &AtomicBool::default(), - opts, - )?; - Ok((source_tree, destination, index, outcome)) -} - -fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { - source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() -} - -fn probe_gitoxide_dir() -> crate::Result { - Ok(gix_fs::Capabilities::probe( - std::env::current_dir()?.join("..").join(".git"), - )) -} - -fn opts_from_probe() -> gix_worktree::checkout::Options { - gix_worktree::checkout::Options { - fs: probe_gitoxide_dir().unwrap(), - destination_is_initially_empty: true, - thread_limit: gix_features::parallel::num_threads(None).into(), - ..Default::default() - } -} - -fn paths<'a>(p: impl IntoIterator) -> Vec { - p.into_iter().map(PathBuf::from).collect() -} - -fn setup_filter_pipeline(opts: &mut gix_filter::pipeline::Options) { - opts.eol_config.auto_crlf = gix_filter::eol::AutoCrlf::Enabled; - opts.drivers = vec![gix_filter::Driver { - name: "arrow".into(), - clean: None, - smudge: None, - process: Some((driver_exe() + " process").into()), - required: true, - }]; -} diff --git a/gix-worktree/tests/worktree/mod.rs b/gix-worktree/tests/worktree/mod.rs index 85ffef380a4..f51aa9528b1 100644 --- a/gix-worktree/tests/worktree/mod.rs +++ b/gix-worktree/tests/worktree/mod.rs @@ -1,8 +1,4 @@ mod cache; -mod checkout; -mod status; - -use std::path::{Path, PathBuf}; use gix_hash::ObjectId; pub type Result = std::result::Result>; @@ -10,8 +6,3 @@ pub type Result = std::result::Result>; pub fn hex_to_id(hex: &str) -> ObjectId { ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") } - -pub fn fixture_path(name: &str) -> 
PathBuf { - let dir = gix_testtools::scripted_fixture_read_only(Path::new(name).with_extension("sh")).expect("script works"); - dir -} diff --git a/gix-worktree/tests/worktree/status.rs b/gix-worktree/tests/worktree/status.rs deleted file mode 100644 index 11689b5f6e2..00000000000 --- a/gix-worktree/tests/worktree/status.rs +++ /dev/null @@ -1,226 +0,0 @@ -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; - -use bstr::BStr; -use filetime::{set_file_mtime, FileTime}; -use gix_index as index; -use gix_index::Entry; -use gix_worktree::{ - status, - status::{ - content::{CompareBlobs, FastEq, ReadDataOnce}, - Change, Options, Recorder, - }, -}; - -use crate::fixture_path; - -// since tests are fixtures a bunch of stat information (like inode number) -// changes when extracting the data so we need to disable all advanced stat -// changes and only look at mtime seconds and file size to properly -// test all code paths (and to trigger racy git). -const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { - trust_ctime: false, - check_stat: false, - use_nsec: false, - use_stdev: false, -}; - -fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { - let worktree = fixture_path(name); - let git_dir = worktree.join(".git"); - let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); - let mut recorder = Recorder::default(); - status( - &mut index, - &worktree, - &mut recorder, - FastEq, - |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), - Options { - fs: gix_fs::Capabilities::probe(git_dir), - stat: TEST_OPTIONS, - ..Options::default() - }, - ) - .unwrap(); - recorder.records.sort_unstable_by_key(|(name, _, _)| *name); - assert_eq!(recorder.records, expected_status) -} - -#[test] -fn removed() { - fixture( - "status_removed", - &[ - (BStr::new(b"dir/content"), Some(Change::Removed), false), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), 
false), - (BStr::new(b"empty"), Some(Change::Removed), false), - (BStr::new(b"executable"), Some(Change::Removed), false), - ], - ); -} - -#[test] -fn intent_to_add() { - fixture( - "status_intent_to_add", - &[(BStr::new(b"content"), Some(Change::IntentToAdd), false)], - ); -} - -#[test] -fn conflict() { - fixture( - "status_conflict", - &[( - BStr::new(b"content"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - true, - )], - ); -} - -#[test] -fn unchanged() { - fixture("status_unchanged", &[]); -} - -#[test] -#[cfg_attr( - windows, - ignore = "needs work, on windows plenty of additional files are considered modified for some reason" -)] -fn modified() { - fixture( - "status_changed", - &[ - ( - BStr::new(b"dir/content"), - Some(Change::Modification { - executable_bit_changed: true, - content_change: None, - }), - false, - ), - ( - BStr::new(b"dir/content2"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - false, - ), - (BStr::new(b"empty"), Some(Change::Type), false), - ( - BStr::new(b"executable"), - Some(Change::Modification { - executable_bit_changed: true, - content_change: Some(()), - }), - false, - ), - ], - ); -} - -#[test] -fn racy_git() { - let timestamp = 940040400; - // we need a writable fixture because we have to mess with `mtimes` manually, because touch -d - // respects the locale so the test wouldn't work depending on the timezone you - // run your test in. 
- let dir = gix_testtools::scripted_fixture_writable("racy_git.sh").expect("script works"); - let worktree = dir.path(); - let git_dir = worktree.join(".git"); - let fs = gix_fs::Capabilities::probe(&git_dir); - let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); - - #[derive(Clone)] - struct CountCalls(Arc, FastEq); - impl CompareBlobs for CountCalls { - type Output = (); - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { - self.0.fetch_add(1, Ordering::Relaxed); - self.1 - .compare_blobs(entry, worktree_blob_size, worktree_blob, entry_blob) - } - } - - // We artificially mess with the entry's `mtime` so that it's before the timestamp saved by git. - // This would usually mean an invalid fs/invalid index file and as a result the racy git - // mitigation doesn't work and the worktree shows up as unchanged even tough the file did - // change. - // This case doesn't happen in the realworld (except for file corruption) but - // makes sure we are actually hitting the right codepath. 
- index.entries_mut()[0].stat.mtime.secs = timestamp; - set_file_mtime(worktree.join("content"), FileTime::from_unix_time(timestamp as i64, 0)) - .expect("changing filetime works"); - let mut recorder = Recorder::default(); - - let count = Arc::new(AtomicUsize::new(0)); - let counter = CountCalls(count.clone(), FastEq); - status( - &mut index, - worktree, - &mut recorder, - counter.clone(), - |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), - Options { - fs, - stat: TEST_OPTIONS, - ..Options::default() - }, - ) - .unwrap(); - assert_eq!(count.load(Ordering::Relaxed), 0, "no blob content is accessed"); - assert_eq!(recorder.records, &[], "the testcase triggers racy git"); - - // Now we also backdate the index timestamp to match the artificially created - // mtime above this is now a realistic realworld race-condition which should trigger racy git - // and cause proper output. - index.set_timestamp(FileTime::from_unix_time(timestamp as i64, 0)); - let mut recorder = Recorder::default(); - status( - &mut index, - worktree, - &mut recorder, - counter, - |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), - Options { - fs, - stat: TEST_OPTIONS, - ..Options::default() - }, - ) - .unwrap(); - assert_eq!( - count.load(Ordering::Relaxed), - 1, - "no we needed to access the blob content" - ); - assert_eq!( - recorder.records, - &[( - BStr::new(b"content"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - false - )], - "racy change is correctly detected" - ); -} diff --git a/justfile b/justfile index 961bfd9f482..5284615fbeb 100755 --- a/justfile +++ b/justfile @@ -147,10 +147,6 @@ unit-tests: set -ex; \ cargo test; \ cargo test --features verbose-object-parsing-errors - cd gix-worktree; \ - set -ex; \ - cargo test; \ - cargo test --features "internal-testing-gix-features-parallel" cargo test -p gix-tempfile --features signals cargo test -p gix-tempfile 
cargo test -p gix-features From 5d5f2866e512a4ddad7cb4606913026d2fe62840 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 16:22:52 +0200 Subject: [PATCH 6/7] change!: rename `Cache` to `Stack` because it's more fitting. --- gix-worktree/src/lib.rs | 8 ++++---- gix-worktree/src/{cache => stack}/delegate.rs | 2 +- gix-worktree/src/{cache => stack}/mod.rs | 16 ++++++++-------- gix-worktree/src/{cache => stack}/platform.rs | 2 +- .../src/{cache => stack}/state/attributes.rs | 6 +++--- .../src/{cache => stack}/state/ignore.rs | 2 +- gix-worktree/src/{cache => stack}/state/mod.rs | 2 +- gix-worktree/tests/worktree/mod.rs | 5 +++-- .../worktree/{cache => stack}/attributes.rs | 8 ++++---- .../{cache => stack}/create_directory.rs | 16 ++++++++-------- .../tests/worktree/{cache => stack}/ignore.rs | 14 +++++++------- .../tests/worktree/{cache => stack}/mod.rs | 0 12 files changed, 41 insertions(+), 40 deletions(-) rename gix-worktree/src/{cache => stack}/delegate.rs (99%) rename gix-worktree/src/{cache => stack}/mod.rs (97%) rename gix-worktree/src/{cache => stack}/platform.rs (98%) rename gix-worktree/src/{cache => stack}/state/attributes.rs (99%) rename gix-worktree/src/{cache => stack}/state/ignore.rs (99%) rename gix-worktree/src/{cache => stack}/state/mod.rs (99%) rename gix-worktree/tests/worktree/{cache => stack}/attributes.rs (95%) rename gix-worktree/tests/worktree/{cache => stack}/create_directory.rs (90%) rename gix-worktree/tests/worktree/{cache => stack}/ignore.rs (93%) rename gix-worktree/tests/worktree/{cache => stack}/mod.rs (100%) diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 9a0c32641fc..32d1d7c0e3b 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -32,20 +32,20 @@ use bstr::BString; /// /// The caching is only useful if consecutive calls to create a directory are using a sorted list of entries. 
#[derive(Clone)] -pub struct Cache { +pub struct Stack { stack: gix_fs::Stack, /// tells us what to do as we change paths. - state: cache::State, + state: stack::State, /// A buffer used when reading attribute or ignore files or their respective objects from the object database. buf: Vec, /// If case folding should happen when looking up attributes or exclusions. case: gix_glob::pattern::Case, /// A lookup table for object ids to read from in some situations when looking up attributes or exclusions. id_mappings: Vec, - statistics: cache::Statistics, + statistics: stack::Statistics, } pub(crate) type PathIdMapping = (BString, gix_hash::ObjectId); /// -pub mod cache; +pub mod stack; diff --git a/gix-worktree/src/cache/delegate.rs b/gix-worktree/src/stack/delegate.rs similarity index 99% rename from gix-worktree/src/cache/delegate.rs rename to gix-worktree/src/stack/delegate.rs index d982ae7592e..4c14ba297c7 100644 --- a/gix-worktree/src/cache/delegate.rs +++ b/gix-worktree/src/stack/delegate.rs @@ -1,6 +1,6 @@ use bstr::{BStr, ByteSlice}; -use crate::{cache::State, PathIdMapping}; +use crate::{stack::State, PathIdMapping}; /// Various aggregate numbers related to the stack delegate itself. #[derive(Default, Clone, Copy, Debug)] diff --git a/gix-worktree/src/cache/mod.rs b/gix-worktree/src/stack/mod.rs similarity index 97% rename from gix-worktree/src/cache/mod.rs rename to gix-worktree/src/stack/mod.rs index 4088de5f6ea..d38adefa1e4 100644 --- a/gix-worktree/src/cache/mod.rs +++ b/gix-worktree/src/stack/mod.rs @@ -4,10 +4,10 @@ use std::path::{Path, PathBuf}; use bstr::{BStr, ByteSlice}; use gix_hash::oid; -use super::Cache; +use super::Stack; use crate::PathIdMapping; -/// Various aggregate numbers collected from when the corresponding [`Cache`] was instantiated. +/// Various aggregate numbers collected from when the corresponding [`Stack`] was instantiated. 
#[derive(Default, Clone, Copy, Debug)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Statistics { @@ -45,12 +45,12 @@ pub enum State { #[must_use] pub struct Platform<'a> { - parent: &'a Cache, + parent: &'a Stack, is_dir: Option, } /// Initialization -impl Cache { +impl Stack { /// Create a new instance with `worktree_root` being the base for all future paths we match. /// `state` defines the capabilities of the cache. /// The `case` configures attribute and exclusion case sensitivity at *query time*, which should match the case that @@ -64,7 +64,7 @@ impl Cache { id_mappings: Vec, ) -> Self { let root = worktree_root.into(); - Cache { + Stack { stack: gix_fs::Stack::new(root), state, case, @@ -76,7 +76,7 @@ impl Cache { } /// Entry points for attribute query -impl Cache { +impl Stack { /// Append the `relative` path to the root directory of the cache and efficiently create leading directories, while assuring that no /// symlinks are in that path. /// Unless `is_dir` is known with `Some(…)`, then `relative` points to a directory itself in which case the entire resulting @@ -140,7 +140,7 @@ impl Cache { } /// Mutation -impl Cache { +impl Stack { /// Reset the statistics after returning them. pub fn take_statistics(&mut self) -> Statistics { std::mem::take(&mut self.statistics) @@ -159,7 +159,7 @@ impl Cache { } /// Access -impl Cache { +impl Stack { /// Return the statistics we gathered thus far. 
pub fn statistics(&self) -> &Statistics { &self.statistics diff --git a/gix-worktree/src/cache/platform.rs b/gix-worktree/src/stack/platform.rs similarity index 98% rename from gix-worktree/src/cache/platform.rs rename to gix-worktree/src/stack/platform.rs index d07ef6e8858..8d29566e8bb 100644 --- a/gix-worktree/src/cache/platform.rs +++ b/gix-worktree/src/stack/platform.rs @@ -2,7 +2,7 @@ use std::path::Path; use bstr::ByteSlice; -use crate::cache::Platform; +use crate::stack::Platform; /// Access impl<'a> Platform<'a> { diff --git a/gix-worktree/src/cache/state/attributes.rs b/gix-worktree/src/stack/state/attributes.rs similarity index 99% rename from gix-worktree/src/cache/state/attributes.rs rename to gix-worktree/src/stack/state/attributes.rs index 00b61544879..79b05edc44b 100644 --- a/gix-worktree/src/cache/state/attributes.rs +++ b/gix-worktree/src/stack/state/attributes.rs @@ -4,8 +4,8 @@ use bstr::{BStr, ByteSlice}; use gix_glob::pattern::Case; use crate::{ - cache::state::{AttributeMatchGroup, Attributes}, - Cache, PathIdMapping, + stack::state::{AttributeMatchGroup, Attributes}, + PathIdMapping, Stack, }; /// Various aggregate numbers related [`Attributes`]. @@ -200,7 +200,7 @@ impl Attributes { } /// Attribute matching specific methods -impl Cache { +impl Stack { /// Creates a new container to store match outcomes for all attribute matches. 
/// /// ### Panics diff --git a/gix-worktree/src/cache/state/ignore.rs b/gix-worktree/src/stack/state/ignore.rs similarity index 99% rename from gix-worktree/src/cache/state/ignore.rs rename to gix-worktree/src/stack/state/ignore.rs index df7d0bbe682..2dbddc520fd 100644 --- a/gix-worktree/src/cache/state/ignore.rs +++ b/gix-worktree/src/stack/state/ignore.rs @@ -4,7 +4,7 @@ use bstr::{BStr, ByteSlice}; use gix_glob::pattern::Case; use crate::{ - cache::state::{Ignore, IgnoreMatchGroup}, + stack::state::{Ignore, IgnoreMatchGroup}, PathIdMapping, }; diff --git a/gix-worktree/src/cache/state/mod.rs b/gix-worktree/src/stack/state/mod.rs similarity index 99% rename from gix-worktree/src/cache/state/mod.rs rename to gix-worktree/src/stack/state/mod.rs index 48118e505ec..f353e13c42d 100644 --- a/gix-worktree/src/cache/state/mod.rs +++ b/gix-worktree/src/stack/state/mod.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use bstr::{BString, ByteSlice}; use gix_glob::pattern::Case; -use crate::{cache::State, PathIdMapping}; +use crate::{stack::State, PathIdMapping}; type AttributeMatchGroup = gix_attributes::Search; type IgnoreMatchGroup = gix_ignore::Search; diff --git a/gix-worktree/tests/worktree/mod.rs b/gix-worktree/tests/worktree/mod.rs index f51aa9528b1..4fd60015588 100644 --- a/gix-worktree/tests/worktree/mod.rs +++ b/gix-worktree/tests/worktree/mod.rs @@ -1,6 +1,7 @@ -mod cache; - use gix_hash::ObjectId; + +mod stack; + pub type Result = std::result::Result>; pub fn hex_to_id(hex: &str) -> ObjectId { diff --git a/gix-worktree/tests/worktree/cache/attributes.rs b/gix-worktree/tests/worktree/stack/attributes.rs similarity index 95% rename from gix-worktree/tests/worktree/cache/attributes.rs rename to gix-worktree/tests/worktree/stack/attributes.rs index b7d9f55d8f6..6f0246597e0 100644 --- a/gix-worktree/tests/worktree/cache/attributes.rs +++ b/gix-worktree/tests/worktree/stack/attributes.rs @@ -1,7 +1,7 @@ use bstr::ByteSlice; use gix_attributes::search::Outcome; use 
gix_glob::pattern::Case; -use gix_worktree::cache::state; +use gix_worktree::stack::state; #[test] fn baseline() -> crate::Result { @@ -19,17 +19,17 @@ fn baseline() -> crate::Result { let mut buf = Vec::new(); let mut collection = gix_attributes::search::MetadataCollection::default(); - let state = gix_worktree::cache::State::for_checkout( + let state = gix_worktree::stack::State::for_checkout( false, state::Attributes::new( gix_attributes::Search::new_globals([base.join("user.attributes")], &mut buf, &mut collection)?, Some(git_dir.join("info").join("attributes")), - gix_worktree::cache::state::attributes::Source::WorktreeThenIdMapping, + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping, collection, ), ); - let mut cache = gix_worktree::Cache::new(&base, state, case, buf, vec![]); + let mut cache = gix_worktree::Stack::new(&base, state, case, buf, vec![]); let mut actual = cache.attribute_matches(); let input = std::fs::read(base.join("baseline"))?; diff --git a/gix-worktree/tests/worktree/cache/create_directory.rs b/gix-worktree/tests/worktree/stack/create_directory.rs similarity index 90% rename from gix-worktree/tests/worktree/cache/create_directory.rs rename to gix-worktree/tests/worktree/stack/create_directory.rs index f0f16b83172..7d21c550394 100644 --- a/gix-worktree/tests/worktree/cache/create_directory.rs +++ b/gix-worktree/tests/worktree/stack/create_directory.rs @@ -1,7 +1,7 @@ use std::path::Path; use gix_testtools::tempfile::{tempdir, TempDir}; -use gix_worktree::{cache, Cache}; +use gix_worktree::{stack, Stack}; #[allow(clippy::ptr_arg)] fn panic_on_find<'buf>(_oid: &gix_hash::oid, _buf: &'buf mut Vec) -> std::io::Result> { @@ -11,9 +11,9 @@ fn panic_on_find<'buf>(_oid: &gix_hash::oid, _buf: &'buf mut Vec) -> std::io #[test] fn root_is_assumed_to_exist_and_files_in_root_do_not_create_directory() -> crate::Result { let dir = tempdir()?; - let mut cache = Cache::new( + let mut cache = Stack::new( 
dir.path().join("non-existing-root"), - cache::State::for_checkout(false, Default::default()), + stack::State::for_checkout(false, Default::default()), Default::default(), Vec::new(), Default::default(), @@ -68,7 +68,7 @@ fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() -> crate::R std::fs::write(tmp.path().join("file-in-dir"), [])?; for dirname in &["file-in-dir", "link-to-dir"] { - if let cache::State::CreateDirectoryAndAttributesStack { + if let stack::State::CreateDirectoryAndAttributesStack { unlink_on_collision, .. } = cache.state_mut() { @@ -90,7 +90,7 @@ fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() -> crate::R ); cache.take_statistics(); for dirname in &["link-to-dir", "file-in-dir"] { - if let cache::State::CreateDirectoryAndAttributesStack { + if let stack::State::CreateDirectoryAndAttributesStack { unlink_on_collision, .. } = cache.state_mut() { @@ -109,11 +109,11 @@ fn symlinks_or_files_in_path_are_forbidden_or_unlinked_when_forced() -> crate::R Ok(()) } -fn new_cache() -> (Cache, TempDir) { +fn new_cache() -> (Stack, TempDir) { let dir = tempdir().unwrap(); - let cache = Cache::new( + let cache = Stack::new( dir.path(), - cache::State::for_checkout(false, Default::default()), + stack::State::for_checkout(false, Default::default()), Default::default(), Vec::new(), Default::default(), diff --git a/gix-worktree/tests/worktree/cache/ignore.rs b/gix-worktree/tests/worktree/stack/ignore.rs similarity index 93% rename from gix-worktree/tests/worktree/cache/ignore.rs rename to gix-worktree/tests/worktree/stack/ignore.rs index d8210694325..516475fbd7b 100644 --- a/gix-worktree/tests/worktree/cache/ignore.rs +++ b/gix-worktree/tests/worktree/stack/ignore.rs @@ -1,7 +1,7 @@ use bstr::{BStr, ByteSlice}; use gix_glob::pattern::Case; use gix_odb::FindExt; -use gix_worktree::{cache::state::ignore::Source, Cache}; +use gix_worktree::{stack::state::ignore::Source, Stack}; use crate::hex_to_id; @@ -37,16 +37,16 @@ fn 
exclude_by_dir_is_handled_just_like_git() { let mut buf = Vec::new(); let case = gix_glob::pattern::Case::Sensitive; - let state = gix_worktree::cache::State::for_add( + let state = gix_worktree::stack::State::for_add( Default::default(), - gix_worktree::cache::state::Ignore::new( + gix_worktree::stack::state::Ignore::new( Default::default(), gix_ignore::Search::from_git_dir(&git_dir, None, &mut buf).unwrap(), None, Source::WorktreeThenIdMappingIfNotSkipped, ), ); - let mut cache = Cache::new(&dir, state, case, buf, Default::default()); + let mut cache = Stack::new(&dir, state, case, buf, Default::default()); let baseline = std::fs::read(git_dir.parent().unwrap().join("git-check-ignore.baseline")).unwrap(); let expectations = IgnoreExpectations { lines: baseline.lines(), @@ -95,9 +95,9 @@ fn check_against_baseline() -> crate::Result { }; let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default())?; let odb = gix_odb::at(git_dir.join("objects"))?; - let state = gix_worktree::cache::State::for_add( + let state = gix_worktree::stack::State::for_add( Default::default(), - gix_worktree::cache::state::Ignore::new( + gix_worktree::stack::state::Ignore::new( gix_ignore::Search::from_overrides(vec!["!force-include"]), gix_ignore::Search::from_git_dir(&git_dir, Some(user_exclude_path), &mut buf)?, None, @@ -113,7 +113,7 @@ fn check_against_baseline() -> crate::Result { hex_to_id("5c7e0ed672d3d31d83a3df61f13cc8f7b22d5bfd") )] ); - let mut cache = Cache::new(&worktree_dir, state, case, buf, attribute_files_in_index); + let mut cache = Stack::new(&worktree_dir, state, case, buf, attribute_files_in_index); let baseline = std::fs::read(git_dir.parent().unwrap().join("git-check-ignore.baseline"))?; let expectations = IgnoreExpectations { diff --git a/gix-worktree/tests/worktree/cache/mod.rs b/gix-worktree/tests/worktree/stack/mod.rs similarity index 100% rename from gix-worktree/tests/worktree/cache/mod.rs rename to 
gix-worktree/tests/worktree/stack/mod.rs From e5717e1d12c49285d31a90b03b7f8e9cbc6c1108 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 19 Aug 2023 16:24:12 +0200 Subject: [PATCH 7/7] adapt to changes in `gix-worktree` --- Cargo.lock | 1 + gitoxide-core/src/hours/core.rs | 4 +-- gitoxide-core/src/index/checkout.rs | 8 +++--- .../src/repository/attributes/query.rs | 8 +++--- gitoxide-core/src/repository/clone.rs | 2 +- gitoxide-core/src/repository/index/entries.rs | 14 +++++----- gix-archive/tests/archive.rs | 10 +++---- gix-filter/tests/pipeline/mod.rs | 16 ++++++------ gix-status/src/index_as_worktree/types.rs | 4 +-- gix-worktree-state/src/checkout/chunk.rs | 4 +-- gix-worktree-state/src/checkout/entry.rs | 4 +-- gix-worktree-state/src/checkout/function.rs | 6 ++--- gix-worktree-state/src/checkout/mod.rs | 2 +- gix-worktree-state/tests/state/checkout.rs | 16 ++++++------ gix-worktree-stream/tests/stream.rs | 10 +++---- gix-worktree/src/stack/mod.rs | 4 +-- gix-worktree/src/stack/state/attributes.rs | 4 +-- gix-worktree/src/stack/state/ignore.rs | 2 +- gix/Cargo.toml | 1 + gix/src/clone/checkout.rs | 11 ++++---- gix/src/config/cache/access.rs | 18 ++++++------- gix/src/filter.rs | 6 ++--- gix/src/pathspec.rs | 8 +++--- gix/src/repository/attributes.rs | 26 +++++++++---------- gix/src/repository/filter.rs | 4 +-- gix/src/repository/pathspec.rs | 2 +- gix/src/repository/worktree.rs | 2 +- gix/src/types.rs | 2 +- gix/src/worktree/mod.rs | 15 ++++++----- 29 files changed, 109 insertions(+), 105 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bc8cb1be423..48e8e4da16c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1240,6 +1240,7 @@ dependencies = [ "gix-utils 0.1.5", "gix-validate 0.8.0", "gix-worktree 0.24.0", + "gix-worktree-state", "gix-worktree-stream", "is_ci", "log", diff --git a/gitoxide-core/src/hours/core.rs b/gitoxide-core/src/hours/core.rs index ddc6c4dbb28..eb033f954aa 100644 --- a/gitoxide-core/src/hours/core.rs +++ 
b/gitoxide-core/src/hours/core.rs @@ -101,8 +101,8 @@ pub fn spawn_tree_delta_threads<'scope>( repo.index_or_load_from_head().map_err(Into::into).and_then(|index| { repo.attributes( &index, - gix::worktree::cache::state::attributes::Source::IdMapping, - gix::worktree::cache::state::ignore::Source::IdMapping, + gix::worktree::stack::state::attributes::Source::IdMapping, + gix::worktree::stack::state::ignore::Source::IdMapping, None, ) .map_err(Into::into) diff --git a/gitoxide-core/src/index/checkout.rs b/gitoxide-core/src/index/checkout.rs index d28423e485b..bdfaa3235f0 100644 --- a/gitoxide-core/src/index/checkout.rs +++ b/gitoxide-core/src/index/checkout.rs @@ -4,7 +4,7 @@ use std::{ }; use anyhow::bail; -use gix::{odb::FindExt, worktree::checkout, Progress}; +use gix::{odb::FindExt, worktree::state::checkout, Progress}; use crate::{ index, @@ -55,7 +55,7 @@ pub fn checkout_exclusive( progress.info(format!("Skipping {num_skipped} DIR/SYMLINK/COMMIT entries")); } - let opts = gix::worktree::checkout::Options { + let opts = gix::worktree::state::checkout::Options { fs: gix::fs::Capabilities::probe(dest_directory), destination_is_initially_empty: true, @@ -86,7 +86,7 @@ pub fn checkout_exclusive( delayed_paths_unknown, delayed_paths_unprocessed, } = match repo { - Some(repo) => gix::worktree::checkout( + Some(repo) => gix::worktree::state::checkout( &mut index, dest_directory, { @@ -109,7 +109,7 @@ pub fn checkout_exclusive( should_interrupt, opts, ), - None => gix::worktree::checkout( + None => gix::worktree::state::checkout( &mut index, dest_directory, |_, buf| { diff --git a/gitoxide-core/src/repository/attributes/query.rs b/gitoxide-core/src/repository/attributes/query.rs index bb90aeccff3..ec777183ee7 100644 --- a/gitoxide-core/src/repository/attributes/query.rs +++ b/gitoxide-core/src/repository/attributes/query.rs @@ -89,16 +89,16 @@ pub(crate) mod function { pub(crate) fn attributes_cache( repo: &gix::Repository, -) -> anyhow::Result<(gix::worktree::Cache, 
IndexPersistedOrInMemory)> { +) -> anyhow::Result<(gix::worktree::Stack, IndexPersistedOrInMemory)> { let index = repo.index_or_load_from_head()?; let cache = repo.attributes( &index, if repo.is_bare() { - gix::worktree::cache::state::attributes::Source::IdMapping + gix::worktree::stack::state::attributes::Source::IdMapping } else { - gix::worktree::cache::state::attributes::Source::WorktreeThenIdMapping + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping }, - gix::worktree::cache::state::ignore::Source::IdMapping, + gix::worktree::stack::state::ignore::Source::IdMapping, None, )?; Ok((cache, index)) diff --git a/gitoxide-core/src/repository/clone.rs b/gitoxide-core/src/repository/clone.rs index 7e0530ef899..6fec5801335 100644 --- a/gitoxide-core/src/repository/clone.rs +++ b/gitoxide-core/src/repository/clone.rs @@ -111,7 +111,7 @@ pub(crate) mod function { } }; - if let Some(gix::worktree::checkout::Outcome { collisions, errors, .. }) = outcome { + if let Some(gix::worktree::state::checkout::Outcome { collisions, errors, .. 
}) = outcome { if !(collisions.is_empty() && errors.is_empty()) { let mut messages = Vec::new(); if !errors.is_empty() { diff --git a/gitoxide-core/src/repository/index/entries.rs b/gitoxide-core/src/repository/index/entries.rs index 5b808db085d..9a0a2825d0d 100644 --- a/gitoxide-core/src/repository/index/entries.rs +++ b/gitoxide-core/src/repository/index/entries.rs @@ -56,22 +56,22 @@ pub(crate) mod function { match attrs { Attributes::WorktreeAndIndex => { if repo.is_bare() { - gix::worktree::cache::state::attributes::Source::IdMapping + gix::worktree::stack::state::attributes::Source::IdMapping } else { - gix::worktree::cache::state::attributes::Source::WorktreeThenIdMapping + gix::worktree::stack::state::attributes::Source::WorktreeThenIdMapping } } - Attributes::Index => gix::worktree::cache::state::attributes::Source::IdMapping, + Attributes::Index => gix::worktree::stack::state::attributes::Source::IdMapping, }, match attrs { Attributes::WorktreeAndIndex => { if repo.is_bare() { - gix::worktree::cache::state::ignore::Source::IdMapping + gix::worktree::stack::state::ignore::Source::IdMapping } else { - gix::worktree::cache::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped + gix::worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped } } - Attributes::Index => gix::worktree::cache::state::ignore::Source::IdMapping, + Attributes::Index => gix::worktree::stack::state::ignore::Source::IdMapping, }, None, ) @@ -203,7 +203,7 @@ pub(crate) mod function { pub excluded: usize, pub with_attributes: usize, pub max_attributes_per_path: usize, - pub cache: Option, + pub cache: Option, } #[cfg(feature = "serde")] diff --git a/gix-archive/tests/archive.rs b/gix-archive/tests/archive.rs index 1a91937ddc6..3eff24c7570 100644 --- a/gix-archive/tests/archive.rs +++ b/gix-archive/tests/archive.rs @@ -11,7 +11,7 @@ mod from_tree { use gix_object::tree::EntryMode; use gix_odb::FindExt; use gix_testtools::bstr::ByteSlice; - use 
gix_worktree::cache::state::attributes::Source; + use gix_worktree::stack::state::attributes::Source; use crate::hex_to_id; @@ -284,7 +284,7 @@ mod from_tree { Ok(()) } - fn basic() -> gix_testtools::Result<(PathBuf, gix_hash::ObjectId, gix_odb::HandleArc, gix_worktree::Cache)> { + fn basic() -> gix_testtools::Result<(PathBuf, gix_hash::ObjectId, gix_odb::HandleArc, gix_worktree::Stack)> { let dir = gix_testtools::scripted_fixture_read_only("basic.sh")?; let head = { @@ -295,14 +295,14 @@ mod from_tree { let mut collection = Default::default(); let mut buf = Default::default(); - let attributes = gix_worktree::cache::state::Attributes::new( + let attributes = gix_worktree::stack::state::Attributes::new( gix_attributes::Search::new_globals(None::, &mut buf, &mut collection)?, None, Source::WorktreeThenIdMapping, collection, ); - let state = gix_worktree::cache::State::AttributesStack(attributes); - let cache = gix_worktree::Cache::new(&dir, state, Case::Sensitive, Default::default(), Default::default()); + let state = gix_worktree::stack::State::AttributesStack(attributes); + let cache = gix_worktree::Stack::new(&dir, state, Case::Sensitive, Default::default(), Default::default()); Ok((dir, head, odb.into_arc()?, cache)) } diff --git a/gix-filter/tests/pipeline/mod.rs b/gix-filter/tests/pipeline/mod.rs index ba683564f4d..7db3224357d 100644 --- a/gix-filter/tests/pipeline/mod.rs +++ b/gix-filter/tests/pipeline/mod.rs @@ -17,22 +17,22 @@ fn default() -> crate::Result { Ok(()) } -fn attribute_cache(name: &str) -> gix_testtools::Result { +fn attribute_cache(name: &str) -> gix_testtools::Result { let dir = gix_testtools::scripted_fixture_read_only("pipeline_repos.sh")?.join(name); - Ok(gix_worktree::Cache::new( + Ok(gix_worktree::Stack::new( dir, - gix_worktree::cache::State::for_add( - gix_worktree::cache::state::Attributes::new( + gix_worktree::stack::State::for_add( + gix_worktree::stack::state::Attributes::new( Default::default(), None, - 
gix_worktree::cache::state::attributes::Source::WorktreeThenIdMapping, + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping, Default::default(), ), - gix_worktree::cache::state::Ignore::new( + gix_worktree::stack::state::Ignore::new( Default::default(), Default::default(), None, - gix_worktree::cache::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, + gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, ), ), Case::Sensitive, @@ -49,7 +49,7 @@ fn pipeline( gix_filter::pipeline::CrlfRoundTripCheck, eol::Configuration, ), -) -> gix_testtools::Result<(gix_worktree::Cache, gix_filter::Pipeline)> { +) -> gix_testtools::Result<(gix_worktree::Stack, gix_filter::Pipeline)> { let cache = attribute_cache(name)?; let (drivers, encodings_with_roundtrip_check, crlf_roundtrip_check, eol_config) = init(); let pipe = gix_filter::Pipeline::new( diff --git a/gix-status/src/index_as_worktree/types.rs b/gix-status/src/index_as_worktree/types.rs index 3d488d24ef4..10ff5c28d24 100644 --- a/gix-status/src/index_as_worktree/types.rs +++ b/gix-status/src/index_as_worktree/types.rs @@ -1,6 +1,6 @@ use bstr::BStr; -/// The error returned by [`status()`][crate::status()]. +/// The error returned by [`status()`](crate::index_as_worktree()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -40,7 +40,7 @@ pub enum Change { /// Indicates that one of the stat changes was an executable bit change /// which is a significant change itself. executable_bit_changed: bool, - /// The output of the [`CompareBlobs`][crate::status::content::CompareBlobs] run on this entry. + /// The output of the [`CompareBlobs`](crate::index_as_worktree::content::CompareBlobs) run on this entry. /// If there is no content change and only the executable bit /// changed than this is `None`. 
content_change: Option, diff --git a/gix-worktree-state/src/checkout/chunk.rs b/gix-worktree-state/src/checkout/chunk.rs index 445ed9a9680..46d09bac28c 100644 --- a/gix-worktree-state/src/checkout/chunk.rs +++ b/gix-worktree-state/src/checkout/chunk.rs @@ -7,7 +7,7 @@ use bstr::{BStr, BString}; use gix_hash::oid; use crate::{checkout, checkout::entry}; -use gix_worktree::Cache; +use gix_worktree::Stack; mod reduce { use std::marker::PhantomData; @@ -92,7 +92,7 @@ pub struct Outcome<'a> { #[derive(Clone)] pub struct Context { pub find: Find, - pub path_cache: Cache, + pub path_cache: Stack, pub filters: gix_filter::Pipeline, pub buf: Vec, pub options: Options, diff --git a/gix-worktree-state/src/checkout/entry.rs b/gix-worktree-state/src/checkout/entry.rs index e699d948958..68cb796aa54 100644 --- a/gix-worktree-state/src/checkout/entry.rs +++ b/gix-worktree-state/src/checkout/entry.rs @@ -10,11 +10,11 @@ use gix_hash::oid; use gix_index::{entry::Stat, Entry}; use io_close::Close; -use gix_worktree::Cache; +use gix_worktree::Stack; pub struct Context<'a, Find> { pub find: &'a mut Find, - pub path_cache: &'a mut Cache, + pub path_cache: &'a mut Stack, pub filters: &'a mut gix_filter::Pipeline, pub buf: &'a mut Vec, } diff --git a/gix-worktree-state/src/checkout/function.rs b/gix-worktree-state/src/checkout/function.rs index ba825e8431a..b29a6e69356 100644 --- a/gix-worktree-state/src/checkout/function.rs +++ b/gix-worktree-state/src/checkout/function.rs @@ -4,7 +4,7 @@ use gix_features::{interrupt, parallel::in_parallel_with_finalize, progress::Pro use gix_hash::oid; use crate::checkout::chunk; -use gix_worktree::{cache, Cache}; +use gix_worktree::{stack, Stack}; /// Checkout the entire `index` into `dir`, and resolve objects found in index entries with `find` to write their content to their /// respective path in `dir`. 
@@ -66,12 +66,12 @@ where None, ); - let state = cache::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)); + let state = stack::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)); let attribute_files = state.id_mappings_from_index(index, paths, case); let mut ctx = chunk::Context { buf: Vec::new(), options: (&options).into(), - path_cache: Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), + path_cache: Stack::new(dir, state, case, Vec::with_capacity(512), attribute_files), filters: options.filters, find, }; diff --git a/gix-worktree-state/src/checkout/mod.rs b/gix-worktree-state/src/checkout/mod.rs index e01f7bff74a..70e46c84e2e 100644 --- a/gix-worktree-state/src/checkout/mod.rs +++ b/gix-worktree-state/src/checkout/mod.rs @@ -60,7 +60,7 @@ pub struct Options { /// Control how stat comparisons are made when checking if a file is fresh. pub stat_options: stat::Options, /// A stack of attributes to use with the filesystem cache to use as driver for filters. - pub attributes: gix_worktree::cache::state::Attributes, + pub attributes: gix_worktree::stack::state::Attributes, /// The filter pipeline to use for applying mandatory filters before writing to the worktree. pub filters: gix_filter::Pipeline, /// Control how long-running processes may use the 'delay' capability. 
diff --git a/gix-worktree-state/tests/state/checkout.rs b/gix-worktree-state/tests/state/checkout.rs index 52e453dd23e..ff06812b528 100644 --- a/gix-worktree-state/tests/state/checkout.rs +++ b/gix-worktree-state/tests/state/checkout.rs @@ -11,7 +11,7 @@ use gix_features::progress; use gix_object::bstr::ByteSlice; use gix_odb::FindExt; use gix_testtools::tempfile::TempDir; -use gix_worktree::checkout::Collision; +use gix_worktree_state::checkout::Collision; use once_cell::sync::Lazy; use crate::fixture_path; @@ -454,18 +454,18 @@ pub fn dir_structure>(path: P) -> Vec crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { +) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree_state::checkout::Outcome)> { checkout_index_in_tmp_dir_opts(opts, name, |_d| true, |_| Ok(())) } fn checkout_index_in_tmp_dir_opts( - opts: gix_worktree::checkout::Options, + opts: gix_worktree_state::checkout::Options, name: &str, mut allow_return_object: impl FnMut(&gix_hash::oid) -> bool + Send + Clone, prep_dest: impl Fn(&Path) -> std::io::Result<()>, -) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { +) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree_state::checkout::Outcome)> { let source_tree = fixture_path(name); let git_dir = source_tree.join(".git"); let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default())?; @@ -473,7 +473,7 @@ fn checkout_index_in_tmp_dir_opts( let destination = gix_testtools::tempfile::tempdir_in(std::env::current_dir()?)?; prep_dest(destination.path()).expect("preparation must succeed"); - let outcome = gix_worktree::checkout( + let outcome = gix_worktree_state::checkout( &mut index, destination.path(), move |oid, buf| { @@ -501,8 +501,8 @@ fn probe_gitoxide_dir() -> crate::Result { )) } -fn opts_from_probe() -> gix_worktree::checkout::Options { - gix_worktree::checkout::Options { +fn opts_from_probe() -> 
gix_worktree_state::checkout::Options { + gix_worktree_state::checkout::Options { fs: probe_gitoxide_dir().unwrap(), destination_is_initially_empty: true, thread_limit: gix_features::parallel::num_threads(None).into(), diff --git a/gix-worktree-stream/tests/stream.rs b/gix-worktree-stream/tests/stream.rs index e70c5b11837..22561bb7076 100644 --- a/gix-worktree-stream/tests/stream.rs +++ b/gix-worktree-stream/tests/stream.rs @@ -15,7 +15,7 @@ mod from_tree { use gix_object::{bstr::ByteSlice, tree::EntryMode}; use gix_odb::FindExt; use gix_testtools::once_cell::sync::Lazy; - use gix_worktree::cache::state::attributes::Source; + use gix_worktree::stack::state::attributes::Source; use crate::hex_to_id; @@ -231,7 +231,7 @@ mod from_tree { Ok(()) } - fn basic() -> gix_testtools::Result<(PathBuf, gix_hash::ObjectId, gix_odb::HandleArc, gix_worktree::Cache)> { + fn basic() -> gix_testtools::Result<(PathBuf, gix_hash::ObjectId, gix_odb::HandleArc, gix_worktree::Stack)> { let dir = gix_testtools::scripted_fixture_read_only("basic.sh")?; let head = { @@ -242,14 +242,14 @@ mod from_tree { let mut collection = Default::default(); let mut buf = Default::default(); - let attributes = gix_worktree::cache::state::Attributes::new( + let attributes = gix_worktree::stack::state::Attributes::new( gix_attributes::Search::new_globals(None::, &mut buf, &mut collection)?, None, Source::WorktreeThenIdMapping, collection, ); - let state = gix_worktree::cache::State::AttributesStack(attributes); - let cache = gix_worktree::Cache::new(&dir, state, Case::Sensitive, Default::default(), Default::default()); + let state = gix_worktree::stack::State::AttributesStack(attributes); + let cache = gix_worktree::Stack::new(&dir, state, Case::Sensitive, Default::default(), Default::default()); Ok((dir, head, odb.into_arc()?, cache)) } diff --git a/gix-worktree/src/stack/mod.rs b/gix-worktree/src/stack/mod.rs index d38adefa1e4..6892f99025d 100644 --- a/gix-worktree/src/stack/mod.rs +++ 
b/gix-worktree/src/stack/mod.rs @@ -81,7 +81,7 @@ impl Stack { /// symlinks are in that path. /// Unless `is_dir` is known with `Some(…)`, then `relative` points to a directory itself in which case the entire resulting /// path is created as directory. If it's not known it is assumed to be a file. - /// `find` maybe used to lookup objects from an [id mapping][crate::cache::State::id_mappings_from_index()], with mappnigs + /// `find` maybe used to lookup objects from an [id mapping][crate::stack::State::id_mappings_from_index()], with mappnigs /// /// Provide access to cached information for that `relative` path via the returned platform. pub fn at_path( @@ -110,7 +110,7 @@ impl Stack { /// Obtain a platform for lookups from a repo-`relative` path, typically obtained from an index entry. `is_dir` should reflect /// whether it's a directory or not, or left at `None` if unknown. - /// `find` maybe used to lookup objects from an [id mapping][crate::cache::State::id_mappings_from_index()]. + /// `find` maybe used to lookup objects from an [id mapping][crate::stack::State::id_mappings_from_index()]. /// All effects are similar to [`at_path()`][Self::at_path()]. /// /// If `relative` ends with `/` and `is_dir` is `None`, it is automatically assumed to be a directory. diff --git a/gix-worktree/src/stack/state/attributes.rs b/gix-worktree/src/stack/state/attributes.rs index 79b05edc44b..ea6b0f61a30 100644 --- a/gix-worktree/src/stack/state/attributes.rs +++ b/gix-worktree/src/stack/state/attributes.rs @@ -23,7 +23,7 @@ pub struct Statistics { /// Decide where to read `.gitattributes` files from. /// /// To Retrieve attribute files from id mappings, see -/// [State::id_mappings_from_index()][crate::cache::State::id_mappings_from_index()]. +/// [State::id_mappings_from_index()][crate::stack::State::id_mappings_from_index()]. /// /// These mappings are typically produced from an index. 
/// If a tree should be the source, build an attribute list from a tree instead, or convert a tree to an index. @@ -230,7 +230,7 @@ impl Stack { } /// Return the metadata collection that enables initializing attribute match outcomes as done in - /// [`attribute_matches()`][Cache::attribute_matches()] or [`selected_attribute_matches()`][Cache::selected_attribute_matches()] + /// [`attribute_matches()`][Stack::attribute_matches()] or [`selected_attribute_matches()`][Stack::selected_attribute_matches()] /// /// ### Panics /// diff --git a/gix-worktree/src/stack/state/ignore.rs b/gix-worktree/src/stack/state/ignore.rs index 2dbddc520fd..0945a3c9403 100644 --- a/gix-worktree/src/stack/state/ignore.rs +++ b/gix-worktree/src/stack/state/ignore.rs @@ -12,7 +12,7 @@ use crate::{ #[derive(Default, Debug, Clone, Copy)] pub enum Source { /// Retrieve ignore files from id mappings, see - /// [State::id_mappings_from_index()][crate::cache::State::id_mappings_from_index()]. + /// [State::id_mappings_from_index()][crate::stack::State::id_mappings_from_index()]. /// /// These mappings are typically produced from an index. /// If a tree should be the source, build an attribute list from a tree instead, or convert a tree to an index. 
diff --git a/gix/Cargo.toml b/gix/Cargo.toml index b3bd08d53a1..703c9c8e989 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -162,6 +162,7 @@ gix-credentials = { version = "^0.17.1", path = "../gix-credentials" } gix-prompt = { version = "^0.5.5", path = "../gix-prompt" } gix-index = { version = "^0.22.0", path = "../gix-index" } gix-worktree = { version = "^0.24.0", path = "../gix-worktree" } +gix-worktree-state = { version = "^0.1.0", path = "../gix-worktree-state" } gix-hashtable = { version = "^0.2.4", path = "../gix-hashtable" } gix-commitgraph = { version = "^0.18.2", path = "../gix-commitgraph" } gix-pathspec = { version = "^0.1.0", path = "../gix-pathspec" } diff --git a/gix/src/clone/checkout.rs b/gix/src/clone/checkout.rs index 8657f9a3aa9..a51b7197146 100644 --- a/gix/src/clone/checkout.rs +++ b/gix/src/clone/checkout.rs @@ -27,7 +27,8 @@ pub mod main_worktree { CheckoutOptions(#[from] crate::config::checkout_options::Error), #[error(transparent)] IndexCheckout( - #[from] gix_worktree::checkout::Error>, + #[from] + gix_worktree_state::checkout::Error>, ), #[error("Failed to reopen object database as Arc (only if thread-safety wasn't compiled in)")] OpenArcOdb(#[from] std::io::Error), @@ -68,7 +69,7 @@ pub mod main_worktree { &mut self, mut progress: impl crate::Progress, should_interrupt: &AtomicBool, - ) -> Result<(Repository, gix_worktree::checkout::Outcome), Error> { + ) -> Result<(Repository, gix_worktree_state::checkout::Outcome), Error> { let _span = gix_trace::coarse!("gix::clone::PrepareCheckout::main_worktree()"); let repo = self .repo @@ -82,7 +83,7 @@ pub mod main_worktree { None => { return Ok(( self.repo.take().expect("still present"), - gix_worktree::checkout::Outcome::default(), + gix_worktree_state::checkout::Outcome::default(), )) } }; @@ -95,7 +96,7 @@ pub mod main_worktree { let mut opts = repo .config - .checkout_options(repo, gix_worktree::cache::state::attributes::Source::IdMapping)?; + .checkout_options(repo, 
gix_worktree::stack::state::attributes::Source::IdMapping)?; opts.destination_is_initially_empty = true; let mut files = progress.add_child_with_id("checkout", ProgressId::CheckoutFiles.into()); @@ -105,7 +106,7 @@ pub mod main_worktree { bytes.init(None, crate::progress::bytes()); let start = std::time::Instant::now(); - let outcome = gix_worktree::checkout( + let outcome = gix_worktree_state::checkout( &mut index, workdir, { diff --git a/gix/src/config/cache/access.rs b/gix/src/config/cache/access.rs index 35220ce7644..7ca9be0d59c 100644 --- a/gix/src/config/cache/access.rs +++ b/gix/src/config/cache/access.rs @@ -160,8 +160,8 @@ impl Cache { pub(crate) fn checkout_options( &self, repo: &Repository, - attributes_source: gix_worktree::cache::state::attributes::Source, - ) -> Result { + attributes_source: gix_worktree::stack::state::attributes::Source, + ) -> Result { let git_dir = repo.git_dir(); let thread_limit = self.apply_leniency( self.resolved @@ -189,7 +189,7 @@ impl Cache { } else { gix_filter::driver::apply::Delay::Forbid }; - Ok(gix_worktree::checkout::Options { + Ok(gix_worktree_state::checkout::Options { filter_process_delay, filters, attributes: self @@ -219,14 +219,14 @@ impl Cache { &self, git_dir: &std::path::Path, overrides: Option, - source: gix_worktree::cache::state::ignore::Source, + source: gix_worktree::stack::state::ignore::Source, buf: &mut Vec, - ) -> Result { + ) -> Result { let excludes_file = match self.excludes_file().transpose()? 
{ Some(user_path) => Some(user_path), None => self.xdg_config_path("ignore")?, }; - Ok(gix_worktree::cache::state::Ignore::new( + Ok(gix_worktree::stack::state::Ignore::new( overrides.unwrap_or_default(), gix_ignore::Search::from_git_dir(git_dir, excludes_file, buf)?, None, @@ -237,9 +237,9 @@ impl Cache { pub(crate) fn assemble_attribute_globals( &self, git_dir: &std::path::Path, - source: gix_worktree::cache::state::attributes::Source, + source: gix_worktree::stack::state::attributes::Source, attributes: crate::open::permissions::Attributes, - ) -> Result<(gix_worktree::cache::state::Attributes, Vec), config::attribute_stack::Error> { + ) -> Result<(gix_worktree::stack::state::Attributes, Vec), config::attribute_stack::Error> { let configured_or_user_attributes = match self .trusted_file_path("core", None, Core::ATTRIBUTES_FILE.name) .transpose()? @@ -265,7 +265,7 @@ impl Cache { let info_attributes_path = git_dir.join("info").join("attributes"); let mut buf = Vec::new(); let mut collection = gix_attributes::search::MetadataCollection::default(); - let state = gix_worktree::cache::state::Attributes::new( + let state = gix_worktree::stack::state::Attributes::new( gix_attributes::Search::new_globals(attribute_files, &mut buf, &mut collection)?, Some(info_attributes_path), source, diff --git a/gix/src/filter.rs b/gix/src/filter.rs index 073ea6328b9..27447e12bc7 100644 --- a/gix/src/filter.rs +++ b/gix/src/filter.rs @@ -67,7 +67,7 @@ pub mod pipeline { #[derive(Clone)] pub struct Pipeline<'repo> { inner: gix_filter::Pipeline, - cache: gix_worktree::Cache, + cache: gix_worktree::Stack, repo: &'repo Repository, } @@ -110,7 +110,7 @@ impl<'repo> Pipeline<'repo> { /// Create a new instance by extracting all necessary information and configuration from a `repo` along with `cache` for accessing /// attributes. The `index` is used for some filters which may access it under very specific circumstances. 
- pub fn new(repo: &'repo Repository, cache: gix_worktree::Cache) -> Result { + pub fn new(repo: &'repo Repository, cache: gix_worktree::Stack) -> Result { let pipeline = gix_filter::Pipeline::new(cache.attributes_collection(), Self::options(repo)?); Ok(Pipeline { inner: pipeline, @@ -120,7 +120,7 @@ impl<'repo> Pipeline<'repo> { } /// Detach the repository and obtain the individual functional parts. - pub fn into_parts(self) -> (gix_filter::Pipeline, gix_worktree::Cache) { + pub fn into_parts(self) -> (gix_filter::Pipeline, gix_worktree::Stack) { (self.inner, self.cache) } } diff --git a/gix/src/pathspec.rs b/gix/src/pathspec.rs index 16649898d2f..587aef4bf93 100644 --- a/gix/src/pathspec.rs +++ b/gix/src/pathspec.rs @@ -43,7 +43,7 @@ impl<'repo> Pathspec<'repo> { repo: &'repo Repository, patterns: impl IntoIterator>, inherit_ignore_case: bool, - make_attributes: impl FnOnce() -> Result>, + make_attributes: impl FnOnce() -> Result>, ) -> Result { let mut defaults = repo.pathspec_defaults()?; if inherit_ignore_case && repo.config.fs_capabilities()?.ignore_case { @@ -67,9 +67,9 @@ impl<'repo> Pathspec<'repo> { Ok(Self { repo, search, cache }) } /// Turn ourselves into the functional parts for direct usage. - /// Note that the [`cache`](gix_worktree::Cache) is only set if one of the [`search` patterns](Search) + /// Note that the [`cache`](gix_worktree::Stack) is only set if one of the [`search` patterns](Search) /// is specifying attributes to match for. - pub fn into_parts(self) -> (Search, Option) { + pub fn into_parts(self) -> (Search, Option) { (self.search, self.cache) } } @@ -77,7 +77,7 @@ impl<'repo> Pathspec<'repo> { /// Access impl<'repo> Pathspec<'repo> { /// Return the attributes cache which is used when matching attributes in pathspecs, or `None` if none of the pathspecs require that. 
- pub fn attributes(&self) -> Option<&gix_worktree::Cache> { + pub fn attributes(&self) -> Option<&gix_worktree::Stack> { self.cache.as_ref() } diff --git a/gix/src/repository/attributes.rs b/gix/src/repository/attributes.rs index 23fd65203d9..e875ef0b0d8 100644 --- a/gix/src/repository/attributes.rs +++ b/gix/src/repository/attributes.rs @@ -29,10 +29,10 @@ impl Repository { pub fn attributes( &self, index: &gix_index::State, - attributes_source: gix_worktree::cache::state::attributes::Source, - ignore_source: gix_worktree::cache::state::ignore::Source, + attributes_source: gix_worktree::stack::state::attributes::Source, + ignore_source: gix_worktree::stack::state::ignore::Source, exclude_overrides: Option, - ) -> Result { + ) -> Result { let case = if self.config.ignore_case { gix_glob::pattern::Case::Fold } else { @@ -46,9 +46,9 @@ impl Repository { let ignore = self.config .assemble_exclude_globals(self.git_dir(), exclude_overrides, ignore_source, &mut buf)?; - let state = gix_worktree::cache::State::AttributesAndIgnoreStack { attributes, ignore }; + let state = gix_worktree::stack::State::AttributesAndIgnoreStack { attributes, ignore }; let attribute_list = state.id_mappings_from_index(index, index.path_backing(), case); - Ok(gix_worktree::Cache::new( + Ok(gix_worktree::Stack::new( // this is alright as we don't cause mutation of that directory, it's virtual. 
self.work_dir().unwrap_or(self.git_dir()), state, @@ -62,8 +62,8 @@ impl Repository { pub fn attributes_only( &self, index: &gix_index::State, - attributes_source: gix_worktree::cache::state::attributes::Source, - ) -> Result { + attributes_source: gix_worktree::stack::state::attributes::Source, + ) -> Result { let case = if self.config.ignore_case { gix_glob::pattern::Case::Fold } else { @@ -74,9 +74,9 @@ impl Repository { attributes_source, self.options.permissions.attributes, )?; - let state = gix_worktree::cache::State::AttributesStack(attributes); + let state = gix_worktree::stack::State::AttributesStack(attributes); let attribute_list = state.id_mappings_from_index(index, index.path_backing(), case); - Ok(gix_worktree::Cache::new( + Ok(gix_worktree::Stack::new( // this is alright as we don't cause mutation of that directory, it's virtual. self.work_dir().unwrap_or(self.git_dir()), state, @@ -105,8 +105,8 @@ impl Repository { &self, index: &gix_index::State, overrides: Option, - source: gix_worktree::cache::state::ignore::Source, - ) -> Result { + source: gix_worktree::stack::state::ignore::Source, + ) -> Result { let case = if self.config.ignore_case { gix_glob::pattern::Case::Fold } else { @@ -116,9 +116,9 @@ impl Repository { let ignore = self .config .assemble_exclude_globals(self.git_dir(), overrides, source, &mut buf)?; - let state = gix_worktree::cache::State::IgnoreStack(ignore); + let state = gix_worktree::stack::State::IgnoreStack(ignore); let attribute_list = state.id_mappings_from_index(index, index.path_backing(), case); - Ok(gix_worktree::Cache::new( + Ok(gix_worktree::Stack::new( // this is alright as we don't cause mutation of that directory, it's virtual. 
self.work_dir().unwrap_or(self.git_dir()), state, diff --git a/gix/src/repository/filter.rs b/gix/src/repository/filter.rs index 77dd9005d62..6f08309ce0f 100644 --- a/gix/src/repository/filter.rs +++ b/gix/src/repository/filter.rs @@ -49,13 +49,13 @@ impl Repository { }, Ok, )?)?; - let cache = self.attributes_only(&index, gix_worktree::cache::state::attributes::Source::IdMapping)?; + let cache = self.attributes_only(&index, gix_worktree::stack::state::attributes::Source::IdMapping)?; (cache, IndexPersistedOrInMemory::InMemory(index)) } else { let index = self.index()?; let cache = self.attributes_only( &index, - gix_worktree::cache::state::attributes::Source::WorktreeThenIdMapping, + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping, )?; (cache, IndexPersistedOrInMemory::Persisted(index)) }; diff --git a/gix/src/repository/pathspec.rs b/gix/src/repository/pathspec.rs index 5a7e8245fec..4b2ab8c331a 100644 --- a/gix/src/repository/pathspec.rs +++ b/gix/src/repository/pathspec.rs @@ -17,7 +17,7 @@ impl Repository { Pathspec::new(self, patterns, inherit_ignore_case, || { self.attributes_only( index, - gix_worktree::cache::state::attributes::Source::WorktreeThenIdMapping, + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping, ) .map_err(Into::into) }) diff --git a/gix/src/repository/worktree.rs b/gix/src/repository/worktree.rs index c182e624342..b05c4fcbf48 100644 --- a/gix/src/repository/worktree.rs +++ b/gix/src/repository/worktree.rs @@ -74,7 +74,7 @@ impl crate::Repository { // TODO(perf): when loading a non-HEAD tree, we effectively traverse the tree twice. This is usually fast though, and sharing // an object cache between the copies of the ODB handles isn't trivial and needs a lock. 
let index = self.index_from_tree(&id)?; - let mut cache = self.attributes_only(&index, gix_worktree::cache::state::attributes::Source::IdMapping)?; + let mut cache = self.attributes_only(&index, gix_worktree::stack::state::attributes::Source::IdMapping)?; let pipeline = gix_filter::Pipeline::new(cache.attributes_collection(), crate::filter::Pipeline::options(self)?); let objects = self.objects.clone().into_arc().expect("TBD error handling"); diff --git a/gix/src/types.rs b/gix/src/types.rs index 581ea76e92b..076512a4b58 100644 --- a/gix/src/types.rs +++ b/gix/src/types.rs @@ -202,7 +202,7 @@ pub struct Remote<'repo> { pub struct Pathspec<'repo> { pub(crate) repo: &'repo Repository, /// The cache to power attribute access. It's only initialized if we have a pattern with attributes. - pub(crate) cache: Option, + pub(crate) cache: Option, /// The prepared search to use for checking matches. pub(crate) search: gix_pathspec::Search, } diff --git a/gix/src/worktree/mod.rs b/gix/src/worktree/mod.rs index 3fbdca49993..bd808373927 100644 --- a/gix/src/worktree/mod.rs +++ b/gix/src/worktree/mod.rs @@ -3,6 +3,7 @@ use std::path::PathBuf; #[cfg(feature = "worktree-archive")] pub use gix_archive as archive; pub use gix_worktree::*; +pub use gix_worktree_state as state; #[cfg(feature = "worktree-stream")] pub use gix_worktree_stream as stream; @@ -120,12 +121,12 @@ pub mod excludes { /// /// When only excludes are desired, this is the most efficient way to obtain them. Otherwise use /// [`Worktree::attributes()`][crate::Worktree::attributes()] for accessing both attributes and excludes. - pub fn excludes(&self, overrides: Option) -> Result { + pub fn excludes(&self, overrides: Option) -> Result { let index = self.index()?; Ok(self.parent.excludes( &index, overrides, - gix_worktree::cache::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, + gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, )?) 
} } @@ -150,23 +151,23 @@ pub mod attributes { /// /// * `$XDG_CONFIG_HOME/…/ignore|attributes` if `core.excludesFile|attributesFile` is *not* set, otherwise use the configured file. /// * `$GIT_DIR/info/exclude|attributes` if present. - pub fn attributes(&self, overrides: Option) -> Result { + pub fn attributes(&self, overrides: Option) -> Result { let index = self.index()?; Ok(self.parent.attributes( &index, - gix_worktree::cache::state::attributes::Source::WorktreeThenIdMapping, - gix_worktree::cache::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping, + gix_worktree::stack::state::ignore::Source::WorktreeThenIdMappingIfNotSkipped, overrides, )?) } /// Like [attributes()][Self::attributes()], but without access to exclude/ignore information. - pub fn attributes_only(&self) -> Result { + pub fn attributes_only(&self) -> Result { let index = self.index()?; self.parent .attributes_only( &index, - gix_worktree::cache::state::attributes::Source::WorktreeThenIdMapping, + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping, ) .map_err(|err| Error::CreateCache(err.into())) }