-
Notifications
You must be signed in to change notification settings - Fork 427
/
build.py
3164 lines (2773 loc) · 151 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
'''
Module that does most of the heavy lifting for the ``conda build`` command.
'''
from __future__ import absolute_import, division, print_function
from collections import deque, OrderedDict
import fnmatch
import glob2
import io
import json
import os
import warnings
from os.path import isdir, isfile, islink, join, dirname
import random
import re
import shutil
import stat
import string
import subprocess
import sys
import time
# this is to compensate for a requests idna encoding error. Conda is a better place to fix,
# eventually
# exception is raises: "LookupError: unknown encoding: idna"
# http://stackoverflow.com/a/13057751/1170370
import encodings.idna # NOQA
from bs4 import UnicodeDammit
import yaml
import conda_package_handling.api
# used to get version
from .conda_interface import env_path_backup_var_exists, conda_45, conda_46
from .conda_interface import PY3
from .conda_interface import prefix_placeholder
from .conda_interface import TemporaryDirectory
from .conda_interface import VersionOrder
from .conda_interface import text_type
from .conda_interface import CrossPlatformStLink
from .conda_interface import PathType, FileMode
from .conda_interface import EntityEncoder
from .conda_interface import get_rc_urls
from .conda_interface import url_path
from .conda_interface import root_dir
from .conda_interface import conda_private
from .conda_interface import MatchSpec
from .conda_interface import reset_context
from .conda_interface import context
from .conda_interface import UnsatisfiableError
from .conda_interface import NoPackagesFoundError
from .conda_interface import CondaError
from .conda_interface import pkgs_dirs
from .utils import env_var, glob, tmp_chdir, CONDA_TARBALL_EXTENSIONS
from conda_build import environ, source, tarcheck, utils
from conda_build.index import get_build_index, update_index
from conda_build.render import (output_yaml, bldpkg_path, render_recipe, reparse, distribute_variants,
expand_outputs, try_download, execute_download_actions,
add_upstream_pins)
import conda_build.os_utils.external as external
from conda_build.metadata import FIELDS, MetaData, default_structs
from conda_build.post import (post_process, post_build,
fix_permissions, get_build_metadata)
from conda_build.exceptions import indent, DependencyNeedsBuildingError, CondaBuildException
from conda_build.variants import (set_language_env_vars, dict_of_lists_to_list_of_dicts,
get_package_variants)
from conda_build.create_test import create_all_test_files
import conda_build.noarch_python as noarch_python
from conda import __version__ as conda_version
from conda_build import __version__ as conda_build_version
# The Windows helper module is only imported when running on Windows.
if sys.platform == 'win32':
    import conda_build.windows as windows

# Select the shell executable for the current platform: BSD systems get
# /bin/sh, Windows relies on a `bash` found on PATH, everything else uses
# /bin/bash.
if 'bsd' in sys.platform:
    shell_path = '/bin/sh'
else:
    shell_path = 'bash' if utils.on_win else '/bin/bash'
def stats_key(metadata, desc):
    '''
    Build the string key under which statistics for one build are stored.

    :param metadata: object exposing ``get_used_loop_vars()``, ``name()``, ``version()`` and a
                     ``config`` with ``variant`` (mapping) and ``subdir`` attributes
    :param desc: prefix prepended verbatim to the generated key
    :return: ``desc`` followed by name, version and (when non-empty) the variant string,
             joined with ``-``
    '''
    variant = metadata.config.variant
    # get the build string from whatever conda-build makes of the configuration
    variant_parts = []
    for loop_var in metadata.get_used_loop_vars():
        if loop_var != 'target_platform':
            variant_parts.append(loop_var + '_' + str(variant[loop_var]))
    build_vars = '-'.join(variant_parts)

    # kind of a special case. Target platform determines a lot of output behavior, but may not be
    # explicitly listed in the recipe.
    target = variant.get('target_platform')
    cross_target = (target and target != metadata.config.subdir and
                    'target_platform' not in build_vars)
    if cross_target:
        build_vars += '-target_' + target

    pieces = [metadata.name(), metadata.version()]
    if build_vars:
        pieces.append(build_vars)
    return desc + "-".join(pieces)
def seconds_to_text(secs):
    '''
    Format a duration as ``H:MM:SS.s``.

    :param secs: duration in seconds (int or float)
    :return: string with hours unpadded, minutes zero-padded to two digits and seconds shown
             with one decimal place
    '''
    # Round to the displayed precision *before* splitting into fields. Otherwise a value such
    # as 59.96 is split into 0 minutes and 59.96 seconds, and the formatter then renders the
    # seconds field as "60.0" instead of carrying into the minutes.
    secs = round(secs, 1)
    m, s = divmod(secs, 60)
    h, m = divmod(int(m), 60)
    return "{:d}:{:02d}:{:04.1f}".format(h, m, s)
def log_stats(stats_dict, descriptor):
    '''
    Print a resource usage summary to stdout.

    :param stats_dict: mapping that must contain ``disk`` and ``elapsed``; ``processes``,
                       ``cpu_sys``, ``cpu_user`` and ``rss`` are optional
    :param descriptor: text naming what the statistics were collected from
    '''
    print("\nResource usage statistics from {}:".format(descriptor))
    print(" Process count: {}".format(stats_dict.get('processes', 1)))

    # CPU and memory figures are reported as "unavailable" when missing or zero.
    cpu_sys = stats_dict.get('cpu_sys')
    if not cpu_sys:
        print(" CPU time: unavailable")
    else:
        cpu_user = stats_dict.get('cpu_user', 0)
        print(" CPU time: Sys={}, User={}".format(seconds_to_text(cpu_sys),
                                                  seconds_to_text(cpu_user)))

    rss = stats_dict.get('rss')
    if not rss:
        print(" Memory: unavailable")
    else:
        print(" Memory: {}".format(utils.bytes2human(rss)))

    disk_text = utils.bytes2human(stats_dict['disk'])
    print(" Disk usage: {}".format(disk_text))
    elapsed_text = seconds_to_text(stats_dict['elapsed'])
    print(" Time elapsed: {}\n".format(elapsed_text))
def create_post_scripts(m):
    '''
    Create scripts to run after build step

    For each of pre-link, post-link and pre-unlink, look for a matching script in the recipe
    directory (``m.path``) and, when it exists, copy it into the host prefix's ``Scripts``
    (win- subdirs) or ``bin`` directory as ``.<name>-<step><ext>`` with mode 0o775.
    '''
    ext = '.bat' if utils.on_win else '.sh'
    for tp in ('pre-link', 'post-link', 'pre-unlink'):
        # To have per-output link scripts they must be prefixed by the output name or be explicitly
        # specified in the build section
        is_output = 'package:' not in m.get_recipe_text()
        if not is_output:
            scriptname = tp
        else:
            explicit = m.meta.get('build', {}).get(tp, '')
            scriptname = m.meta['build'][tp] if explicit else m.name() + '-' + tp
        dst_name = '.' + m.name() + '-' + tp + ext
        src = join(m.path, scriptname + ext)
        if not isfile(src):
            continue
        scripts_subdir = 'Scripts' if m.config.host_subdir.startswith('win-') else 'bin'
        dst_dir = join(m.config.host_prefix, scripts_subdir)
        if not isdir(dst_dir):
            os.makedirs(dst_dir, 0o775)
        dst = join(dst_dir, dst_name)
        utils.copy_into(src, dst, m.config.timeout, locking=m.config.locking)
        os.chmod(dst, 0o775)
def prefix_replacement_excluded(path):
    '''
    Tell whether ``path`` should be skipped when looking for embedded prefixes.

    :param path: filesystem path to test
    :return: True for ``.pyc``/``.pyo`` names, for anything that is not a regular file, and
             (except on darwin) for symbolic links; False otherwise
    '''
    is_bytecode = path.endswith(('.pyc', '.pyo'))
    if is_bytecode or not isfile(path):
        return True
    # OSX does not allow hard-linking symbolic links, so we cannot
    # skip symbolic links (as we can on Linux)
    return sys.platform != 'darwin' and islink(path)
def have_prefix_files(files, prefix):
    '''
    Yields files that contain the current prefix in them, and modifies them
    to replace the prefix with a placeholder.

    This is a generator. On non-Windows platforms, text files containing the prefix are
    rewritten in place (prefix -> placeholder) before being reported.

    :param files: Filenames to check for instances of prefix; absolute paths are made relative
                  by stripping ``len(prefix) + 1`` leading characters
    :param prefix: The prefix directory that the files live in and that is searched for
    :return: yields tuples of strings ``(matched_prefix, mode, filename)`` where mode is
             ``'binary'`` (file contains a NUL byte) or ``'text'``
    '''
    prefix_bytes = prefix.encode(utils.codec)
    prefix_placeholder_bytes = prefix_placeholder.encode(utils.codec)
    searches = {prefix: prefix_bytes}
    if utils.on_win:
        # some windows libraries use unix-style path separators
        forward_slash_prefix = prefix.replace('\\', '/')
        searches[forward_slash_prefix] = forward_slash_prefix.encode(utils.codec)
        # some windows libraries have double backslashes as escaping
        double_backslash_prefix = prefix.replace('\\', '\\\\')
        searches[double_backslash_prefix] = double_backslash_prefix.encode(utils.codec)
    searches[prefix_placeholder] = prefix_placeholder_bytes
    min_prefix = min(len(k) for k in searches)

    # mm.find is incredibly slow, so ripgrep is used to pre-filter the list.
    # Really, ripgrep could be used on its own with a bit more work though.
    rg_hits = []
    prefix_len = len(prefix) + 1
    rg = external.find_executable('rg')
    if rg:
        for rep_prefix in searches:
            args = [rg,
                    '--unrestricted',
                    '--no-heading',
                    '--with-filename',
                    '--files-with-matches',
                    '--fixed-strings',
                    '--text',
                    rep_prefix,
                    prefix]
            try:
                matches = subprocess.check_output(args)
            except subprocess.CalledProcessError:
                # rg exits non-zero when it finds nothing for this search string.
                continue
            rg_hits.extend(matches.decode('utf-8').replace('\r\n', '\n').splitlines())
        # HACK: this is basically os.path.relpath, just simpler and faster
        # NOTE: path normalization needs to be in sync with create_info_files
        if utils.on_win:
            rg_hits = [rg_match.replace('\\', '/')[prefix_len:] for rg_match in rg_hits]
        else:
            rg_hits = [rg_match[prefix_len:] for rg_match in rg_hits]
    else:
        print("WARNING: Detecting which files contain PREFIX is slow, installing ripgrep makes it faster."
              " 'conda install ripgrep'")
    # A set keeps the per-file membership test O(1). As before, an empty result means
    # "no pre-filter" and every file is inspected.
    rg_matches = set(rg_hits)

    for f in files:
        if os.path.isabs(f):
            f = f[prefix_len:]
        if rg_matches and f not in rg_matches:
            continue
        path = os.path.join(prefix, f)
        if prefix_replacement_excluded(path):
            continue

        # dont try to mmap an empty file, and no point checking files that are smaller
        # than the smallest prefix.
        if os.stat(path).st_size < min_prefix:
            continue

        try:
            fi = open(path, 'rb+')
        except IOError:
            log = utils.get_logger(__name__)
            log.warning("failed to open %s for detecting prefix. Skipping it." % f)
            continue

        mm = None
        try:
            # mmap_or_read falls back to plain bytes when mmap fails with OSError.
            mm = mmap_or_read(fi)
            mode = 'binary' if mm.find(b'\x00') != -1 else 'text'
            # TODO :: Ask why we do not do this on Windows too?!
            if mode == 'text' and not utils.on_win and mm.find(prefix_bytes) != -1:
                # Use the placeholder for maximal backwards compatibility, and
                # to minimize the occurrences of usernames appearing in built
                # packages.
                data = mm[:]
                if hasattr(mm, 'close'):
                    mm.close()
                fi.close()
                rewrite_file_with_new_prefix(path, data, prefix_bytes, prefix_placeholder_bytes)
                fi = open(path, 'rb+')
                mm = mmap_or_read(fi)
            for rep_prefix, rep_prefix_bytes in searches.items():
                if mm.find(rep_prefix_bytes) != -1:
                    yield (rep_prefix, mode, f)
        finally:
            # Runs on errors and when the consumer abandons the generator too. ``mm`` is plain
            # bytes (no close()) when the mmap fallback was taken.
            if hasattr(mm, 'close'):
                mm.close()
            fi.close()
# It may be that when using the list form of passing args to subprocess
# what matters is the number of arguments rather than the accumulated
# string length. In that case, the per-item cost should become 1, and we
# should pass this in instead. It could also depend on the platform. We
# should test this!
def chunks(l, n):
    '''
    Split ``l`` into consecutive slices whose estimated command-line size stays within ``n``.

    Each item costs ``len(item) + 3`` (+ 3 in case a shell is used: 1 space and 2 quotes).
    Every item appears in exactly one chunk and order is preserved. An item that on its own
    exceeds ``n`` is yielded alone rather than dropped.

    :param l: sliceable sequence of sized items (str or bytes arguments)
    :param n: size budget per chunk
    :return: generator of slices of ``l``; nothing is yielded for an empty sequence
    '''
    start = 0
    size = 0
    for i, item in enumerate(l):
        cost = len(item) + 3
        # Close the current chunk *before* adding the item that would overflow it, so the item
        # is neither pushed into an over-budget chunk nor repeated in the next one.
        if size + cost > n and i > start:
            yield l[start:i]
            start = i
            size = 0
        size += cost
    if start < len(l):
        yield l[start:]
def get_bytes_or_text_as_bytes(parent):
    '''
    Return the payload of a ripgrep ``--json`` data object as bytes.

    ripgrep emits ``{"text": ...}`` for valid UTF-8 data and ``{"bytes": ...}`` holding a
    base64-encoded string otherwise.

    :param parent: dict with either a ``bytes`` or a ``text`` key
    :return: the raw bytes
    '''
    if 'bytes' in parent:
        raw = parent['bytes']
        # Pass already-decoded binary data straight through for backward compatibility.
        if isinstance(raw, (bytes, bytearray)):
            return raw
        import base64
        return base64.b64decode(raw)
    return parent['text'].encode('utf-8')
def regex_files_rg(files, prefix, tag, rg, regex_rg, replacement_re,
                   also_binaries=False, debug_this=False, match_records=None):
    '''
    Search ``files`` under ``prefix`` for ``regex_rg`` using ripgrep's ``--json`` output.

    :param files: file names relative to prefix, using ``/`` as separator
    :param prefix: directory the files live in
    :param tag: label stored on every recorded submatch
    :param rg: path to the ripgrep executable
    :param regex_rg: the pattern handed to ripgrep, as bytes
    :param replacement_re: replacement stored on every recorded submatch
    :param also_binaries: pass ``--text`` so binary files are searched too
    :param debug_this: accepted for interface compatibility; does not change what is recorded
    :param match_records: mapping of previous results to augment; a fresh OrderedDict is used
                          when omitted
    :return: match records sorted by ``sort_matches``
    '''
    # A mutable default would be shared (and keep growing) across unrelated calls.
    if match_records is None:
        match_records = OrderedDict()
    # If we run out of space for args (could happen) we'll need to either:
    # 1. Batching the calls.
    # 2. Call for all (text?) files by passing just 'prefix' then filter out ones we don't care about (slow).
    # 3. Use a shell prefixed with `cd prefix && ` (could still hit size limits, just later).
    # I have gone for batching!
    args_base = [rg.encode('utf-8'),
                 b'--unrestricted',
                 b'--no-heading',
                 b'--with-filename'] + \
                ([b'--text'] if also_binaries else []) + \
                [b'--json',
                 regex_rg]
    pu = prefix.encode('utf-8')
    prefix_files = [os.path.join(pu, f.replace('/', os.sep).encode('utf-8')) for f in files]
    args_len = len(b' '.join(args_base))
    file_lists = list(chunks(prefix_files, (32760 if utils.on_win else 131071) - args_len))
    for file_list in file_lists:
        args = args_base[:] + file_list
        # This will not work now our args are binary strings:
        # from conda.utils import quote_for_shell
        # print(quote_for_shell(args))
        try:
            if utils.on_win:
                args = [a.decode('utf-8') for a in args]
            output = subprocess.check_output(args, shell=False)
        except subprocess.CalledProcessError:
            # Just means rg returned 1 as no matches were found.
            continue
        # rg prints one JSON object per line; wrap them into a single JSON array.
        json_lines = output.rstrip(b'\n').split(b'\n')
        matches = json.loads((b'[' + b','.join(json_lines) + b']\n').decode('utf-8'))
        if not matches:
            continue
        stage = 'pre-begin'
        for match in matches:
            new_stage = match['type']
            if new_stage == 'begin':
                stage = new_stage
                match_filename_begin = match['data']['path']['text'][len(prefix) + 1:]
                # TODO :: Speed this up, and generalise it, the python version does similar.
                with open(os.path.join(prefix, match_filename_begin), 'rb') as fh:
                    data = mmap_or_read(fh)
                    match_filename_type = 'binary' if data.find(b'\x00') != -1 else 'text'
                    # Release the mapping right away; plain bytes have no close().
                    if hasattr(data, 'close'):
                        data.close()
            elif new_stage == 'match':
                old_stage = stage
                assert stage == 'begin' or stage == 'match' or stage == 'end'
                stage = new_stage
                match_filename = match['data']['path']['text'][len(prefix) + 1:]
                # Get stuff from the 'line' (to be consistent with the python version we ignore this).
                # match_line = get_bytes_or_text_as_bytes(match['data']['lines'])
                # match_line_number = match['data']['line_number']
                # match_absolute_offset = match['data']['absolute_offset']
                if old_stage == 'begin':
                    assert match_filename_begin == match_filename, '{} != \n {}'\
                        .format(match_filename_begin, match_filename)
                if match_filename not in match_records:
                    # We could add: #'line': match_line, 'line_number': match_line_number but it
                    # would break our ability to compare against the python code.
                    match_records[match_filename] = {'type': match_filename_type,
                                                     'submatches': []}
                recorded = match_records[match_filename]['submatches']
                absolute_offset = match['data']['absolute_offset']
                for submatch in match['data']['submatches']:
                    # Submatch offsets are relative to the reported line(s); make them absolute.
                    submatch_record = {'tag': tag,
                                       'text': get_bytes_or_text_as_bytes(submatch['match']),
                                       'start': submatch['start'] + absolute_offset,
                                       'end': submatch['end'] + absolute_offset,
                                       'regex_re': regex_rg,
                                       'replacement_re': replacement_re}
                    if submatch_record not in recorded:
                        recorded.append(submatch_record)
            elif new_stage == 'end':
                assert stage == 'match'
                stage = new_stage
            # NOTE(review): 'elpased_total' looks like a misspelling and may never match a
            # message type rg actually emits; kept verbatim -- confirm before changing.
            elif new_stage == 'elpased_total':
                assert stage == 'end'
                stage = new_stage
                print('ELAPSED TOTAL')
    return sort_matches(match_records)
def mmap_or_read(fh):
    '''
    Map the open file ``fh`` privately, or read it fully when mapping fails.

    :param fh: open binary file object
    :return: the object returned by ``utils.mmap_mmap``, or the bytes from ``fh.read()`` when
             mapping raises OSError
    '''
    try:
        return utils.mmap_mmap(fh.fileno(), 0, tagname=None, flags=utils.mmap_MAP_PRIVATE)
    except OSError:
        return fh.read()
def regex_files_py(files, prefix, tag, regex_re, replacement_re,
                   also_binaries=False, match_records=None):
    '''
    Search ``files`` under ``prefix`` for ``regex_re`` using Python's ``re`` module.

    When the pattern has groups, the last group is the recorded portion; otherwise the whole
    match is recorded.

    :param files: file names relative to prefix
    :param prefix: directory the files live in
    :param tag: label stored on every recorded submatch
    :param regex_re: the pattern (bytes, since file contents are searched as bytes)
    :param replacement_re: replacement stored on every recorded submatch
    :param also_binaries: also search files that contain a NUL byte
    :param match_records: mapping of previous results to augment; a fresh OrderedDict is used
                          when omitted
    :return: match records sorted by ``sort_matches``
    '''
    import re
    # A mutable default would be shared (and keep growing) across unrelated calls.
    if match_records is None:
        match_records = OrderedDict()
    re_re = re.compile(regex_re)
    for filename in files:
        with open(join(prefix, filename), 'rb+') as f:
            if os.fstat(f.fileno()).st_size == 0:
                continue
            data = mmap_or_read(f)
            file_type = 'binary' if data.find(b'\x00') != -1 else 'text'
            if not also_binaries and file_type == 'binary':
                continue
            for match in re_re.finditer(data):
                if filename not in match_records:
                    match_records[filename] = {'type': file_type,
                                               'submatches': []}
                # Group 0 is the complete match, so one index covers both the "no groups" and
                # "last group" cases. Text and offsets must come from the same group.
                g_index = len(match.groups())
                match_records[filename]['submatches'].append({'tag': tag,
                                                              'text': match.group(g_index),
                                                              'start': match.start(g_index),
                                                              'end': match.end(g_index),
                                                              'regex_re': regex_re,
                                                              'replacement_re': replacement_re})
    return sort_matches(match_records)
def regex_matches_tighten_re(match_records, regex_re, tag=None):
    '''
    Shrink previously recorded submatches to what the stricter ``regex_re`` matches.

    ``regex_re`` is applied with ``re.match`` (anchored at the start of the recorded text).
    The last group, if the pattern has groups, is taken as the tightened portion; otherwise
    the whole match is. ``start``/``end``/``text`` are updated in place and ``regex_re`` is
    stored on every submatch that matched.

    :param match_records: mapping of filename -> {'submatches': [...], ...}
    :param regex_re: the tighter pattern (same type as the recorded text, i.e. bytes)
    :param tag: when given, only submatches carrying this tag are processed
    :return: match records sorted by ``sort_matches``
    '''
    def _agrees_with_line(match, submatch):
        # Only meaningful for records that carry the matched 'line' and its 'absolute_offset'.
        if 'line' not in match:
            return True
        offset = match['absolute_offset']
        return match['line'][submatch['start'] - offset:submatch['end'] - offset] == submatch['text']

    # Do we need to shrink the matches?
    if match_records:
        import re
        re_re = re.compile(regex_re)
        for filename, match in match_records.items():
            for submatch in match['submatches']:
                if tag and submatch['tag'] != tag:
                    continue
                match_re = re_re.match(submatch['text'])
                # Group 0 is the whole match, so this index covers patterns with and without
                # groups. A group that did not participate yields None.
                group_index = len(match_re.groups()) if match_re else 0
                match_tighter = match_re.group(group_index) if match_re else None
                if match_tighter is None:
                    print("ERROR :: Tighter regex_re does not match")
                    continue
                if match_tighter != submatch['text']:
                    # Assert we can find submatches correctly at their start and end in the line.
                    assert _agrees_with_line(match, submatch)
                    # Use the offset the regex engine reports rather than searching for the
                    # text, which could land on an earlier, identical occurrence.
                    submatch['start'] += match_re.start(group_index)
                    submatch['end'] = submatch['start'] + len(match_tighter)
                    submatch['text'] = match_tighter
                    # Assert we can still find submatches correctly at their start and end in the line.
                    assert _agrees_with_line(match, submatch)
                # Even if the match was not tighter we overwrite the regex.
                submatch['regex_re'] = regex_re
    return sort_matches(match_records)
# Sorts matches by filename and also submatches by start position.
def sort_matches(match_records):
    '''
    Return an OrderedDict of ``match_records`` ordered by filename.

    Each record's ``submatches`` entry is replaced by a new list ordered by ``start``.
    '''
    ordered = OrderedDict()
    for name in sorted(match_records):
        ordered[name] = match_records[name]
    for record in ordered.values():
        record['submatches'] = sorted(record['submatches'], key=lambda sub: sub['start'])
    return ordered
def check_matches(prefix, match_records):
    '''
    Debug helper: print every recorded submatch and flag those whose recorded text differs
    from the bytes found at ``start..end`` in the file on disk.

    :param prefix: directory the recorded file names are relative to
    :param match_records: mapping of filename -> {'submatches': [...], ...}
    '''
    print("::CHECKING MATCHES::")
    for rel_name, record in match_records.items():
        with open(join(prefix, rel_name), 'rb+') as fh:
            contents = fh.read()
        # Nothing can be verified against an empty file.
        if not contents:
            continue
        for sub in record['submatches']:
            on_disk = contents[sub['start']:sub['end']]
            if on_disk != sub['text']:
                print("ERROR :: file_content {} != submatch {}".format(on_disk, sub['text']))
            print("{} :: ({}..{}) = {}".format(rel_name, sub['start'], sub['end'], sub['text']))
def have_regex_files(files, prefix, tag, regex_re, replacement_re,
                     also_binaries=False, match_records=None, regex_rg=None, debug=False):
    '''
    :param files: Filenames to check for instances of regex_re
    :param prefix: Prefix in which to search for these files
    :param tag: Label stored on every recorded submatch
    :param regex_re: The regex to use
    :param replacement_re: The replacement regex to use
    :param also_binaries: Search and replace in binaries too
    :param regex_rg: rg does not support all pcre2 nor python re features. You can use this to provide a
                     more compatible but also more broad, fast regex (it must capture everything regex_re
                     would capture, but can capture more) as a pre-filter. Then python re will be used to
                     reduce the matches. There are also some minor syntax differences between rg and re.
                     The last group is taken as the matching portion, though I am not sure about that
                     decision.
    :param match_records: A dictionary of previous results should you wish to augment it; it is
                          deep-copied, never modified in place
    :param debug: Run both the rg and the python implementation, check both against the files
                  on disk and print any differences between them
    :return: input match_records augmented with matches
    '''
    # A mutable default would be handed back to (and possibly modified by) every caller that
    # passes no files.
    if match_records is None:
        match_records = {}
    if not len(files):
        return match_records
    import copy
    match_records_rg, match_records_re = copy.deepcopy(match_records), copy.deepcopy(match_records)
    if not isinstance(regex_re, (bytes, bytearray)):
        regex_re = regex_re.encode('utf-8')
    if regex_rg and not isinstance(regex_rg, (bytes, bytearray)):
        regex_rg = regex_rg.encode('utf-8')
    rg = external.find_executable('rg')
    if rg:
        match_records_rg = regex_files_rg(files, prefix, tag,
                                          rg,
                                          regex_rg if regex_rg else regex_re,
                                          replacement_re,
                                          also_binaries=also_binaries,
                                          debug_this=debug,
                                          match_records=match_records_rg)
        # The broad rg pre-filter may over-match; narrow the results with the real regex.
        if regex_rg and regex_re:
            match_records_rg = regex_matches_tighten_re(match_records_rg, regex_re, tag)
    if not rg or debug:
        match_records_re = regex_files_py(files, prefix, tag,
                                          regex_re if regex_re else regex_rg,
                                          replacement_re,
                                          also_binaries=also_binaries,
                                          match_records=match_records_re)
    if debug:
        check_matches(prefix, match_records_rg)
        check_matches(prefix, match_records_re)
        if match_records_rg != match_records_re:
            for (k, v), (k2, v2) in zip(match_records_rg.items(), match_records_re.items()):
                if k != k2:
                    print("File Mismatch:\n{}\n{}".format(k, k2))
                elif v != v2:
                    # The file name goes inside the parentheses, then the two differing values.
                    print("Match Mismatch ({}):\n{}\n{}".format(k, v, v2))
                    for submatch, submatch2 in zip(v['submatches'], v2['submatches']):
                        if submatch != submatch2:
                            print("Submatch Mismatch ({}):\n{}\n{}".format(k, submatch, submatch2))
    return match_records_rg if rg else match_records_re
def rewrite_file_with_new_prefix(path, data, old_prefix, new_prefix):
    '''
    Overwrite ``path`` with ``data`` after replacing ``old_prefix`` by ``new_prefix``.

    :param path: file to overwrite
    :param data: the file's contents as bytes
    :param old_prefix: bytes to look for
    :param new_prefix: bytes to substitute
    :return: the replaced data that was written
    '''
    # Remember the permission bits from before the rewrite.
    original_mode = stat.S_IMODE(os.stat(path).st_mode)
    replaced = data.replace(old_prefix, new_prefix)
    # Save as
    with open(path, 'wb') as out:
        out.write(replaced)
    # chmod u+w
    os.chmod(path, original_mode | stat.S_IWUSR)
    return replaced
def perform_replacements(matches, prefix, verbose=False, diff=None):
    '''
    Apply recorded regex replacements to the files under ``prefix``.

    Each file is rebuilt into ``<file>.cbpatch.tmp`` (unmatched spans are copied, matched spans
    are passed through ``re.sub``) and the temporary file then replaces the original. In
    binary files the replacement is NUL-padded to the original length and must not be longer.

    :param matches: mapping of relative filename -> {'type': 'text'|'binary', 'submatches': [...]},
                    with submatches ordered by ``start``
    :param prefix: directory the file names are relative to
    :param verbose: unused; kept for interface compatibility
    :param diff: optional path to a ``diff`` executable used to print what changed in text files
    '''
    for rel_name, match in matches.items():
        filename = os.path.join(prefix, rel_name)
        filename_tmp = filename + '.cbpatch.tmp'
        # Remove a stale temporary file left behind by an earlier, interrupted run.
        if os.path.exists(filename_tmp):
            os.unlink(filename_tmp)
        # Copying first (then truncating on open below) lets the temporary file start out as a
        # copy2 of the original rather than as a freshly created file.
        shutil.copy2(filename, filename_tmp)
        filename_short = filename.replace(prefix + os.sep, '')
        submatches = match['submatches']
        print("Patching '{}' in {} {}".format(filename_short,
                                              len(submatches),
                                              'places' if len(submatches) > 1 else 'place'))
        with open(filename_tmp, 'wb+') as file_tmp:
            file_tmp.truncate()
            with open(filename, 'rb') as file_orig:
                last_index = 0
                for submatch in submatches:
                    # Copy the untouched bytes between the previous match and this one.
                    length = submatch['start'] - last_index
                    data = file_orig.read(length)
                    assert len(data) == length
                    file_tmp.write(data)
                    original = submatch['text']
                    # Ideally you wouldn't pass to this function any submatches with replacement_re of None,
                    # Still, it's easily handled.
                    if submatch['replacement_re']:
                        replacement_re = submatch['replacement_re']
                        if not isinstance(replacement_re, (bytes, bytearray)):
                            replacement_re = replacement_re.encode('utf-8')
                        new_string = re.sub(submatch['regex_re'], replacement_re, original)
                    else:
                        new_string = original
                    if match['type'] == 'binary':
                        if len(original) < len(new_string):
                            print("ERROR :: Cannot replace {} with {} in binary file {}".format(original,
                                                                                                new_string,
                                                                                                filename))
                        # Binary files must keep their exact size: pad with NULs.
                        new_string = new_string.ljust(len(original), b'\0')
                        assert len(new_string) == len(original)
                    file_tmp.write(new_string)
                    # discarded (but also verified)
                    actual_original = file_orig.read(len(original))
                    if match['type'] == 'binary':
                        assert actual_original == original
                    last_index += length + len(original)
                # Write the remainder. Doing this after the loop also keeps a file with no
                # submatches intact instead of replacing it with an empty one.
                file_tmp.write(file_orig.read())
        # Could assert the lengths of binaries are the same here for extra safety.
        if os.path.exists(filename_tmp):
            if diff and match['type'] == 'text':
                diffo = "Diff returned no difference after patching {}".format(filename_short)
                # Always expect an exception.
                try:
                    diffo = subprocess.check_output([diff, '-urN', filename, filename_tmp], stderr=subprocess.PIPE)
                    print('WARNING :: Non-deferred patching of "{}" did not change it'.format(filename))
                except subprocess.CalledProcessError as e:
                    diffo = e.output
                print(diffo.decode('utf-8'))
            if os.path.exists(filename):
                os.unlink(filename)
            shutil.move(filename_tmp, filename)
def _copy_top_level_recipe(path, config, dest_dir, destination_subdir=None):
    '''
    Copy the files found under the recipe directory ``path`` into ``dest_dir``.

    :param path: recipe directory to copy from
    :param config: object providing ``timeout`` and ``locking`` for the copy
    :param dest_dir: directory to copy into
    :param destination_subdir: when given, copy into this subdirectory of ``dest_dir`` and keep
                               meta.yaml / conda_build_config.yaml; when omitted those two files
                               are left out
    '''
    recipe_root = path + os.sep
    file_paths = sorted(found.replace(recipe_root, '') for found in utils.rec_glob(path, "*"))

    if destination_subdir:
        # when this actually has a value, we're copying the top-level recipe into a subdirectory,
        # so that we have record of what parent recipe produced subpackages.
        dest_dir = join(dest_dir, destination_subdir)
    else:
        # exclude meta.yaml because the json dictionary captures its content
        skipped = ('meta.yaml', 'conda_build_config.yaml')
        file_paths = [rel for rel in file_paths if rel not in skipped]

    for rel in utils.filter_files(file_paths, path):
        utils.copy_into(join(path, rel), join(dest_dir, rel),
                        timeout=config.timeout,
                        locking=config.locking, clobber=True)
def _copy_output_recipe(m, dest_dir):
    '''
    Copy the parent recipe into ``dest_dir``/parent, plus this output's install script (if the
    rendered output names one) into ``dest_dir``.

    :param m: metadata of the output being packaged
    :param dest_dir: directory receiving the recipe files
    '''
    _copy_top_level_recipe(m.path, m.config, dest_dir, 'parent')

    rendered_output = m.get_rendered_output(m.name()) or {}
    install_script = rendered_output.get('script')
    # Only the install script is a candidate for now; entries that are unset are dropped.
    candidates = [install_script]
    wanted = utils.filter_files([entry for entry in candidates if entry], m.path)

    for rel in wanted:
        utils.copy_into(join(m.path, rel), join(dest_dir, rel),
                        timeout=m.config.timeout,
                        locking=m.config.locking, clobber=True)
def copy_recipe(m):
    '''
    Store the recipe that produced this package under ``<info_dir>/recipe``.

    Does nothing unless both ``m.config.include_recipe`` and ``m.include_recipe()`` are truthy.
    Writes the rendered meta.yaml (and, for top-level recipes whose text differs from the
    rendered one, the original as meta.yaml.template) plus the variant in use as
    conda_build_config.yaml.

    :param m: metadata of the package being built
    '''
    if m.config.include_recipe and m.include_recipe():
        # store the rendered meta.yaml file, plus information about where it came from
        # and what version of conda-build created it
        recipe_dir = join(m.config.info_dir, 'recipe')
        try:
            os.makedirs(recipe_dir)
        except OSError:
            # Best effort (typically: the directory already exists). Only filesystem errors
            # are ignored, so KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

        original_recipe = ""

        if m.is_output:
            _copy_output_recipe(m, recipe_dir)
        else:
            _copy_top_level_recipe(m.path, m.config, recipe_dir)
            original_recipe = m.meta_path

        output_metadata = m.copy()
        # hard code the build string, so that tests don't get it mixed up
        build = output_metadata.meta.get('build', {})
        build['string'] = output_metadata.build_id()
        output_metadata.meta['build'] = build

        # just for lack of confusion, don't show outputs in final rendered recipes
        if 'outputs' in output_metadata.meta:
            del output_metadata.meta['outputs']
        if 'parent_recipe' in output_metadata.meta.get('extra', {}):
            del output_metadata.meta['extra']['parent_recipe']

        utils.sort_list_in_nested_structure(output_metadata.meta,
                                            ('build/script', 'test/commands'))

        rendered = output_yaml(output_metadata)

        if original_recipe:
            with open(original_recipe, 'rb') as f:
                original_recipe_text = UnicodeDammit(f.read()).unicode_markup

        # Write a rendered meta.yaml unless the original already is exactly the rendered text.
        if not original_recipe or not original_recipe_text == rendered:
            with open(join(recipe_dir, "meta.yaml"), 'w') as f:
                f.write("# This file created by conda-build {}\n".format(conda_build_version))
                if original_recipe:
                    f.write("# meta.yaml template originally from:\n")
                    f.write("# " + source.get_repository_info(m.path) + "\n")
                f.write("# ------------------------------------------------\n\n")
                f.write(rendered)
            if original_recipe:
                utils.copy_into(original_recipe, os.path.join(recipe_dir, 'meta.yaml.template'),
                                timeout=m.config.timeout, locking=m.config.locking, clobber=True)

        # dump the full variant in use for this package to the recipe folder
        with open(os.path.join(recipe_dir, 'conda_build_config.yaml'), 'w') as f:
            yaml.dump(m.config.variant, f)
def copy_readme(m):
    '''
    Copy the file named by ``about/readme`` from the work directory into the info directory.

    Exits the process when the named file does not exist, and warns on stderr when its base
    name is not one of README.md, README.rst or README. Does nothing when no readme is set.

    :param m: metadata of the package being built
    '''
    readme = m.get_value('about/readme')
    if not readme:
        return

    src = join(m.config.work_dir, readme)
    if not isfile(src):
        sys.exit("Error: no readme file: %s" % readme)

    dst = join(m.config.info_dir, readme)
    utils.copy_into(src, dst, m.config.timeout, locking=m.config.locking)

    basename = os.path.split(readme)[1]
    if basename not in {"README.md", "README.rst", "README"}:
        print("WARNING: anaconda.org only recognizes about/readme "
              "as README.md and README.rst", file=sys.stderr)
def copy_license(m):
    '''
    Copy every file listed in ``about/license_file`` into ``<info_dir>/licenses``.

    Each entry is looked up in the work directory first and in the recipe directory second.

    :param m: metadata of the package being built
    :raises ValueError: when a listed file exists in neither location
    '''
    license_files = utils.ensure_list(m.get_value('about/license_file', []))
    if not license_files:
        return

    renamed = 0
    for license_file in license_files:
        # To not break existing recipes, ignore an empty string.
        if license_file == "":
            continue

        src_file = join(m.config.work_dir, license_file)
        if not os.path.isfile(src_file):
            src_file = os.path.join(m.path, license_file)
        if not os.path.isfile(src_file):
            raise ValueError("License file given in about/license_file ({}) does not exist in "
                             "source root dir or in recipe root dir (with meta.yaml)".format(src_file))

        # Rename absolute file paths or relative file paths starting with .. or .
        needs_rename = os.path.isabs(license_file) or license_file.startswith(".")
        if needs_rename:
            filename = "LICENSE{}.txt".format(renamed)
            renamed += 1
        else:
            filename = license_file

        target = join(m.config.info_dir, 'licenses', filename)
        utils.copy_into(src_file, target, m.config.timeout, locking=m.config.locking)

    print("Packaged license file/s.")
def copy_recipe_log(m):
    '''
    Copy the recipe log, when one exists, to ``<info_dir>/recipe_log.json``.

    :param m: metadata of the package being built
    '''
    # the purpose of this file is to capture some change history metadata that may tell people
    # why a given build was changed the way that it was
    log_file = m.get_value('about/recipe_log_file') or "recipe_log.json"

    # look in recipe folder first
    src_file = os.path.join(m.path, log_file)
    if not os.path.isfile(src_file):
        # fall back to the work directory
        src_file = join(m.config.work_dir, log_file)
        if not os.path.isfile(src_file):
            return

    target = join(m.config.info_dir, 'recipe_log.json')
    utils.copy_into(src_file, target, m.config.timeout, locking=m.config.locking)
def copy_test_source_files(m, destination):
    """Copy ``test/source_files`` and ``test/files`` entries into *destination*.

    Source directory selection: ``m.config.work_dir`` when it contains any
    entry, otherwise ``<recipe_dir>/info/test`` when ``m.config`` has a truthy
    ``recipe_dir``.  A ``parent`` sub-directory of the chosen directory, if it
    exists, is processed as an additional source directory.

    For each ``test/source_files`` pattern the matching paths are copied under
    *destination*; an ``OSError`` during a copy is logged as a warning and the
    remaining files are still processed.  After each pattern, the ``.pyc`` and
    ``.pyo`` files that ``utils.get_ext_files`` reports for *destination* are
    removed.

    ``test/files`` patterns are globbed relative to ``m.path`` and copied
    whenever the computed destination differs from the source path.

    Raises:
        RuntimeError: if a ``test/source_files`` pattern contains a backslash
            on Windows, or if a pattern matches nothing.
    """
    src_dir = ''
    if os.listdir(m.config.work_dir):
        src_dir = m.config.work_dir
    elif hasattr(m.config, 'recipe_dir') and m.config.recipe_dir:
        src_dir = os.path.join(m.config.recipe_dir, 'info', 'test')

    src_dirs = [src_dir]
    if os.path.isdir(os.path.join(src_dir, 'parent')):
        src_dirs.append(os.path.join(src_dir, 'parent'))

    for src_dir in src_dirs:
        if not (src_dir and os.path.isdir(src_dir) and src_dir != destination):
            continue
        for pattern in utils.ensure_list(m.get_value('test/source_files', [])):
            if utils.on_win and '\\' in pattern:
                raise RuntimeError("test/source_files paths must use / "
                                   "as the path delimiter on Windows")
            files = glob(join(src_dir, pattern))
            if not files:
                msg = "Did not find any source_files for test with pattern {0}"
                raise RuntimeError(msg.format(pattern))
            for f in files:
                # Every glob result starts with src_dir, so only that leading
                # occurrence is rewritten; a later repeat of the same text
                # inside the path must be left untouched.
                target = f.replace(src_dir, destination, 1)
                try:
                    # disable locking to avoid locking a temporary directory (the extracted
                    # test folder)
                    utils.copy_into(f, target, m.config.timeout,
                                    locking=False, clobber=True)
                except OSError as e:
                    log = utils.get_logger(__name__)
                    # `warning` is used because `warn` is only a deprecated alias.
                    log.warning("Failed to copy {0} into test files. Error was: {1}".format(
                        f, str(e)))
            for ext in '.pyc', '.pyo':
                for f in utils.get_ext_files(destination, ext):
                    os.remove(f)

    recipe_test_files = m.get_value('test/files')
    if recipe_test_files:
        orig_recipe_dir = m.path
        for pattern in recipe_test_files:
            for f in glob(join(orig_recipe_dir, pattern)):
                basedir = orig_recipe_dir
                if not os.path.isfile(f):
                    basedir = os.path.join(orig_recipe_dir, 'parent')
                dest = f.replace(basedir, destination)
                # When basedir does not occur in f the path is unchanged and
                # there is nothing to copy.
                if f != dest:
                    utils.copy_into(f, dest,
                                    timeout=m.config.timeout, locking=m.config.locking,
                                    clobber=True)
def write_hash_input(m):
    """Write ``m.get_hash_contents()`` as 2-space-indented JSON to ``<info_dir>/hash_input.json``."""
    hash_contents = m.get_hash_contents()
    out_path = os.path.join(m.config.info_dir, 'hash_input.json')
    with open(out_path, 'w') as out_file:
        json.dump(hash_contents, out_file, indent=2)
def get_files_with_prefix(m, files_in, prefix):
    """Return the files under *prefix* in which a prefix string was matched.

    Symlinks are dropped from *files_in*.  Each remaining file is classified
    as binary (it contains a NUL byte) or text, then filtered by the recipe's
    ``build/ignore_prefix_files`` setting, by the binary-detection settings
    and by ``prefix_replacement_excluded``.  The survivors are handed to
    ``have_regex_files`` with a regex built from the prefix variants.

    When ``m.config.variant`` has a ``replacements`` entry, each of its
    ``all_replacements`` items is also passed to ``have_regex_files``; the
    accumulated matches then go to ``perform_replacements``.

    Args:
        m: recipe metadata object providing ``ignore_prefix_files()``,
            ``get_value()`` and ``config``.
        files_in: iterable of file paths relative to *prefix*.
        prefix: directory that the entries of *files_in* are relative to.

    Returns:
        A sorted list of ``(matched_prefix_text, mode, filename)`` tuples, one
        per distinct matched text per file.
    """
    import time
    start = time.time()
    # It is nonsensical to replace anything in a symlink.
    files = sorted(f for f in files_in if not os.path.islink(os.path.join(prefix, f)))

    ignore_files = m.ignore_prefix_files()
    ignore_types = set()
    if not hasattr(ignore_files, "__iter__"):
        # A non-iterable value is a flag: True means ignore every file type.
        if ignore_files is True:
            ignore_types.update((FileMode.text.name, FileMode.binary.name))
        ignore_files = []
    if (not m.get_value('build/detect_binary_files_with_prefix', True) and
            not m.get_value('build/binary_has_prefix_files', None)):
        ignore_types.update((FileMode.binary.name,))

    files_with_prefix = [(None, _classify_prefix_file_mode(os.path.join(prefix, f)), f)
                         for f in files]

    # A set keeps the membership tests O(1) and leaves the list returned by
    # m.ignore_prefix_files() unmodified.
    ignored = set(ignore_files)
    for _, mode, name in files_with_prefix:
        if ((mode in ignore_types and name not in ignored) or
                prefix_replacement_excluded(os.path.join(prefix, name))):
            ignored.add(name)
    files_with_prefix = [entry for entry in files_with_prefix if entry[2] not in ignored]

    prefix_u = prefix.replace('\\', '/') if utils.on_win else prefix
    # If we've cross compiled on Windows to unix, chances are many files will refer to Windows
    # paths.
    if utils.on_win or m.config.subdir.startswith('win'):
        # TODO :: Should we also handle MSYS2 paths (/c/blah) here? Probably!
        # Search for both slash flavours of the prefix and of the placeholder.
        pfx_variants = (prefix,
                        prefix_u,
                        prefix_placeholder.replace('\\', '/'),
                        prefix_placeholder.replace('/', '\\'))
    else:
        pfx_variants = (prefix, prefix_placeholder)
    # Backslashes are doubled so that they are literal inside the regex.
    re_test = b'(' + b'|'.join(v.encode('utf-8').replace(b'\\', b'\\\\')
                               for v in pfx_variants) + b')'
    pfx_matches = have_regex_files([entry[2] for entry in files_with_prefix], prefix=prefix,
                                   tag='prefix',
                                   regex_re=re_test,
                                   # We definitely do not want this as a replacement_re as it'd
                                   # replace /opt/anaconda1anaconda2anaconda3 with the prefix.
                                   # As it happens we do not do any replacement at all here.
                                   replacement_re=None,
                                   also_binaries=True,
                                   match_records={},
                                   debug=m.config.debug)

    # This is for Windows mainly, though we may want to allow multiple searches at once in a
    # file on all OSes some-day. It is harmless to do this on all systems anyway.
    prefixes_for_file = {
        filename: {sm['text'] for sm in match['submatches']}
        for filename, match in pfx_matches.items()
    }
    files_with_prefix_new = []
    for _, mode, filename in files_with_prefix:
        # Match records are looked up with backslash separators on Windows.
        native_name = filename.replace('/', '\\') if utils.on_win else filename
        for pfx in prefixes_for_file.get(native_name, ()):
            files_with_prefix_new.append((pfx.decode('utf-8'), mode, filename))
    files_with_prefix = files_with_prefix_new

    all_matches = {}
    variant = m.config.variant
    quoted_tags = []
    if 'replacements' in variant:
        for replacement in variant['replacements']['all_replacements']:
            matching_files = [f for f in files if any(
                glob2.fnmatch.fnmatch(f, r) for r in replacement['glob_patterns'])]
            all_matches = have_regex_files(
                files=matching_files,
                prefix=prefix,
                tag=replacement['tag'],
                regex_re=replacement['regex_re'],
                replacement_re=replacement['replacement_re'],
                match_records=all_matches,
                regex_rg=replacement['regex_rg'] if 'regex_rg' in replacement else None,
                debug=m.config.debug)
            quoted_tags.append('"' + replacement['tag'] + '"')
    replacement_tags = ', '.join(quoted_tags)
    perform_replacements(all_matches, prefix)

    end = time.time()
    total_replacements = sum(len(match['submatches']) for match in all_matches.values())
    print("INFO :: Time taken to mark (prefix){}\n"
          " {} replacements in {} files was {:.2f} seconds".format(
              " and mark+peform ({})".format(replacement_tags) if replacement_tags else '',
              total_replacements, len(all_matches), end - start))
    return sorted(files_with_prefix)


def _classify_prefix_file_mode(path):
    """Return ``FileMode.binary.name`` if *path* contains a NUL byte, else ``FileMode.text.name``."""
    # NOTE(review): the 'rb+' mode is kept from the code this replaces even
    # though the file is only read here; confirm before relaxing it to 'rb'.
    with open(path, 'rb+') as fh:
        has_nul = b'\x00' in fh.read()
    return FileMode.binary.name if has_nul else FileMode.text.name
def record_prefix_files(m, files_with_prefix):
if not files_with_prefix:
return
# Copies are made to ease debugging. Sorry.
binary_has_prefix_files = m.binary_has_prefix_files()[:]
text_has_prefix_files = m.has_prefix_files()[:]
# We need to cache these as otherwise the fact we remove from this in a for loop later
# that also checks it has elements.
len_binary_has_prefix_files = len(binary_has_prefix_files)
if files_with_prefix and not m.noarch:
if utils.on_win:
# Paths on Windows can contain spaces, so we need to quote the
# paths. Fortunately they can't contain quotes, so we don't have
# to worry about nested quotes.
fmt_str = '"%s" %s "%s"\n'
else:
# Don't do it everywhere because paths on Unix can contain quotes,
# and we don't have a good method of escaping, and because older
# versions of conda don't support quotes in has_prefix
fmt_str = '%s %s %s\n'
print("Files containing CONDA_PREFIX")
print("-----------------------------")
with open(join(m.config.info_dir, 'has_prefix'), 'w') as fo:
for pfix, mode, fn in files_with_prefix:
print('{} :: {} :: {}'.format(pfix, mode, fn))
ignored_because = None
if (fn in binary_has_prefix_files or (not len_binary_has_prefix_files or
m.get_value('build/detect_binary_files_with_prefix', False) and mode == 'binary')):
if fn in binary_has_prefix_files:
if mode != 'binary':
mode = 'binary'
elif fn in binary_has_prefix_files:
print("File {} force-identified as 'binary', "
"But it is 'binary' anyway, suggest removing it from "
"`build/binary_has_prefix_files`".format(fn))
if fn in binary_has_prefix_files:
binary_has_prefix_files.remove(fn)
elif fn in text_has_prefix_files or mode == 'text':
if mode != 'text':
mode = 'text'
elif fn in text_has_prefix_files:
print("File {} force-identified as 'text', "
"But it is 'text' anyway, suggest removing it from "
"`build/has_prefix_files`".format(fn))
if fn in text_has_prefix_files:
text_has_prefix_files.remove(fn)
else:
ignored_because = " :: Not in build/%s_has_prefix_files" % (mode)