#!/usr/bin/env python3
# ruff: noqa: E722
"""
Downloads virtual tours from matterport.
Usage is either running this program with the URL/pageid as an argument or calling the initiateDownload(URL/pageid) method.
"""
from __future__ import annotations
import urllib.parse
from curl_cffi import requests
from enum import Enum
import asyncio
import aiofiles
import json
import threading
import urllib.request
from urllib.parse import urlparse
import pathlib
import re
import os
import hashlib
import platform
import shutil
import sys
from typing import Any, ClassVar, cast
from dataclasses import dataclass
import logging
from tqdm import tqdm
from http.server import HTTPServer, SimpleHTTPRequestHandler
import decimal
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
BASE_MATTERPORTDL_DIR = pathlib.Path(__file__).resolve().parent
MAX_CONCURRENT_REQUESTS = 20 # cffi will make sure no more than this many curl workers are used at once
MAX_CONCURRENT_TASKS = 64  # while we could theoretically leave this unbounded, relying only on MAX_CONCURRENT_REQUESTS, there is little reason to spawn a million tasks at once
BASE_MATTERPORT_DOMAIN = "matterport.com"
CHINA_MATTERPORT_DOMAIN = "matterportvr.cn"
MAIN_SHOWCASE_FILENAME = ""  # the filename for the main showcase js (the runtime chunk is tracked separately)
# Matterport uses various access keys for a page; when the primary key doesn't work we try some other ones. Note a single model can have 1400+ unique access keys, and it is not clear which ones matter.
dirsMadeCache: dict[str, bool] = {}
THIS_MODEL_ROOT_DIR: str
SERVED_BASE_URL: str # url we are serving from ie http://127.0.0.1:8080
MODEL_IS_DEFURNISHED = False  # defurnished models can be accessed directly but have some quirks; eventually will add to the initial download
BASE_MODEL_ID = "" # normally this is the model id we are downloading unless defurnished
SWEEP_DO_4K = True # assume 4k by default
AccessKeyType = Enum("AccessKeyType", ["LeaveKeyAlone", "PrimaryKey", "MAIN_PAGE_GENERIC_KEY", "MAIN_PAGE_DAM_50K", "FILES2_BASE_URL_KEY", "FILES3_TEMPLATE_KEY", "SWEEP_KEY", "GRAPH_MODEL_VIEW_PREFETCH"]) # sweep key primarily used for defurnished, GRAPH_MODEL_VIEW_PREFETCH is only used for attachments
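# Hedged illustration (our note, not in the original): access keys ride on asset urls as the "?t=..." query parameter,
# e.g. "?t=2-796d5d010d7183bce7f0999701973d8b05b2df8f-1735673498-0" (the placeholder key reused in downloadMainAssets below).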
# if no git revision fall back to our sha
def self_sha():
with open(pathlib.Path(__file__).resolve(), "rb") as f:
return hashlib.file_digest(f, "sha1").hexdigest()
# modified from https://gist.github.com/pkienzle/5e13ec07077d32985fa48ebe43486832
def git_rev():
"""
Get the git revision for the repo in the path *repo*.
Returns the commit id of the current head.
Note: this function parses the files in the git repository directory
without using the git application. It may break if the structure of
the git repository changes. It only reads files, so it should not do
any damage to the repository in the process.
"""
# Based on stackoverflow am9417
# https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script/59950703#59950703
git_root = BASE_MATTERPORTDL_DIR / ".git"
git_head = git_root / "HEAD"
if not git_head.exists():
return None
# Read .git/HEAD file
with git_head.open("r") as fd:
head_ref = fd.read()
# Find head file .git/HEAD (e.g. ref: ref/heads/master => .git/ref/heads/master)
if not head_ref.startswith("ref: "):
return head_ref
head_ref = head_ref[5:].strip()
# Read commit id from head file
head_path = git_root.joinpath(*head_ref.split("/"))
if not head_path.exists():
return None
with head_path.open("r") as fd:
commit = fd.read().strip()
return f"{head_ref} ({commit})"
def sys_info():
str = "Running python "
try:
str += platform.python_version()
str += " on " + sys.platform
ourVersion = None
try:
ourVersion = git_rev()
except Exception:
pass
if ourVersion is None:
ourVersion = "S " + self_sha()
str += " with matterport-dl version: " + ourVersion
except Exception:
pass
return str
def makeDirs(dirname):
global dirsMadeCache
if dirname in dirsMadeCache:
return
pathlib.Path(dirname).mkdir(parents=True, exist_ok=True)
dirsMadeCache[dirname] = True
def consoleDebugLog(msg: str, loglevel=logging.INFO, forceDebugOn=False):
logging.log(loglevel, msg)
if not CLA.getCommandLineArg(CommandLineArg.CONSOLE_LOG) and (forceDebugOn or CLA.getCommandLineArg(CommandLineArg.DEBUG)):
print(msg)
def consoleLog(msg: str, loglevel=logging.INFO):
consoleDebugLog(msg, loglevel, True)
def getModifiedName(filename: str):
filename, _, query = filename.partition("?")
basename = filename
ext = ""
pos = filename.rfind(".")
if pos != -1:
ext = basename[pos + 1 :]
basename = basename[0:pos]
if query:
ext += "?" + query
return f"{basename}.modified.{ext}"
def getVariants():
variants = []
global SWEEP_DO_4K
# to be smart we should be using the GetShowcaseSweeps file and data.model.locations[4].pano.resolutions to determine if we should be trying 2k or 4k
depths = ["512", "1k", "2k"]
if SWEEP_DO_4K:
depths.append("4K")
    for depth, z in enumerate(depths):
for x in range(2**depth):
for y in range(2**depth):
for face in range(6):
variants.append(f"{z}_face{face}_{x}_{y}.jpg")
return variants
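# Illustrative tile math (our note, not in the original): depth level d has 2**d x 2**d tiles per cube face, so with
# 4K enabled getVariants() yields 6 * (1 + 4 + 16 + 64) = 510 filenames per sweep, such as "2k_face3_1_0.jpg".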
async def downloadDAM(accessurl, uuid):
# This should have already been downloaded during the ADV download
    damSrcFile = f"..{os.path.sep}{uuid}_50k.dam"
    await downloadFile("UUID_DAM50K", True, accessurl.format(filename=f"{uuid}_50k.dam"), damSrcFile, key_type=AccessKeyType.FILES3_TEMPLATE_KEY)
    shutil.copy(damSrcFile, f"{uuid}_50k.dam")  # so the url here has the ~ in it but the primary dir is the parent; still, we will store it in both places
cur_file = ""
try:
        for i in range(1000):  # basically download until the first failure and assume that is all of them; maybe we should instead stop on the first 404 or something :)
cur_file = accessurl.format(filename=f"{uuid}_50k_texture_jpg_high/{uuid}_50k_{i:03d}.jpg")
await downloadFile("UUID_TEXTURE_HIGH", True, cur_file, f"{uuid}_50k_texture_jpg_high/{uuid}_50k_{i:03d}.jpg")
cur_file = accessurl.format(filename=f"{uuid}_50k_texture_jpg_low/{uuid}_50k_{i:03d}.jpg")
await downloadFile("UUID_TEXTURE_LOW", True, cur_file, f"{uuid}_50k_texture_jpg_low/{uuid}_50k_{i:03d}.jpg")
except Exception as ex:
logging.warning(f"Exception downloading file: {cur_file} of: {str(ex)}")
pass # very lazy and bad way to only download required files
async def downloadSweeps(accessurl: str, sweeps: list[str]):
global MODEL_IS_DEFURNISHED
# the sweep query at least has data.model.defurnishViews[0].model.id for others
forceKey = AccessKeyType.PrimaryKey
if MODEL_IS_DEFURNISHED:
forceKey = AccessKeyType.SWEEP_KEY
toDownload: list[AsyncDownloadItem] = []
for sweep in sweeps:
sweep = sweep.replace("-", "")
        for variant in getVariants():  # if we checked for 404s we could do this more efficiently, but serializing it to do that would be slower than just a bunch of 404s
toDownload.append(AsyncDownloadItem("MODEL_SWEEPS", True, accessurl.format(filename=f"tiles/{sweep}/{variant}") + "&imageopt=1", f"tiles/{sweep}/{variant}", key_type=forceKey))
await AsyncArrayDownload(toDownload)
# these 3 download-with-json-post functions were for old graphql queries we don't need/use currently
async def downloadFileWithJSONPostAndGetText(type, shouldExist, url, file, post_json_str, descriptor, always_download=False):
if not CLA.getCommandLineArg(CommandLineArg.TILDE):
file = file.replace("~", "_")
await downloadFileWithJSONPost(type, shouldExist, url, file, post_json_str, descriptor, always_download)
if not os.path.exists(file):
return ""
else:
async with aiofiles.open(file, "r", encoding="UTF-8") as f:
return await f.read()
# does not use access keys currently not needed
async def downloadFileWithJSONPost(type, shouldExist, url, file, post_json_str, descriptor, always_download=False):
global OUR_SESSION
if not CLA.getCommandLineArg(CommandLineArg.TILDE):
file = file.replace("~", "_")
if "/" in file:
makeDirs(os.path.dirname(file))
if not CLA.getCommandLineArg(CommandLineArg.DOWNLOAD) or (os.path.exists(file) and not always_download): # skip already downloaded files except forced downloads
logUrlDownloadSkipped(type, file, url, descriptor)
return
reqId = logUrlDownloadStart(type, file, url, descriptor, shouldExist, key_type=AccessKeyType.PrimaryKey)
try:
resp: requests.Response = await OUR_SESSION.request(url=url, method="POST", headers={"Content-Type": "application/json"}, data=bytes(post_json_str, "utf-8"))
resp.raise_for_status()
# req.add_header('Content-Length', len(body_bytes))
async with aiofiles.open(file, "wb") as the_file:
await the_file.write(resp.content)
logUrlDownloadFinish(type, file, url, descriptor, shouldExist, reqId)
except Exception as ex:
logUrlDownloadFinish(type, file, url, descriptor, shouldExist, reqId, ex)
raise Exception(f"Request error for url: {url} ({type}) that would output to: {file}") from ex
async def GetTextOnlyRequest(type, shouldExist, url, post_data=None) -> str:
global PROGRESS
useTmpFileName = ""
async with aiofiles.tempfile.NamedTemporaryFile(delete_on_close=False) as tmpFile: # type: ignore
useTmpFileName = cast(str, tmpFile.name)
result = await downloadFileAndGetText(type, shouldExist, url, useTmpFileName, post_data)
PROGRESS.Increment(ProgressType.Request, -1)
PROGRESS.Increment(ProgressType.Success, -1)
try:
os.remove(useTmpFileName)
except:
pass
return result
async def downloadFileAndGetText(type, shouldExist, url, file, post_data=None, isBinary=False, always_download=False, key_type: AccessKeyType = AccessKeyType.PrimaryKey):
if not CLA.getCommandLineArg(CommandLineArg.TILDE):
file = file.replace("~", "_")
await downloadFile(type, shouldExist, url, file, post_data, always_download, key_type)
if not os.path.exists(file):
return ""
else:
readMode = "r"
encoding = "UTF-8"
if isBinary:
readMode = "rb"
encoding = None
async with aiofiles.open(file, readMode, encoding=encoding) as f: # type: ignore - r and rb are handled but by diff overload groups
return await f.read()
# Add type parameter, shortResourcePath, shouldExist
async def downloadFile(type, shouldExist, url, file, post_data=None, always_download=False, key_type: AccessKeyType = AccessKeyType.PrimaryKey):
global MAX_TASKS_SEMAPHORE, OUR_SESSION
async with MAX_TASKS_SEMAPHORE:
if key_type != AccessKeyType.LeaveKeyAlone:
if key_type is None or key_type == AccessKeyType.PrimaryKey:
key = KeyHandler.PrimaryKey
else:
key = KeyHandler.GetAccessKey(key_type)
url = KeyHandler.SetAccessKeyForUrl(url, key)
if not CLA.getCommandLineArg(CommandLineArg.TILDE):
file = file.replace("~", "_")
if "/" in file:
makeDirs(os.path.dirname(file))
if "?" in file:
file = file.split("?")[0]
        if not CLA.getCommandLineArg(CommandLineArg.DOWNLOAD) or (os.path.exists(file) and not always_download):  # skip already downloaded files except always-download ones, which are generally ones that may contain keys
logUrlDownloadSkipped(type, file, url, "")
return
reqId = logUrlDownloadStart(type, file, url, "", shouldExist, key_type=key_type)
try:
response = await OUR_SESSION.get(url)
response.raise_for_status() # Raise an exception if the response has an error status code
async with aiofiles.open(file, "wb") as f:
await f.write(response.content)
logUrlDownloadFinish(type, file, url, "", shouldExist, reqId)
return
except Exception as err:
# Try again but with different accesskeys, if error is 404 though no need to retry
if "?t=" in url and "Error 404" not in f"{err}":
                if False:  # brute forcing disabled; at a minimum we probably shouldn't try GetAllKeys, just the primary
for key in KeyHandler.GetAllKeys():
url2 = ""
try:
url2 = KeyHandler.SetAccessKeyForUrl(url, key)
response = await OUR_SESSION.get(url2)
response.raise_for_status() # Raise an exception if the response has an error status code
async with aiofiles.open(file, "wb") as f:
await f.write(response.content)
logUrlDownloadFinish(type, file, url2, "", shouldExist, reqId)
return
except Exception as err2:
logUrlDownloadFinish(type, file, url2, "", shouldExist, reqId, err2, True)
pass
logUrlDownloadFinish(type, file, url, "", shouldExist, reqId, err)
raise Exception(f"Request error for url: {url} ({type}) that would output to: {file}") from err
def validUntilFix(text):
return re.sub(r"validUntil\"\s*:\s*\"20[\d]{2}-[\d]{2}-[\d]{2}T", 'validUntil":"2099-01-01T', text)
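# Illustrative example (our note, not in the original): any embedded expiry date is pushed far into the future, e.g.
#   validUntilFix('"validUntil": "2025-03-01T12:00:00Z"') -> '"validUntil":"2099-01-01T12:00:00Z"'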
async def downloadGraphModels(pageid):
global GRAPH_DATA_REQ, BASE_MATTERPORT_DOMAIN
makeDirs("api/mp/models")
for key in GRAPH_DATA_REQ:
file_path_base = f"api/mp/models/graph_{key}"
file_path = f"{file_path_base}.json"
req_url = GRAPH_DATA_REQ[key].replace("[MATTERPORT_MODEL_ID]", pageid)
text = await downloadFileAndGetText("GRAPH_MODEL", True, f"https://my.{BASE_MATTERPORT_DOMAIN}/api/mp/models/graph{req_url}", file_path, always_download=CLA.getCommandLineArg(CommandLineArg.REFRESH_KEY_FILES) and CLA.getCommandLineArg(CommandLineArg.ALWAYS_DOWNLOAD_GRAPH_REQS))
KeyHandler.SaveKeysFromText(f"GRAPH_{key}", text)
if key == "GetModelViewPrefetch":
KeyHandler.SetAccessKey(AccessKeyType.GRAPH_MODEL_VIEW_PREFETCH, KeyHandler.GetKeysFromStr(text)[0])
        # Patch (graph_GetModelDetails.json & graph_GetSnapshots.json and such) URLs to get files from the local server instead of https://cdn-2.matterport.com/
if CLA.getCommandLineArg(CommandLineArg.MANUAL_HOST_REPLACEMENT):
text = text.replace(f"https://cdn-2.{BASE_MATTERPORT_DOMAIN}", "http://127.0.0.1:8080") # without the localhost it seems like it may try to do diff
text = text.replace(f"https://cdn-1.{BASE_MATTERPORT_DOMAIN}", "http://127.0.0.1:8080") # without the localhost it seems like it may try to do diff
text = validUntilFix(text)
async with aiofiles.open(getModifiedName(file_path), "w", encoding="UTF-8") as f:
await f.write(text)
ProgressType = Enum("ProgressType", ["Request", "Success", "Skipped", "Failed404", "Failed403", "FailedUnknown"])
class ProgressStats:
def __str__(self):
relInfo = ""
if self.relativeTo is not None:
relInfo = "Relative "
return f"{relInfo}Total fetches: {self.TotalPosRequests()} {self.ValStr(ProgressType.Skipped)} actual {self.ValStr(ProgressType.Request)} {self.ValStr(ProgressType.Success)} {self.ValStr(ProgressType.Failed403)} {self.ValStr(ProgressType.Failed404)} {self.ValStr(ProgressType.FailedUnknown)}"
def RelativeMark(self):
self.relativeTo = dict(self.stats)
def ClearRelative(self):
self.relativeTo = None
relativeTo: dict[ProgressType, int] | None
def __init__(self):
self.stats: dict[ProgressType, int] = dict()
# self.locks : dict[ProgressType,asyncio.Semaphore] = dict()
self.locks: dict[ProgressType, threading.Lock] = dict()
for typ in ProgressType:
self.stats[typ] = 0
self.locks[typ] = threading.Lock()
def Val(self, typ: ProgressType):
val = self.stats[typ]
if self.relativeTo is not None:
val -= self.relativeTo[typ]
return val
def TotalPosRequests(self):
return self.Val(ProgressType.Request) + self.Val(ProgressType.Skipped)
    def ValStr(self, typ: ProgressType):
        val = self.Val(typ)
        total = self.TotalPosRequests()
        perc = f" ({val / total:.0%})" if total else ""  # guard against division by zero when nothing has been fetched yet
        return f"{typ.name}: {val}{perc}"
    def Increment(self, typ: ProgressType, amt: int = 1):
        with self.locks[typ]:
            self.stats[typ] += amt  # honor amt; callers like GetTextOnlyRequest pass -1 to back out a counted request
            return self.stats[typ]
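# Illustrative usage (our note, not in the original): RelativeMark() snapshots the counters so progress lines can show
# per-batch deltas, e.g. PROGRESS.RelativeMark(); <download a batch>; logging.debug(f"{PROGRESS}"); PROGRESS.ClearRelative()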
PROGRESS = ProgressStats()
def logUrlDownloadFinish(type, localTarget, url, additionalParams, shouldExist, requestID, error=None, altUrlExists=False):
global PROGRESS
logLevel = logging.INFO
prefix = "Finished"
if error:
if altUrlExists:
logLevel = logging.WARNING
error = f"PartErr of: {error}"
prefix = "aTryFail"
else:
logLevel = logging.ERROR
error = f"Error of: {error}"
prefix = "aFailure"
PROGRESS.Increment(ProgressType.Failed403 if "Error 403" in error else ProgressType.Failed404 if "Error 404" in error else ProgressType.FailedUnknown)
else:
PROGRESS.Increment(ProgressType.Success)
error = ""
    _logUrlDownload(logLevel, prefix, type, localTarget, url, additionalParams, shouldExist, requestID, error)  # not sure if we should lower the log level when shouldExist is false
def logUrlDownloadSkipped(type, localTarget, url, additionalParams):
global PROGRESS
PROGRESS.Increment(ProgressType.Skipped)
_logUrlDownload(logging.DEBUG, "Skipped already downloaded", type, localTarget, url, additionalParams, False, "")
def logUrlDownloadStart(type, localTarget, url, additionalParams, shouldExist, key_type):
global PROGRESS
ourReqId = PROGRESS.Increment(ProgressType.Request)
_logUrlDownload(logging.DEBUG, "Starting", type, localTarget, url, additionalParams, shouldExist, ourReqId, key_type=key_type)
return ourReqId
def _logUrlDownload(logLevel, logPrefix, type, localTarget, url, additionalParams, shouldExist, requestID, optionalResult=None, key_type=AccessKeyType.PrimaryKey):
global CLA
if not CLA.getCommandLineArg(CommandLineArg.DOWNLOAD):
return
if optionalResult:
optionalResult = f"Result: {optionalResult}"
else:
optionalResult = ""
if key_type is not None:
key_type = f" KeyType: {key_type}"
else:
key_type = ""
logging.log(logLevel, f"{logPrefix} REQ for {type} {requestID}: should exist: {shouldExist} {optionalResult} File: {localTarget} at url: {url} {key_type} {additionalParams}")
def extractJSDict(forWhat: str, jsStr: str):
    ret: dict[str, str] = {}
    # Expects a string where the first { starts the dict and the last } ends it.
    startPos = jsStr.find("{")
    if startPos == -1:
        raise Exception(f"Unable to extract JS dictionary for: {forWhat} from the JS string: {jsStr} can't find first {{")
    endPos = jsStr.rfind("}")
    if endPos == -1:
        raise Exception(f"Unable to extract JS dictionary for: {forWhat} from the JS string: {jsStr} can't find last }}")
    jsStr = jsStr[startPos + 1 : endPos]
    pairs = jsStr.split(",")
    for kvp in pairs:
        arr = kvp.replace('"', "").split(":")
        key = arr[0]
        key = int(float(key))  # keys can be in scientific notation
        ret[f"{key}"] = arr[1]
    return ret
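# Illustrative example (our note, not in the original):
#   extractJSDict("demo", '{239:"three-examples",777:"split"}') -> {"239": "three-examples", "777": "split"}
# and a scientific-notation key such as 1e3 is normalized to "1000".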
async def downloadAssets(base, base_page_text):
global PROGRESS, BASE_MATTERPORT_DOMAIN, MAIN_SHOWCASE_FILENAME
language_codes = ["af", "sq", "ar-SA", "ar-IQ", "ar-EG", "ar-LY", "ar-DZ", "ar-MA", "ar-TN", "ar-OM", "ar-YE", "ar-SY", "ar-JO", "ar-LB", "ar-KW", "ar-AE", "ar-BH", "ar-QA", "eu", "bg", "be", "ca", "zh-TW", "zh-CN", "zh-HK", "zh-SG", "hr", "cs", "da", "nl", "nl-BE", "en", "en-US", "en-EG", "en-AU", "en-GB", "en-CA", "en-NZ", "en-IE", "en-ZA", "en-JM", "en-BZ", "en-TT", "et", "fo", "fa", "fi", "fr", "fr-BE", "fr-CA", "fr-CH", "fr-LU", "gd", "gd-IE", "de", "de-CH", "de-AT", "de-LU", "de-LI", "el", "he", "hi", "hu", "is", "id", "it", "it-CH", "ja", "ko", "lv", "lt", "mk", "mt", "no", "pl", "pt-BR", "pt", "rm", "ro", "ro-MO", "ru", "ru-MI", "sz", "sr", "sk", "sl", "sb", "es", "es-AR", "es-GT", "es-CR", "es-PA", "es-DO", "es-MX", "es-VE", "es-CO", "es-PE", "es-EC", "es-CL", "es-UY", "es-PY", "es-BO", "es-SV", "es-HN", "es-NI", "es-PR", "sx", "sv", "sv-FI", "th", "ts", "tn", "tr", "uk", "ur", "ve", "vi", "xh", "ji", "zu"]
language_codes = ["zh-TW", "zh-CN", "nl", "de", "it", "ja", "ko", "pt", "ru", "es"] # these are the only language codes that seem to succeed if a model works with one other than this please file a bug report and let us know, these are hardcoded into showcase.js file
font_files = ["ibm-plex-sans-100", "ibm-plex-sans-100italic", "ibm-plex-sans-200", "ibm-plex-sans-200italic", "ibm-plex-sans-300", "ibm-plex-sans-300italic", "ibm-plex-sans-500", "ibm-plex-sans-500italic", "ibm-plex-sans-600", "ibm-plex-sans-600italic", "ibm-plex-sans-700", "ibm-plex-sans-700italic", "ibm-plex-sans-italic", "ibm-plex-sans-regular", "mp-font", "roboto-100", "roboto-100italic", "roboto-300", "roboto-300italic", "roboto-500", "roboto-500italic", "roboto-700", "roboto-700italic", "roboto-900", "roboto-900italic", "roboto-italic", "roboto-regular"]
# extension assumed to be .png unless it is .svg or .jpg, for anything else place it in assets
image_files = ["360_placement_pin_mask", "chrome", "Desktop-help-play-button.svg", "Desktop-help-spacebar", "edge", "escape", "exterior", "exterior_hover", "firefox", "interior", "interior_hover", "matterport-logo-light.svg", "matterport-logo.svg", "mattertag-disc-128-free.v1", "mobile-help-play-button.svg", "nav_help_360", "nav_help_click_inside", "nav_help_gesture_drag", "nav_help_gesture_drag_two_finger", "nav_help_gesture_pinch", "nav_help_gesture_position", "nav_help_gesture_position_two_finger", "nav_help_gesture_tap", "nav_help_inside_key", "nav_help_keyboard_all", "nav_help_keyboard_left_right", "nav_help_keyboard_up_down", "nav_help_mouse_click", "nav_help_mouse_ctrl_click", "nav_help_mouse_drag_left", "nav_help_mouse_drag_right", "nav_help_mouse_position_left", "nav_help_mouse_position_right", "nav_help_mouse_zoom", "nav_help_tap_inside", "nav_help_zoom_keys", "NoteColor", "pinAnchor", "safari", "scope.svg", "showcase-password-background.jpg", "surface_grid_planar_256", "vert_arrows", "headset-quest-2", "tagColor", "matterport-app-icon.svg"]
assets = ["js/browser-check.js", "css/showcase.css", "css/packages-nova-ui.css", "css/scene.css", "css/unsupported_browser.css", "cursors/grab.png", "cursors/grabbing.png", "cursors/zoom-in.png", "cursors/zoom-out.png", "locale/strings.json", "css/ws-blur.css", "css/core.css", "css/late.css"]
# following seem no more: "css/split.css", "headset-cardboard", "headset-quest", "NoteIcon", "puck_256_red", "tagbg", "tagmask", "roboto-700-42_0", "pinIconDefault",
# downloadFile("my.matterport.com/favicon.ico", "favicon.ico")
base_page_js_loads = re.findall(r"script src=[\"']([^\"']+[.]js)[\"']", base_page_text, flags=re.IGNORECASE)
# now they use module imports as well like: import(importBase + 'js/runtime~showcase.69d7273003fd73b7a8f3.js'),
    import_js_loads = re.findall(r'import\([^\'\"()]*[\'"]([^\'"()]+\.js)[\'"]\s*\)', base_page_text, flags=re.IGNORECASE)
for js in import_js_loads:
base_page_js_loads.append(js)
typeDict: dict[str, str] = {}
for asset in assets:
typeDict[asset] = "STATIC_ASSET"
    showcase_runtime_filename: str | None = None
    react_vendor_filename: str | None = None
if CLA.getCommandLineArg(CommandLineArg.DEBUG):
DebugSaveFile("js_found.txt", "\n".join(base_page_js_loads))
for js in base_page_js_loads:
file = js
if "://" in js:
consoleDebugLog(f"Skipping {js} should be the three.js file as the only non-relative one")
# if "://" not in js:
# file = base + js
if file in assets:
continue
typeDict[file] = "HTML_DISCOVERED_JS"
if "showcase" in js:
if "runtime" in js:
showcase_runtime_filename = file
else:
MAIN_SHOWCASE_FILENAME = file
assets.append(file)
else:
if "vendors-react" in js:
react_vendor_filename = file
assets.append(file)
    if showcase_runtime_filename is None:
        raise Exception("Among all js files found on the page, could not find one with both 'showcase' and 'runtime' in the filename (the showcase runtime js file)")
    if react_vendor_filename is None:
        raise Exception("Among all js files found on the page, could not find one with 'vendors-react' in the filename (the react vendor js file)")
await downloadFile("STATIC_ASSET", True, "https://matterport.com/nextjs-assets/images/favicon.ico", "favicon.ico") # mainly to avoid the 404, always matterport.com
showcase_cont = await downloadFileAndGetText(typeDict[showcase_runtime_filename], True, base + showcase_runtime_filename, showcase_runtime_filename, always_download=CLA.getCommandLineArg(CommandLineArg.REFRESH_KEY_FILES))
    # lets try to extract the js files it might be loading and make sure we know them; the code has things like .e(858) to load, where the numbers are what we care about
# js_extracted = re.findall(r"\.e\(([0-9]{2,3})\)", showcase_cont)
    # here is how the JS is prettied up (aka with spaces). First come JS files with specific names, second the js-file-to-key map, and finally the css files. The specifically named js files still need the key; instead of [number].[key].js it is [name].[key].js
# , d.u = e => "js/" + ({
# 239: "three-examples",
# 777: "split",
# 1662: "sdk-bundle",
# 9114: "core",
# 9553: "control-kit"
# } [e] || e) + "." + {
# 172: "6c50ed8e5ff7620de75b",
# 9553: "8aa28bbfc8f4948fd4d1",
# 9589: "dc4901b493f7634edbcf",
# 9860: "976dc6caac98abda24c9"
# } [e] + ".js", d.miniCssF = e => "css/" + ({
# 7475: "late",
# 9114: "core"
# } [e] || e) + ".css"
match = re.search(
r"""
"js/"\+ # find js/+ (literal plus)
(?P<namedJSFiles>[^\[]+) #capture everything until the first [ character store in group namedJSFiles
(?P<JSFileToKey>.+?) #least greedy capture, so capture the minimum amount to make this regex still true
css #stopping when we see the css
(?P<namedCSSFiles>[^\[]+) #similar to before capture to first [
.+? #skip the minimum amount to get to next part
miniCss=.+? #find miniCss= then skip minimum to first &&
&&
(?P<CSSFileToKey>.+?) #capture minimum until we get to next &&
&&
""",
showcase_cont,
re.X,
)
if match is None:
raise Exception("Unable to extract js files and css files from showcase runtime js file")
groupDict = match.groupdict()
jsNamedDict = extractJSDict("showcase-runtime.js: namedJSFiles", groupDict["namedJSFiles"])
jsKeyDict = extractJSDict("showcase-runtime.js: JSFileToKey", groupDict["JSFileToKey"])
cssNamedDict = extractJSDict("showcase-runtime.js: namedCSSFiles", groupDict["namedCSSFiles"])
cssKeyDict = extractJSDict("showcase-runtime.js: CSSFileToKey", groupDict["CSSFileToKey"])
for number, key in jsKeyDict.items():
name = number
if name in jsNamedDict:
name = jsNamedDict[name]
file = f"js/{name}.{key}.js"
typeDict[file] = "SHOWCASE_DISCOVERED_JS"
assets.append(file)
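    # Illustrative mapping (our note, using the sample data from the comment above): with jsKeyDict["9553"] == "8aa28bbfc8f4948fd4d1"
    # and jsNamedDict["9553"] == "control-kit", this loop appends the asset "js/control-kit.8aa28bbfc8f4948fd4d1.js"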
for number, key in cssKeyDict.items():
name = number
if name in cssNamedDict:
name = cssNamedDict[name]
file = f"css/{name}.css" # key is not used for css its just 1 always
typeDict[file] = "SHOWCASE_DISCOVERED_CSS"
assets.append(file)
for image in image_files:
if not image.endswith(".jpg") and not image.endswith(".svg"):
image = image + ".png"
file = "images/" + image
typeDict[file] = "STATIC_IMAGE"
assets.append(file)
for f in font_files:
for file in ["fonts/" + f + ".woff", "fonts/" + f + ".woff2"]:
typeDict[file] = "STATIC_FONT"
assets.append(file)
for lc in language_codes:
file = "locale/messages/strings_" + lc + ".json"
typeDict[file] = "STATIC_LOCAL_STRINGS"
assets.append(file)
toDownload: list[AsyncDownloadItem] = []
for asset in assets:
local_file = asset
type = typeDict[asset]
if local_file.endswith("/"):
local_file = local_file + "index.html"
shouldExist = True
toDownload.append(AsyncDownloadItem(type, shouldExist, f"{base}{asset}", local_file))
await AsyncArrayDownload(toDownload)
if react_vendor_filename and os.path.exists(react_vendor_filename):
reactCont = ""
with open(react_vendor_filename, "r", encoding="UTF-8") as f:
reactCont = f.read()
        reactCont = reactCont.replace("(t.src=s.src)", '(t.src=""+(t.src??s.src))')  # hacky but in certain conditions react will try to reset the source on something after it loads to re-trigger the load event but this breaks jsnetproxy. This allows the same triggering but uses the existing source if it exists. https://github.com/facebook/react/blob/37906d4dfbe80d71f312f7347bb9ddb930484d28/packages/react-dom-bindings/src/client/ReactFiberConfigDOM.js#L744. Right now this seems to only happen on embedded attachments.
with open(getModifiedName(react_vendor_filename), "w", encoding="UTF-8") as f:
f.write(reactCont)
toDownload.clear()
async def downloadWebglVendors(base_page_text):
regex = r"https://static.matterport.com/webgl-vendors/three/[a-z0-9\-_/.]*/three(?:\.[a-zA-Z0-9]+)?\.min\.js"
    threeMinMatch = re.search(regex, base_page_text)  # this is always matterport.com
    if threeMinMatch is None:
        raise Exception(f"Unable to extract the 3d js file name from the page, regex did not match: {regex}")
    threeMin = threeMinMatch.group()
threeBase = threeMin.rpartition("/")[0]
    webglVendors = ["three.module.min.js", "three.core.min.js", "libs/draco/gltf/draco_wasm_wrapper.js", "libs/draco/gltf/draco_decoder.wasm", "libs/basis/basis_transcoder.wasm", "libs/basis/basis_transcoder.js"]
toDownload: list[AsyncDownloadItem] = []
for script in webglVendors:
url = f'{threeBase}/{script}'
toDownload.append(AsyncDownloadItem("WEBGL_FILE", False, url, urlparse(url).path[1:]))
await AsyncArrayDownload(toDownload)
class AsyncDownloadItem:
    # shouldExist is purely informational for debugging; if false we are saying it might not exist and that's OK. It does not change any internal logic.
    # key_type overrides the access key we will set in the url; pass AccessKeyType.LeaveKeyAlone to not change the one already in the url
def __init__(self, type: str, shouldExist: bool, url: str, file: str, key_type: AccessKeyType = AccessKeyType.PrimaryKey):
self.type = type
self.shouldExist = shouldExist
self.url = url
self.file = file
self.key_type = key_type
class ExceptionWhatExceptionTaskGroup(asyncio.TaskGroup):
def __init__(self):
super().__init__()
        self._parent_cancel_requested = True  # hacky but required to prevent an aborted child task from cancelling us (the parent task group)
def _abort(self): # normally it goes through and cancels all the others now
return None
async def __aexit__(self, et, exc, tb): # at end of block it would throw any exceptions
try:
await super().__aexit__(et, exc, tb)
except:
pass
async def AsyncArrayDownload(assets: list[AsyncDownloadItem]):
# with tqdm(total=(len(assets))) as pbar:
async with ExceptionWhatExceptionTaskGroup() as tg:
PROGRESS.RelativeMark()
for asset in tqdm(assets):
# pbar.update(1)
tg.create_task(downloadFile(asset.type, asset.shouldExist, asset.url, asset.file, key_type=asset.key_type))
await asyncio.sleep(0.001) # we need some sleep or we will not yield
while MAX_TASKS_SEMAPHORE.locked():
await asyncio.sleep(0.01)
logging.debug(f"{PROGRESS}")
# can get called twice for defurnished with the second call being the base model id
async def downloadFixedAPIInfo(pageid):
global BASE_MATTERPORT_DOMAIN
assets = [f"api/v1/jsonstore/model/highlights/{pageid}", f"api/v1/jsonstore/model/Labels/{pageid}", f"api/v1/jsonstore/model/mattertags/{pageid}", f"api/v1/jsonstore/model/measurements/{pageid}", f"api/v1/player/models/{pageid}/thumb?width=1707&dpr=1.5&disable=upscale", f"api/v2/models/{pageid}/sweeps", "api/v2/users/current", f"api/player/models/{pageid}/files", f"api/v1/jsonstore/model/trims/{pageid}", "api/v1/plugins?manifest=true", f"api/v1/jsonstore/model/plugins/{pageid}"]
toDownload: list[AsyncDownloadItem] = []
for asset in assets:
local_file = asset
if local_file.endswith("/"):
local_file = local_file + "index.html"
toDownload.append(AsyncDownloadItem("MODEL_INFO", True, f"https://my.{BASE_MATTERPORT_DOMAIN}/{asset}", local_file))
await AsyncArrayDownload(toDownload)
async def downloadInfo(pageid):
global BASE_MATTERPORT_DOMAIN, MODEL_IS_DEFURNISHED, BASE_MODEL_ID
await downloadFixedAPIInfo(pageid)
makeDirs("api/mp/models")
with open("api/mp/models/graph", "w", encoding="UTF-8") as f:
f.write('{"data": "empty"}')
if MODEL_IS_DEFURNISHED:
return
pageJsonFile = f"api/v1/player/models/{pageid}/"
modelJson = await downloadFileAndGetText("MODEL_INFO", True, f"https://my.{BASE_MATTERPORT_DOMAIN}/{pageJsonFile}", f"{pageJsonFile}/index.html", always_download=CLA.getCommandLineArg(CommandLineArg.REFRESH_KEY_FILES))
KeyHandler.SaveKeysFromText("ApiV1PlayerModelsJson", modelJson)
for i in range(1, 4): # file to url mapping
fileText = await downloadFileAndGetText("FILE_TO_URL_JSON", True, f"https://my.{BASE_MATTERPORT_DOMAIN}/api/player/models/{pageid}/files?type={i}", f"api/player/models/{pageid}/files_type{i}", always_download=CLA.getCommandLineArg(CommandLineArg.REFRESH_KEY_FILES)) # may have keys
KeyHandler.SaveKeysFromText(f"FilesType{i}", fileText) # used to be more elegant but now we can just gobble all the keys
async def downloadPlugins(pageid):
global BASE_MATTERPORT_DOMAIN
pluginJson: Any
with open("api/v1/plugins", "r", encoding="UTF-8") as f:
pluginJson = json.loads(f.read())
for plugin in pluginJson:
plugPath = f"showcase-sdk/plugins/published/{plugin['name']}/{plugin['currentVersion']}/plugin.json"
await downloadFile("PLUGIN", True, f"https://static.{BASE_MATTERPORT_DOMAIN}/{plugPath}", plugPath)
async def downloadAttachments():
# May be the only thing from the actual prefetch graph we need;)
try:
with open("api/mp/models/graph_GetModelViewPrefetch.json", "r", encoding="UTF-8") as f:
graphModelSnapshotsJson = json.loads(f.read())
toDownload: list[AsyncDownloadItem] = []
for mattertag in graphModelSnapshotsJson["data"]["model"]["mattertags"]:
if "fileAttachments" in mattertag:
for attachment in mattertag["fileAttachments"]:
toDownload.append(AsyncDownloadItem("MODEL_ATTACHMENTS", True, attachment["url"], urlparse(attachment["url"]).path[1:], key_type=AccessKeyType.LeaveKeyAlone))
await AsyncArrayDownload(toDownload)
except Exception:
logging.exception("Unable to open graph model for prefetch output and download the embedded attachments...")
return
async def downloadPics(pageid):
    # All these should already be downloaded through AdvancedAssetDownload; they likely won't work here without a different access key any more....
with open(f"api/v1/player/models/{pageid}/index.html", "r", encoding="UTF-8") as f:
modeldata = json.load(f)
toDownload: list[AsyncDownloadItem] = []
for image in modeldata["images"]:
toDownload.append(AsyncDownloadItem("MODEL_IMAGES", True, image["src"], urlparse(image["src"]).path[1:])) # want want to use signed_src or download_url?
await AsyncArrayDownload(toDownload)
async def downloadMainAssets(pageid, accessurl):
global THIS_MODEL_ROOT_DIR, MODEL_IS_DEFURNISHED
sweepUUIDs: list[str] = []
if MODEL_IS_DEFURNISHED: # technically we could use this for all, and this data is in the prefetch embedded as well
with open("api/mp/models/graph_GetShowcaseSweeps.json", "r", encoding="UTF-8") as f:
graphModelSweepsJson = json.loads(f.read())
base_node = graphModelSweepsJson["data"]["model"]
for location in base_node["locations"]:
sweepUUIDs.append(location["pano"]["sweepUuid"])
accessurl = base_node["locations"][0]["pano"]["skyboxes"][0]["tileUrlTemplate"]
tildeStart = accessurl.find("~/")
accessurl = accessurl[: tildeStart + 2]
sweepDir = urlparse(accessurl).path[1:]
if not CLA.getCommandLineArg(CommandLineArg.TILDE):
sweepDir = sweepDir.replace("~", "_")
        accessurl = accessurl + "{filename}?t=2-796d5d010d7183bce7f0999701973d8b05b2df8f-1735673498-0"  # the access key here doesn't matter as we will be replacing it
makeDirs(sweepDir)
os.chdir(sweepDir)
else:
        # this uses the old model json but we don't need it; the dam should have already been downloaded, and for the sweeps we can use GetShowcaseSweeps
with open(f"api/v1/player/models/{pageid}/index.html", "r", encoding="UTF-8") as f:
modeldata = json.load(f)
match = re.search(r"models/([a-z0-9-_./~]*)/\{filename\}", accessurl)
if match is None:
raise Exception(f"Unable to extract access model id from url: {accessurl}")
accessid = match.group(1)
basePath = f"models/{accessid}"
if not CLA.getCommandLineArg(CommandLineArg.TILDE):
basePath = basePath.replace("~", "_")
makeDirs(basePath)
os.chdir(basePath)
await downloadDAM(accessurl, modeldata["job"]["uuid"])
sweepUUIDs = modeldata["sweeps"]
    # now: getShowcaseSweeps; we should iterate the locations and get the uuid from data.model.locations[0].pano.sweepUuid. That would resolve many of the 404s we get from brute forcing, since each location has its own max res (2k, 4k, etc)
    await downloadSweeps(accessurl, sweepUUIDs)  # sweeps are generally the biggest thing, minus a few models that have massive 3d detail items
os.chdir(THIS_MODEL_ROOT_DIR)
# Patch showcase.js to fix expiration issue
def patchShowcase():
global BASE_MATTERPORT_DOMAIN
with open(MAIN_SHOWCASE_FILENAME, "r", encoding="UTF-8") as f:
j = f.read()
j = re.sub(r"\&\&\(!e.expires\|\|.{1,10}\*e.expires>Date.now\(\)\)", "", j) # old
j = j.replace("this.urlContainer.expires", "Date.now()") # newer
j = j.replace("this.onStale", "this.onStal") # even newer
if CLA.getCommandLineArg(CommandLineArg.MANUAL_HOST_REPLACEMENT):
j = j.replace('"/api/mp/', '`${window.location.pathname}`+"api/mp/')
j = j.replace("${this.baseUrl}", "${window.location.origin}${window.location.pathname}")
j = j.replace(f'e.get("https://static.{BASE_MATTERPORT_DOMAIN}/geoip/",{{responseType:"json",priority:n.ru.LOW}})', '{"country_code":"US","country_name":"united states","region":"CA","city":"los angeles"}')
if CLA.getCommandLineArg(CommandLineArg.MANUAL_HOST_REPLACEMENT):
j = j.replace(f"https://static.{BASE_MATTERPORT_DOMAIN}", "")
with open(getModifiedName(MAIN_SHOWCASE_FILENAME), "w", encoding="UTF-8") as f:
f.write(j)
# j = j.replace('"POST"','"GET"') #no post requests for external hosted
# with open("js/showcase.js","w",encoding="UTF-8") as f:
# f.write(j)
def drange(x, y, jump):
while x < y:
yield float(x)
x += decimal.Decimal(jump)
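# Illustrative example (our note, not in the original): list(drange(0, 1, "0.25")) -> [0.0, 0.25, 0.5, 0.75];
# passing jump as a string keeps decimal.Decimal exact.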
def DebugSaveFile(fileName, fileContent):
consoleLog(f"Saved debug file: {fileName}")
with open(f"debug/{fileName}", "w") as the_file:
the_file.write(fileContent)
def RemoteDomainsReplace(content: str):
    global BASE_MATTERPORT_DOMAIN
    domReplace = [f"static.{BASE_MATTERPORT_DOMAIN}", f"cdn-2.{BASE_MATTERPORT_DOMAIN}", f"cdn-1.{BASE_MATTERPORT_DOMAIN}", "mp-app-prod.global.ssl.fastly.net", f"events.{BASE_MATTERPORT_DOMAIN}"]
    # content = content.replace('"https://static.matterport.com','`${window.location.origin}${window.location.pathname}` + "').replace('"https://cdn-2.matterport.com','`${window.location.origin}${window.location.pathname}` + "').replace('"https://cdn-1.matterport.com','`${window.location.origin}${window.location.pathname}` + "').replace('"https://mp-app-prod.global.ssl.fastly.net/','`${window.location.origin}${window.location.pathname}` + "').replace('"https://events.matterport.com/', '`${window.location.origin}${window.location.pathname}` + "')
    if CLA.getCommandLineArg(CommandLineArg.MANUAL_HOST_REPLACEMENT):
        for dom in domReplace:
            content = content.replace(f"https://{dom}", "http://127.0.0.1:8080")
    return content
async def downloadCapture(pageid):
global PROGRESS, RUN_ARGS_CONFIG_NAME, BASE_MATTERPORT_DOMAIN, CHINA_MATTERPORT_DOMAIN, THIS_MODEL_ROOT_DIR, MODEL_IS_DEFURNISHED, BASE_MODEL_ID
makeDirs(pageid)
BASE_MODEL_ID = pageid
alias = CLA.getCommandLineArg(CommandLineArg.ALIAS)
if alias and not os.path.exists(alias):
os.symlink(pageid, alias)
THIS_MODEL_ROOT_DIR = os.path.abspath(pageid)
os.chdir(THIS_MODEL_ROOT_DIR)
ROOT_FILE_COPY = ["JSNetProxy.js", "matterport-dl.py"]
for fl in ROOT_FILE_COPY:
if not os.path.exists(fl):
shutil.copy2(os.path.join(BASE_MATTERPORTDL_DIR, fl), fl)
CLA.SaveToFile(RUN_ARGS_CONFIG_NAME)
logging.basicConfig(filename="run_report.log", level=logging.DEBUG, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S", encoding="utf-8")
if CLA.getCommandLineArg(CommandLineArg.DEBUG):
makeDirs("debug")
if CLA.getCommandLineArg(CommandLineArg.CONSOLE_LOG):
logging.getLogger().addHandler(logging.StreamHandler())
consoleLog(f"Started up a download run {sys_info()}")
url = f"https://my.{BASE_MATTERPORT_DOMAIN}/show/?m={pageid}"
consoleLog(f"Downloading capture of {pageid} with base page... {url}")
base_page_text = ""
try:
base_page_text: str = await downloadFileAndGetText("MAIN", True, url, "index.html", always_download=CLA.getCommandLineArg(CommandLineArg.REFRESH_KEY_FILES))
if f"{CHINA_MATTERPORT_DOMAIN}/showcase" in base_page_text:
BASE_MATTERPORT_DOMAIN = CHINA_MATTERPORT_DOMAIN
consoleLog("Chinese matterport url found in main page, will try China server, note if this does not work try a proxy outside china")
if CLA.getCommandLineArg(CommandLineArg.DEBUG):
DebugSaveFile("base_page.html", base_page_text) # noqa: E701
except Exception as error:
if "certificate verify failed" in str(error) or "SSL certificate problem" in str(error):
raise TypeError(f"Error: {str(error)}. Have you tried running the Install Certificates.command (or similar) file in the python folder to install the normal root certs?") from error
else:
raise TypeError("First request error") from error
KeyHandler.SaveKeysFromText("MainBasePage", base_page_text)
staticbase = re.search(rf'<base href="(https://static.{BASE_MATTERPORT_DOMAIN}/.*?)">', base_page_text).group(1) # type: ignore - may be None
base_page_deunicode = base_page_text.encode("utf-8", errors="ignore").decode("unicode-escape") # some non-english matterport pages have unicode escapes for even the generic url chars
if CLA.getCommandLineArg(CommandLineArg.DEBUG):
DebugSaveFile("base_page_deunicode.html", base_page_deunicode) # noqa: E701
match = re.search(r'"(?P<baseurl>https://cdn-\d*\.matterport(?:vr)?\.(?:com|cn)/models/[a-z0-9\-_/.]*/)(?:[{}0-9a-z_/<>.~]+)(?P<defaultAccessKey>\?t=.*?)"', base_page_deunicode) # the ~/ optional is mostly for defurnished secondary models
# matterportvr.cn
if match:
groupDict = match.groupdict()
accessurl = f"{groupDict['baseurl']}~/{{filename}}{groupDict['defaultAccessKey']}"
else:
raise Exception(f"Can't find urls, try the main page: {url} in a browser to make sure it loads the model correctly")
if not MODEL_IS_DEFURNISHED:
# get a valid access key, there are a few but this is a common client used one, this also makes sure it is fresh
        file_type_content = await downloadFileAndGetText("MAIN", True, f"https://my.{BASE_MATTERPORT_DOMAIN}/api/player/models/{pageid}/files?type=3", f"api/player/models/{pageid}/files_type3", always_download=CLA.getCommandLineArg(CommandLineArg.REFRESH_KEY_FILES))  # note we would download this file later as well, but we want this key early
KeyHandler.SetAccessKey(AccessKeyType.FILES3_TEMPLATE_KEY, KeyHandler.GetKeysFromStr(file_type_content)[0])
consoleLog("Downloading graph model data...") # need the details one for advanced download
await downloadGraphModels(pageid)
# Automatic redirect if GET param isn't correct
forcedProxyBase = "window.location.origin"
# forcedProxyBase='"http://127.0.0.1:9000"'
# window._ProxyAppendURL=1;
injectedjs = 'if (!window.location.search.startsWith("?m=' + pageid + '")) { document.location.search = "?m=' + pageid + '"; };window._NoTilde=' + ("false" if CLA.getCommandLineArg(CommandLineArg.TILDE) else "true") + ";window._ProxyBase=" + forcedProxyBase + ";"
content = base_page_text.replace(staticbase, ".")
proxyAdd = ""
if CLA.getCommandLineArg(CommandLineArg.MANUAL_HOST_REPLACEMENT):
content = RemoteDomainsReplace(content)
else:
content = re.sub(r"(?P<preDomain>src\s*=\s*['" '"])https?://[^/"' "']+/", r"\g<preDomain>", content, flags=re.IGNORECASE) # we replace any src= https://whatever.com stripping the part up to the first slash
content = re.sub(r"import\(\s*\s*(?P<quoteChar>['\"])https?://[^/\"']+/", r"import(\g<quoteChar>./", content, flags=re.IGNORECASE)# similar to above but for import('http://... must add ./ as well
proxyAdd = "<script blocking='render' src='JSNetProxy.js'></script>"
content = validUntilFix(content)
content = content.replace("<head>", f"<head><script>{injectedjs}</script>{proxyAdd}")
with open(getModifiedName("index.html"), "w", encoding="UTF-8") as f:
f.write(content)
consoleLog("Downloading model info...")
await downloadInfo(pageid)
urlKeyFind = CLA.getCommandLineArg(CommandLineArg.FIND_URL_KEY)
urlKeyFindIsDownload = False
if urlKeyFind == "":
urlKeyFind = CLA.getCommandLineArg(CommandLineArg.FIND_URL_KEY_AND_DOWNLOAD)
if CLA.getCommandLineArg(CommandLineArg.DEBUG):
KeyHandler.DumpKnownKeysToFile()
urlKeyFindIsDownload = True
if urlKeyFind:
await KeyHandler.PrintUrlKeys(urlKeyFind, urlKeyFindIsDownload)
exit(0)
consoleLog("Downloading Advanced Assets...")
if CLA.getCommandLineArg(CommandLineArg.ADVANCED_DOWNLOAD):
await AdvancedAssetDownload(base_page_text)
consoleLog("Downloading static files...")
await downloadAssets(staticbase, base_page_text)
await downloadWebglVendors(base_page_text)
# Patch showcase.js to fix expiration issue and some other changes for local hosting
patchShowcase()
consoleLog("Downloading plugins...")
await downloadPlugins(pageid)
if not MODEL_IS_DEFURNISHED:
consoleLog("Downloading images...")
await downloadPics(pageid)
consoleLog("Downloading matterport tags / embedded attachments...")
await downloadAttachments()
open("api/v1/event", "a").close()
if CLA.getCommandLineArg(CommandLineArg.MAIN_ASSET_DOWNLOAD):
consoleLog("Downloading primary model assets...")
await downloadMainAssets(pageid, accessurl)
os.chdir(THIS_MODEL_ROOT_DIR)
generatedCrops = 0
if CLA.getCommandLineArg(CommandLineArg.GENERATE_TILE_MESH_CROPS):
consoleLog("Generating tile_mesh crop images locally (no progress shown)...")
generatedCrops = GenerateMeshImageCrops()
PROGRESS.ClearRelative()
consoleLog(f"Done, {PROGRESS} GeneratedCrops: {generatedCrops}!")
def GenerateMeshImageCrops():
    global Image
    from PIL import Image  # lazy import; PIL is only needed when generating crops locally
models_dir = "models"
totalGenned = 0
for model_id in os.listdir(models_dir):
model_path = os.path.join(models_dir, model_id, "assets", "mesh_tiles", "~")
        if not os.path.exists(model_path):
            continue  # skip models without mesh tiles instead of aborting the whole scan
for tile_folder in os.listdir(model_path):
tile_path = os.path.join(model_path, tile_folder)
if not os.path.isdir(tile_path):
continue
# Process each jpg file here
for file in os.listdir(tile_path):
if not file.endswith(".jpg") or "crop" in file:
continue
totalGenned += GenerateCrops(os.path.join(tile_path, file))
return totalGenned
def GenerateCrops(jpgFilePath):
cropSize = 512
testFilename = f"{jpgFilePath}crop={cropSize},{cropSize},x0,y0.jpg"
howMany = 0
if os.path.exists(testFilename):
return howMany
img = Image.open(jpgFilePath)
maxSize = img.width
increment = int(maxSize / cropSize)