From ac65ff778c3125a295a68752f32d0d0a9abc2a03 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 8 Nov 2023 16:36:51 +0800 Subject: [PATCH] chore(license): add license checker for copyright and fix license files (#1674) https://github.com/apache/incubator-pegasus/issues/1676 There are several tasks/steps for this PR: - develop a script to check the consistency between `.licenserc.yaml` and all files of the project. - according to the check result, fix `.licenserc.yaml`. - according to the fixed `.licenserc.yaml`, amend `LICENSE`. License checker could be run just by `python3 scripts/check_license.py`. --- .licenserc.yaml | 98 ++++++++++-------- LICENSE | 48 +++++++-- scripts/check_license.py | 213 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 311 insertions(+), 48 deletions(-) create mode 100755 scripts/check_license.py diff --git a/.licenserc.yaml b/.licenserc.yaml index c6f63afd8b..543a7c01ca 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -20,80 +20,104 @@ header: copyright-owner: Apache Software Foundation paths-ignore: + # Configuration files that could not be added with copyright info (otherwise would lead to error). + - '.rat-excludes' + - '**/*.csv' + - '**/*.json' # All the type of licenses of this project should be added to LICENSE. + - 'DISCLAIMER-WIP' - 'LICENSE' - 'NOTICE' - - '.github/pull_request_template.md' + # Template files of issues and pull requests for Github. - '.github/ISSUE_TEMPLATE/bug_report.md' - '.github/ISSUE_TEMPLATE/feature-request.md' - '.github/ISSUE_TEMPLATE/general_question.md' - - '.rat-excludes' - - 'DISCLAIMER-WIP' + - '.github/pull_request_template.md' + # Image files for docs. + - '**/*.jpg' + - '**/*.png' + # Files in pdf format. + - '**/*.pdf' + # Special files for golang. + - '**/go.sum' + # TODO(wangdan): Generated files for go client, could generate dynamically? 
+ - 'go-client/idl/base/GoUnusedProtection__.go' + - 'go-client/idl/base/dsn_err_string.go' + - 'go-client/idl/base/rocskdb_err_string.go' + # Special files for nodejs. + - '**/.npmigonre' + # Special files for python. - 'python-client/requirement.txt' - - '.devcontainer/devcontainer.json' + # Text files used for tests and could not be added with copyright info (otherwise would lead to error). + - 'src/aio/test/copy_source.txt' + - 'src/runtime/test/command.txt' + - 'src/failure_detector/test/gtest.filter' + - 'src/meta/test/meta_state/gtest.filter' + - 'src/meta/test/suite1' + - 'src/meta/test/suite2' + - 'src/nfs/test/nfs_test_file1' + - 'src/nfs/test/nfs_test_file2' + - 'src/runtime/test/gtest.filter' + # Used for tests and should be empty, or ignore all comment lines (otherwise would lead to error). + - 'src/utils/test/config-empty.ini' + # Binary files used for tests and could not be added with copyright info (otherwise would lead to error). + - 'src/replica/duplication/test/log.1.0.handle_real_private_log' + - 'src/replica/duplication/test/log.1.0.handle_real_private_log2' + - 'src/replica/duplication/test/log.1.0.all_loaded_are_write_empties' + # Used for patches for thirdparties. + - 'thirdparty/fix_fds_for_macos.patch' + - 'thirdparty/fix_jemalloc_for_m1_on_macos.patch' + - 'thirdparty/fix_libevent_for_macos.patch' + - 'thirdparty/fix_prometheus-cpp_limits.patch' + - 'thirdparty/fix_s2_for_aarch64.patch' + - 'thirdparty/fix_thrift_for_cpp11.patch' + - 'thirdparty/rocksdb_fix_atomic_flush_0879c240.patch' # TODO(yingchun): shell/* files are import from thirdparties, we can move them to thirdparty later. + # Copyright (c) 2016, Adi Shavit - 'src/shell/argh.h' + # Copyright (c) 2010-2016, Salvatore Sanfilippo, etc. - 'src/shell/linenoise/linenoise.c' + # Copyright (c) 2010-2014, Salvatore Sanfilippo, etc. - 'src/shell/linenoise/linenoise.h' + # Copyright (c) 2006-2015, Salvatore Sanfilippo, etc. 
- 'src/shell/sds/sds.c' - 'src/shell/sds/sds.h' - 'src/shell/sds/sdsalloc.h' - - '**/go.sum' - - '**/*.csv' - - '**/*.json' - - 'go-client/idl/base/dsn_err_string.go' - - 'go-client/idl/base/rocskdb_err_string.go' - - 'go-client/idl/base/GoUnusedProtection__.go' - - '**/.npmigonre' # Copyright (c) Facebook, Inc - 'src/utils/TokenBucket.h' - 'src/utils/test/TokenBucketTest.cpp' - 'src/utils/test/TokenBucketTest.h' + # https://github.com/preshing/modern-cpp-threading/blob/master/LICENSE + - 'src/utils/hpc_locks/autoreseteventcondvar.h' # https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE - 'src/utils/hpc_locks/autoresetevent.h' - - 'src/utils/hpc_locks/autoreseteventcondvar.h' - 'src/utils/hpc_locks/benaphore.h' - 'src/utils/hpc_locks/bitfield.h' - 'src/utils/hpc_locks/readme.txt' - 'src/utils/hpc_locks/rwlock.h' - 'src/utils/hpc_locks/sema.h' - # Copyright (c) xxxx The Chromium Authors + # Copyright (c) 2011 The Chromium Authors - 'src/utils/safe_strerror_posix.h' + # Copyright (c) 2012 The Chromium Authors - 'src/runtime/build_config.h' - 'src/utils/test/autoref_ptr_test.cpp' + # Copyright (c) 2006-2009 The Chromium Authors - 'src/utils/safe_strerror_posix.cpp' # Copyright 2017 The Abseil Authors - 'src/utils/absl/base/internal/invoke.h' - 'src/utils/absl/utility/utility.h' + - 'src/utils/memutil.h' - 'src/utils/smart_pointers.h' + - 'src/utils/string_view.cpp' - 'src/utils/string_view.h' - 'src/utils/test/memutil_test.cpp' - - 'src/utils/test/string_view_test.cpp' - 'src/utils/test/smart_pointers_test.cpp' - - 'src/utils/memutil.h' - - 'src/utils/string_view.cpp' + - 'src/utils/test/string_view_test.cpp' # Copyright (c) 2010-2011, Rob Jansen - 'cmake_modules/FindRT.cmake' - 'cmake_modules/FindDL.cmake' # Copyright (c) 2017 Guillaume Papin - 'scripts/run-clang-format.py' - # need manual fix - - 'src/failure_detector/test/gtest.filter' - - 'src/meta/test/meta_state/gtest.filter' - - 'src/meta/test/suite1' - - 'src/meta/test/suite2' - - 
'src/nfs/test/nfs_test_file1' - - 'src/nfs/test/nfs_test_file2' - - 'src/runtime/test/gtest.filter' - - 'thirdparty/fix_fds_for_macos.patch' - - 'thirdparty/fix_jemalloc_for_m1_on_macos.patch' - - 'thirdparty/fix_libevent_for_macos.patch' - - 'thirdparty/fix_prometheus-cpp_limits.patch' - - 'thirdparty/fix_s2_for_aarch64.patch' - - 'thirdparty/fix_thrift_for_cpp11.patch' - - 'thirdparty/rocksdb_fix_atomic_flush_0879c240.patch' - # should be empty, or ignore all comment lines - - 'src/utils/test/config-empty.ini' # The MIT License (MIT), Copyright (c) 2015 Microsoft Corporation - 'cmake_modules/BaseFunctions.cmake' - 'docs/rdsn-README.md' @@ -105,7 +129,6 @@ header: - 'idl/replica_admin.thrift' - 'scripts/compile_thrift.py' - 'scripts/learn_stat.py' - - 'src/common/api_common.h' - 'src/runtime/api_layer1.h' - 'src/runtime/api_task.h' - 'src/utils/api_utilities.h' @@ -113,7 +136,6 @@ header: - 'src/common/json_helper.h' - 'src/runtime/rpc/rpc_stream.h' - 'src/runtime/rpc/serialization.h' - - 'src/common/serialization_helper/dsn.layer2_types.h' - 'src/common/serialization_helper/dsn_types.h' - 'src/common/serialization_helper/thrift_helper.h' - 'src/runtime/serverlet.h' @@ -131,7 +153,6 @@ header: - 'src/client/partition_resolver.h' - 'src/replica/replica_base.h' - 'src/common/replica_envs.h' - - 'src/replica/replica_test_utils.h' - 'src/common/replication.codes.h' - 'src/replica/replication_app_base.h' - 'src/client/replication_ddl_client.h' @@ -187,7 +208,6 @@ header: - 'src/utils/configuration.h' - 'src/utils/crc.h' - 'src/utils/customizable_id.h' - - 'src/utils/dlib.h' - 'src/utils/enum_helper.h' - 'src/utils/error_code.h' - 'src/utils/errors.h' @@ -220,7 +240,6 @@ header: - 'src/aio/test/aio.cpp' - 'src/aio/test/clear.sh' - 'src/aio/test/config.ini' - - 'src/aio/test/copy_source.txt' - 'src/aio/test/run.sh' - 'src/block_service/test/config-test.ini' - 'src/client/CMakeLists.txt' @@ -363,7 +382,6 @@ header: - 'src/replica/replica_learn.cpp' - 
'src/replica/replica_stub.cpp' - 'src/replica/replica_stub.h' - - 'src/replica/replica_test_utils.cpp' - 'src/replica/replication_app_base.cpp' - 'src/replica/replication_service_app.cpp' - 'src/replica/split/test/config-test.ini' @@ -514,7 +532,6 @@ header: - 'src/replica/test/run.sh' - 'src/runtime/CMakeLists.txt' - 'src/runtime/core_main.cpp' - - 'src/runtime/dsn.layer2_types.cpp' - 'src/runtime/env.sim.cpp' - 'src/runtime/env.sim.h' - 'src/runtime/fault_injector.cpp' @@ -569,7 +586,6 @@ header: - 'src/runtime/test/address_test.cpp' - 'src/runtime/test/async_call.cpp' - 'src/runtime/test/clear.sh' - - 'src/runtime/test/command.txt' - 'src/runtime/test/config-test-corrupt-message.ini' - 'src/runtime/test/config-test-sim.ini' - 'src/runtime/test/config-test.ini' diff --git a/LICENSE b/LICENSE index f6862595d8..06453d0c0f 100644 --- a/LICENSE +++ b/LICENSE @@ -231,7 +231,8 @@ limitations under the License. -------------------------------------------------------------------------------- -src/shell/linenoise/* - BSD-2-Clause License +src/shell/linenoise/linenoise.h - BSD-2-Clause License +src/shell/linenoise/LICENSE Copyright (c) 2010-2014, Salvatore Sanfilippo Copyright (c) 2010-2013, Pieter Noordhuis @@ -263,6 +264,38 @@ src/shell/linenoise/* - BSD-2-Clause License -------------------------------------------------------------------------------- +src/shell/linenoise/linenoise.c - BSD-2-Clause License + + Copyright (c) 2010-2016, Salvatore Sanfilippo + Copyright (c) 2010-2013, Pieter Noordhuis + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + src/shell/sds/* - BSD-2-Clause License Copyright (c) 2006-2015, Salvatore Sanfilippo @@ -327,12 +360,12 @@ src/shell/argh.h - BSD-3-Clause License -------------------------------------------------------------------------------- -src/utils/smart_pointers.h - Apache 2.0 License -src/utils/string_view.h -src/utils/absl/base/internal/invoke.h +src/utils/absl/base/internal/invoke.h - Apache 2.0 License src/utils/absl/utility/utility.h src/utils/memutil.h +src/utils/smart_pointers.h src/utils/string_view.cpp +src/utils/string_view.h src/utils/test/memutil_test.cpp src/utils/test/smart_pointers_test.cpp src/utils/test/string_view_test.cpp @@ -406,11 +439,12 @@ limitations under the License. 
-------------------------------------------------------------------------------- src/utils/hpc_locks/autoreseteventcondvar.h - zlib License -src/utils/hpc_locks/rwlock.h src/utils/hpc_locks/autoresetevent.h -src/utils/hpc_locks/sema.h -src/utils/hpc_locks/bitfield.h src/utils/hpc_locks/benaphore.h +src/utils/hpc_locks/bitfield.h +src/utils/hpc_locks/readme.txt +src/utils/hpc_locks/rwlock.h +src/utils/hpc_locks/sema.h Copyright (c) 2015 Jeff Preshing diff --git a/scripts/check_license.py b/scripts/check_license.py new file mode 100755 index 0000000000..8151979bd5 --- /dev/null +++ b/scripts/check_license.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
"""
Check the consistency between `.licenserc.yaml` and the files of the project.

Every file of the project is scanned for a known copyright marker, every path
listed in `.licenserc.yaml` is attributed to the copyright comment that precedes
it, and the two classifications are compared. The differences are printed.

Run it by `python3 scripts/check_license.py`.
"""

import os
import pprint

PRJ_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
YML_PATH = os.path.join(PRJ_PATH, '.licenserc.yaml')

# All of the following rules are matched against '/'-separated paths that are
# relative to the root of the project.
IGNORED_STARTS_WITH = ['.git/', '.idea/']
IGNORED_ENDS_WITH = ['.swp', '.npmigonre', 'go.sum', '.csv', '.json', '.pdf', '.jpg', '.png']
IGNORED_NAMES = {'.licenserc.yaml', 'LICENSE', 'tags'}

# Each marker is searched as a plain substring, both in the files of the project
# and in the comment lines of .licenserc.yaml.
COPYRIGHT_MARKERS = [
    "Copyright (c) 2016, Adi Shavit",
    "Copyright (c) 2010-2016, Salvatore Sanfilippo",
    "Copyright (c) 2010-2014, Salvatore Sanfilippo",
    "Copyright (c) 2006-2015, Salvatore Sanfilippo",
    "Copyright (c) Facebook, Inc",
    "https://github.com/preshing/modern-cpp-threading",
    "https://github.com/preshing/cpp11-on-multicore",
    "Copyright (c) 2011 The Chromium Authors",
    "Copyright (c) 2012 The Chromium Authors",
    "Copyright (c) 2006-2009 The Chromium Authors",
    "Copyright 2017 The Abseil Authors",
    "Copyright (c) 2010-2011, Rob Jansen",
    "Copyright (c) 2017 Guillaume Papin",
    "Copyright (c) 2015 Microsoft Corporation",
]
# Files containing any of these markers are excluded from the check.
IGNORED_COPYRIGHT_MARKERS = ["http://www.apache.org/licenses/LICENSE-2.0"]

NO_COPYRIGHT_MARKER_KEY = "NO_COPYRIGHT_MARKER"
IGNORED_COPYRIGHT_MARKER_KEY = "IGNORED_COPYRIGHT_MARKER"


def _to_posix(path):
    """
    Convert the separators of a relative path to '/', which is the form used by
    .licenserc.yaml and by the ignore rules. This is a no-op where os.sep is '/'.
    """
    return path.replace(os.sep, '/')


def _is_dir_ignored(rel_dir):
    """
    Return True if every file below `rel_dir` would be ignored by prefix.

    If `rel_dir + '/'` starts with an ignored prefix, so does any path below it,
    hence the whole directory could be skipped safely.
    """
    return (rel_dir + '/').startswith(tuple(IGNORED_STARTS_WITH))


def mark_file(path):
    """
    Return the first marker found in the file.

    Lines are scanned in order; within a line the ignored markers are tested
    before the copyright markers.

    :param path: path of the file to scan.
    :return: IGNORED_COPYRIGHT_MARKER_KEY, or the matched element of
             COPYRIGHT_MARKERS, or NO_COPYRIGHT_MARKER_KEY if nothing matched
             before the end of the file (or before undecodable bytes).
    """
    # Decode as UTF-8 explicitly, otherwise the result would depend on the locale
    # of the machine where the check is run.
    with open(path, encoding='utf-8') as f:
        try:
            for line in f:
                if any(marker in line for marker in IGNORED_COPYRIGHT_MARKERS):
                    return IGNORED_COPYRIGHT_MARKER_KEY

                for marker in COPYRIGHT_MARKERS:
                    if marker in line:
                        return marker
        except UnicodeDecodeError:
            # Ignore UnicodeDecodeError, since some files might be binary.
            pass

    # No marker was found, thus marked with no copyright.
    return NO_COPYRIGHT_MARKER_KEY


def is_path_ignored(path):
    """
    Return True if the '/'-separated relative path starts with one of
    IGNORED_STARTS_WITH or ends with one of IGNORED_ENDS_WITH.
    """
    return path.startswith(tuple(IGNORED_STARTS_WITH)) or path.endswith(tuple(IGNORED_ENDS_WITH))


def is_name_ignored(name):
    """
    Return True if the base name of a file is one of IGNORED_NAMES.
    """
    return name in IGNORED_NAMES


def classify_files(prj_path=None):
    """
    Scan all the files of the project, mark the ones that have copyright info.

    :param prj_path: root directory to scan, PRJ_PATH by default.
    :return: dict that maps each marker (or NO_COPYRIGHT_MARKER_KEY) to the set
             of '/'-separated paths, relative to the root, of the files marked
             with it. Ignored files are absent.
    """
    if prj_path is None:
        prj_path = PRJ_PATH

    marked_files = {}

    for abs_dir, sub_dirs, file_names in os.walk(prj_path):
        rel_dir = os.path.relpath(abs_dir, prj_path)
        if rel_dir == '.':
            # Drop the possible prefixed './' for the relative paths.
            rel_dir = ''

        # Prune the ignored dirs in place so that os.walk would not descend into
        # them (such as thousands of objects under .git/) just to drop every file.
        sub_dirs[:] = [d for d in sub_dirs
                       if not _is_dir_ignored(_to_posix(os.path.join(rel_dir, d)))]

        for name in file_names:
            # Some kinds of files should be ignored.
            if is_name_ignored(name):
                continue

            rel_path = _to_posix(os.path.join(rel_dir, name))

            # Some kinds of dirs/files should be ignored.
            if is_path_ignored(rel_path):
                continue

            marker = mark_file(os.path.join(abs_dir, name))

            # Some kinds of copyright could be ignored, such as Apache LICENSE-2.0.
            if marker == IGNORED_COPYRIGHT_MARKER_KEY:
                continue

            marked_files.setdefault(marker, set()).add(rel_path)

    return marked_files


def parse_yml(yml_path=None):
    """
    Scan all the files in .licenserc.yaml, mark the ones that have copyright info.

    The file is read line by line rather than parsed as YAML: a line containing a
    copyright marker sets the marker for the following path lines, a line with a
    single-quoted string contributes that string as a path, and any other line
    resets the marker to NO_COPYRIGHT_MARKER_KEY.

    :param yml_path: path of the yaml file, YML_PATH by default.
    :return: dict that maps each marker (or NO_COPYRIGHT_MARKER_KEY) to the set
             of paths listed under it. Ignored paths are absent.
    :raises ValueError: if a line has an opening single quote without a closing one.
    """
    if yml_path is None:
        yml_path = YML_PATH

    marked_files = {}

    with open(yml_path, encoding='utf-8') as f:
        # The files without copyright info are marked with the specific key.
        current_marker = NO_COPYRIGHT_MARKER_KEY
        for line in f:
            marker = next((m for m in COPYRIGHT_MARKERS if m in line), None)
            if marker is not None:
                # Files in following lines would belong to this copyright.
                current_marker = marker
                continue

            begin_idx = line.find("'")
            if begin_idx < 0:
                # There's no file in this line, thus copyright would be reset.
                current_marker = NO_COPYRIGHT_MARKER_KEY
                continue

            begin_idx += 1
            end_idx = line.find("'", begin_idx)
            if end_idx < 0:
                raise ValueError("Invalid file path line in {yml_path}".format(yml_path=yml_path))

            path = line[begin_idx:end_idx]

            # Some kinds of dirs/files should be ignored.
            if is_name_ignored(os.path.basename(path)) or is_path_ignored(path):
                continue

            marked_files.setdefault(current_marker, set()).add(path)

    return marked_files


def check_diff(prj_path=None, yml_path=None):
    """
    Check if .licenserc.yaml is consistent with all real files of the project.

    The result is only printed: for each marker of the yaml file either
    "No diff found" or the paths that are only in the yaml file ('+') and only
    in the project ('-'); then the markers found only in the project, if any.

    :param prj_path: root directory of the project, PRJ_PATH by default.
    :param yml_path: path of the yaml file, YML_PATH by default.
    """
    if yml_path is None:
        yml_path = YML_PATH

    yml_marked_files = parse_yml(yml_path)
    marked_files = classify_files(prj_path)
    for yml_marker, yml_files in yml_marked_files.items():
        if yml_marker not in marked_files:
            print(
                "marker {yml_marker} in {yml_path} not found in any file of the project".format(
                    yml_marker=yml_marker, yml_path=yml_path))
            continue

        # Each handled marker is removed, so that what is left in the end are the
        # markers that only exist in the project.
        files = marked_files.pop(yml_marker)
        yml_plus = yml_files - files
        yml_minus = files - yml_files
        if not yml_plus and not yml_minus:
            # .licenserc.yaml is consistent with the project.
            print(
                "No diff found for marker '{yml_marker}' in {yml_path}".format(
                    yml_marker=yml_marker, yml_path=yml_path))
            continue

        print("Diff found for marker '{yml_marker}' in {yml_path}:".format(
            yml_marker=yml_marker, yml_path=yml_path))
        if yml_plus:
            # Files in .licenserc.yaml, but not in the project.
            print("{plus}: {yml_plus}".format(plus='+' * len(yml_plus), yml_plus=yml_plus))
        if yml_minus:
            # Files in the project, but not in .licenserc.yaml.
            print("{minus}: {yml_minus}".format(minus='-' * len(yml_minus), yml_minus=yml_minus))

    if not marked_files:
        return

    print("markers in some files of the project not found in {yml_path}:".format(yml_path=yml_path))
    pprint.pprint(marked_files)


def main():
    """
    Entry point of the script: run the check with the default paths.
    """
    check_diff()


if __name__ == '__main__':
    main()