Change to more generic api using geopandas #27

Merged · 9 commits · Mar 5, 2024
17 changes: 4 additions & 13 deletions README.md
@@ -1,14 +1,6 @@

# Welcome to LinkingLines!
[![status](https://joss.theoj.org/papers/64eeef828a1100bfba74052d89314758/status.svg)](https://joss.theoj.org/papers/64eeef828a1100bfba74052d89314758)

[![DOI](https://zenodo.org/badge/272334230.svg)](https://zenodo.org/badge/latestdoi/272334230)

[![PyPI](https://img.shields.io/pypi/v/LinkingLines.svg)](https://pypi.org/project/LinkingLines/)

[![ReadtheDocs](https://readthedocs.org/projects/linkinglines/badge/)](https://linkinglines.readthedocs.io/)

[![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![status](https://joss.theoj.org/papers/64eeef828a1100bfba74052d89314758/status.svg)](https://joss.theoj.org/papers/64eeef828a1100bfba74052d89314758) [![DOI](https://zenodo.org/badge/272334230.svg)](https://zenodo.org/badge/latestdoi/272334230) [![PyPI](https://img.shields.io/pypi/v/LinkingLines.svg)](https://pypi.org/project/LinkingLines/) [![ReadtheDocs](https://readthedocs.org/projects/linkinglines/badge/)](https://linkinglines.readthedocs.io/) [![License](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

# Read the Full documentation on [ReadtheDocs!](https://linkinglines.readthedocs.io/en/latest/)

@@ -53,10 +45,7 @@ import numpy as np
import matplotlib.pyplot as plt
import linkinglines as ll

data=pd.read_csv('path/to/data')
theta,rho,xc,yc=ll.HoughTransform(data)
data['theta']=theta
data['rho']=rho
data=ll.readFile('path/to/data')

dtheta=2 #degrees
drho=500 #meters
@@ -73,3 +62,5 @@ You are now ready to utilize the power of Hough Line Transform, Agglomerative Cl

Happy coding!


[Contribution Guidelines](CONTRIBUTING.md)
41 changes: 41 additions & 0 deletions contributing.md
@@ -0,0 +1,41 @@
Thank you for your interest in contributing to `linkinglines`. Please feel free to open up issues with bugs or requested features. Any contributions you make will benefit everybody else and are greatly appreciated.

We recommend using a virtual environment such as `virtualenv` to manage packages; see [here](https://virtualenv.pypa.io/en/latest/).

```
# Install virtualenv if you haven't already
pip install virtualenv

# Navigate to the project directory
cd path/to/LinkingLines

# Create a virtual environment
virtualenv venv

# Activate the virtual environment
# On Windows
venv\Scripts\activate
# On Unix or MacOS
source venv/bin/activate

# Install dependencies
pip install -r requirements.txt
```

If you would like to contribute code, please do so in a separate branch and open an issue describing your contribution.

```
git clone git@github.com:aikubo/LinkingLines.git
git checkout -b my-development-branch
```

Before submitting your pull request please verify the following:

1. Code is documented in [NumPy Docstring Style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html)
2. Code is tested and passes the tests
 - To run the tests, go to `/tests` and run `pytest`
 - Add your test code to any file whose name starts with `test`
 - More on [pytest and testing practices](https://docs.pytest.org/en/8.0.x/)
3. Open an issue and pull request

After you open your pull request, the code will be reviewed by the maintainers. After a successful merge, the documentation will be regenerated.
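A minimal test file following the pytest conventions above might look like this (the file name, helper function, and test are illustrative only, not part of the repository):

```python
# tests/test_angles.py -- illustrative example, not a file in this repo
def normalize_theta(theta):
    """Wrap an angle in degrees into the interval [-90, 90)."""
    return ((theta + 90) % 180) - 90

def test_normalize_theta():
    # pytest collects any function whose name starts with "test"
    assert normalize_theta(135) == -45
    assert normalize_theta(-100) == 80
    assert normalize_theta(0) == 0
```

Running `pytest` from the `/tests` directory discovers and runs every `test*` function automatically.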
233 changes: 233 additions & 0 deletions data/testCSV_dikemountain.csv

Large diffs are not rendered by default.

239 changes: 239 additions & 0 deletions data/testGJSON_dikemountain.geojson

Large diffs are not rendered by default.

Binary file added data/testGPKG_dikemountain.gpkg
Binary file not shown.
Binary file added data/testGPKG_dikemountain.gpkg-shm
Binary file not shown.
Empty file.
Binary file added data/testShapefile_dikemountain.dbf
Binary file not shown.
Binary file added data/testShapefile_dikemountain.shp
Binary file not shown.
Binary file added data/testShapefile_dikemountain.shx
Binary file not shown.
6 changes: 2 additions & 4 deletions docs/DemoLinkingLines.ipynb
@@ -59,11 +59,9 @@
"source": [
"# Load the example dataset \n",
"\n",
"#load it using pandas.read_csv\n",
"#load it using built in reading function which also does the preprocessing \n",
"# the CSV must have a column called \"WKT\" and can have any other data \n",
"dikeset=ll.readFile('/home/akh/myprojects/Dikes_Linking_Project/dikedata/spanish peaks/SpanishPeaks_3857.csv')\n",
"\n",
"dikeset=ll.preProcess(dikeset)\n"
"dikeset=ll.readFile('/../data/SpanishPeaks_3857.csv', preprocess=True)\n"
]
},
{
12 changes: 10 additions & 2 deletions pyproject.toml
@@ -25,11 +25,19 @@ dependencies = [
"scikit_learn>=1.3.0",
"scipy>=1.11.2",
"seaborn>=0.12.2",
"statsmodels>=0.14.0"
"statsmodels>=0.14.0",
"geopandas>=0.14.3",
"pyproj>=3.6.0",
"notebook>=7.0.4",
"jupyterlab>=4.0.6"
]
[project.optional-dependencies]
tests = [
"pytest"
"pytest>=8.0.2",
"Sphinx>=5.3.0",
"nbsphinx>=0.9.3"


]


3 changes: 3 additions & 0 deletions requirements.txt
@@ -33,6 +33,7 @@ greenlet==3.0.3
idna==3.6
imagesize==1.4.1
importlib-metadata==7.0.1
iniconfig==2.0.0
ipykernel==6.29.3
ipython==8.22.1
isoduration==20.11.0
@@ -82,6 +83,7 @@ patsy==0.5.6
pexpect==4.9.0
pillow==10.2.0
platformdirs==4.2.0
pluggy==1.4.0
prometheus_client==0.20.0
prompt-toolkit==3.0.43
psutil==5.9.8
@@ -91,6 +93,7 @@ pycparser==2.21
Pygments==2.17.2
pyparsing==3.0.9
pyproj==3.6.0
pytest==8.0.2
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
4 changes: 2 additions & 2 deletions src/linkinglines/ClusterLines.py
@@ -12,7 +12,7 @@
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram
from .HT import rotateData
from .HT import rotateData, HoughTransform
#from examineMod import *
from .PrePostProcess import whichForm
from scipy.spatial.distance import pdist, squareform
@@ -50,7 +50,7 @@ def AggCluster(dikeset, dtheta, drho, dimensions=2, linkage='complete', rotate=F
# do the Hough transform
if 'theta' not in dikeset.columns:
print("Hough transform not found, performing Hough transform")
dikeset, xc, yc=HT(dikeset)
dikeset, xc, yc=HoughTransform(dikeset)

t,r=whichForm(dikeset)
angle=np.median(abs(dikeset[t]))-20
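The clustering step performed by `AggCluster` can be sketched with SciPy's hierarchical tools. This is a simplified stand-in, not the library's actual implementation; in particular, scaling each Hough axis by its tolerance is an assumed scheme chosen for illustration:

```python
import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster

# Each row is one line in Hough space: (theta in degrees, rho in meters)
lines = np.array([[10.0, 100.0],
                  [11.0, 120.0],
                  [80.0, 5000.0]])

# Scale each axis by its tolerance so a distance of 1.0 means
# "one full tolerance apart" (assumed scheme, for illustration only)
dtheta, drho = 2.0, 500.0
scaled = lines / np.array([dtheta, drho])

# Complete linkage: no pair inside a cluster exceeds the cutoff distance
Z = linkage(scaled, method='complete')
labels = fcluster(Z, t=1.0, criterion='distance')
# The first two lines fall into one cluster; the third stands alone
```

With `dtheta=2` and `drho=500` as in the README quickstart, lines within roughly those tolerances of each other end up in the same cluster.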
2 changes: 1 addition & 1 deletion src/linkinglines/ExamineClusters.py
@@ -50,7 +50,7 @@
import statsmodels.api as sm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

from .PrePostProcess import completePreProcess, whichForm, midPoint
from .PrePostProcess import preProcess, whichForm, midPoint, transformXstart

from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
14 changes: 10 additions & 4 deletions src/linkinglines/HT.py
@@ -128,7 +128,7 @@ def HoughTransform(data, xc=None, yc=None):
rho=b1*np.sin(angle)
theta=np.rad2deg(angle)

newdata= data.copy
newdata= data.copy()
newdata['theta']=theta
newdata['rho']=rho
newdata['xc']=xc
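The (theta, rho) parameterization computed above can be illustrated standalone. This sketch derives both values directly from segment endpoints relative to a center; it should agree with the intercept-based formula in `HoughTransform` up to sign conventions, but treat it as an illustration under those assumptions, not the library's exact math:

```python
import numpy as np

def hough_params(x1, y1, x2, y2, xc=0.0, yc=0.0):
    """Return (theta in degrees, rho): the angle of the line's normal
    and its signed distance from the center (xc, yc) -- a minimal sketch."""
    # Work in coordinates centered on (xc, yc)
    x1, y1, x2, y2 = x1 - xc, y1 - yc, x2 - xc, y2 - yc
    # Direction of the segment, then its normal (rotated 90 degrees)
    angle = np.arctan2(y2 - y1, x2 - x1) + np.pi / 2
    # rho is the projection of any point on the line onto the normal
    rho = x1 * np.cos(angle) + y1 * np.sin(angle)
    return np.rad2deg(angle), rho
```

For the horizontal segment (0, 1)–(1, 1) about the origin, the normal points straight up, so theta is 90 degrees and rho is 1.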
@@ -200,13 +200,16 @@ def rotateData(data, rotation_angle):

ang=np.deg2rad(rotation_angle)

#xcR, ycR=HT_center(dataRotated)
dataRotated=HoughTransform(dataRotated, xc=xc, yc=yc)
dataRotated = data.copy(deep=True)

dataRotated['Xstart']=x1+xc
dataRotated['Ystart']=y1+yc
dataRotated['Xend']=x2+xc
dataRotated['Yend']=y2+yc

#xcR, ycR=HT_center(dataRotated)
dataRotated, _, _=HoughTransform(dataRotated, xc=xc, yc=yc)
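Per the diff, `rotateData` rotates the segment endpoints about the Hough center before re-running the transform. The rotation itself can be sketched as a standard 2-D rotation about a point — a generic illustration, not the library's code:

```python
import numpy as np

def rotate_about(x, y, xc, yc, angle_deg):
    """Rotate point(s) (x, y) about center (xc, yc) by angle_deg degrees."""
    a = np.deg2rad(angle_deg)
    xs, ys = x - xc, y - yc              # shift the center to the origin
    xr = xc + xs * np.cos(a) - ys * np.sin(a)
    yr = yc + xs * np.sin(a) + ys * np.cos(a)
    return xr, yr                        # shift back
```

Rotating (1, 0) about the origin by 90 degrees gives (0, 1); rotating (2, 1) about (1, 1) by 180 degrees gives (0, 1).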


#print(xcR,ycR)

@@ -243,7 +246,10 @@ def MidtoPerpDistance(data, xc, yc):
# df=midPoint(df)


df=HoughTransform(data, xc, yc)
df, _, _=HoughTransform(data, xc, yc)
rho = df['rho'].values
theta = df['theta'].values

intx=rho*np.cos(np.deg2rad(df['theta'].values))
inty=rho*np.sin(np.deg2rad(df['theta'].values))
df['PerpOffsetDist']=np.sqrt( (df['Xmid'].values-intx)**2 + (df['Ymid'].values-inty)**2)*np.sign((df['Ymid'].values-inty))
70 changes: 45 additions & 25 deletions src/linkinglines/PrePostProcess.py
@@ -23,7 +23,7 @@
transformXstart: reorders dataframe so that Xstart is always < Xend
DikesetReprocess: Reprocesses a dataframe containing dike line data to ensure it has essential attributes and is properly formatted.
LinesReprocess: Reprocesses a dataframe containing line data to ensure it has essential attributes and is properly formatted.
CompletePreprocess: Fully preprocesses a dataframe containing line data to ensure it has essential attributes and is properly formatted.
preProcess: Fully preprocesses a dataframe containing line data to ensure it has essential attributes and is properly formatted.
whichForm: Returns the form of the dataframe column names
MaskArea: Returns dataframe masked by bounds
getCartLimits: Computes the Cartesian limits (x and y) of a set of lines.
@@ -39,37 +39,44 @@
from scipy import stats

import matplotlib.pyplot as plt

import os
import geopandas

def readFile(path):
def readFile(name, preprocess=True):

"""
Reads in a file and returns a pandas dataframe

Parameters:
path: (string) the path to the file to be read in
name: (string) the path to the file to be read in

Returns:
data: (pandas.DataFrame) a pandas or geopandas dataframe
"""

# if not a valid path, return error
if not os.path.exists(path):
if not os.path.exists(name):
raise ValueError("Invalid path")
# if file is not .csv, .txt, or .shp, return error
if path.endswith('.csv') or path.endswith('.txt') or path.endswith('.shp') or path.endswith('.geojson') or path.endswith('.json'):
valid_extensions = ['.csv', '.txt', '.shp', '.geojson', '.json']

if not any(name.endswith(ext) for ext in valid_extensions):
raise ValueError("Invalid file type")

# identify the type of file
# read in .csv
if path.endswith('.csv'):
data=pd.read_csv(path)
elif path.endswith('.txt'):
data=pd.read_csv(path, delimiter='\t')
if name.endswith('.csv'):
data=pd.read_csv(name)
elif name.endswith('.txt'):
data=pd.read_csv(name, delimiter='\t')
else:
data=geopandas.read_file(path)
data=geopandas.read_file(name)
data=data.to_wkt()

# if preprocess is True, preprocess the data
if preprocess:
data = WKTtoArray(data)
data = preProcess(data)

return data
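The extension dispatch in `readFile` can be exercised in isolation; a minimal standalone version of the branching logic follows (the function name and return labels are illustrative, not repository code):

```python
VALID_EXTENSIONS = ('.csv', '.txt', '.shp', '.geojson', '.json')

def classify_input(name):
    """Return how a file would be read: 'csv', 'tsv', or 'geo'.

    Mirrors the branching in readFile above; raises on unknown types."""
    # str.endswith accepts a tuple, which checks every extension at once
    if not name.endswith(VALID_EXTENSIONS):
        raise ValueError("Invalid file type")
    if name.endswith('.csv'):
        return 'csv'          # pandas.read_csv
    if name.endswith('.txt'):
        return 'tsv'          # pandas.read_csv with tab delimiter
    return 'geo'              # geopandas.read_file, then to_wkt()
```

Shapefiles, GeoJSON, and JSON all fall through to the GeoPandas branch, which is the point of this PR's more generic API.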

@@ -162,37 +169,53 @@ def WKTtoArray(df, plot=False):

if len(df) < 1:
raise ValueError("DataFrame is empty")

# # if neither is in columns raise value error
if not ("WKT" in df.columns ):
if not ("geometry" in df.columns):
raise ValueError("No geometry present")

xstart=[]
ystart=[]

xend=[]
yend=[]
drop=[]

if plot:
fig,ax=plt.subplots()

# check for either "WKT" or "geometry" columns
if "geometry" in df.columns:
tag = "geometry"
else:
tag = "WKT"

for i in range(len(df)):
temp=df["WKT"].iloc[i]
temp=re.split(r'[(|)]', temp)
temp=df[tag].iloc[i]
t1=temp[0]
# Using regex to find all numbers in the string
temp = re.findall(r"[-+]?\d*\.\d+|\d+", temp)

#print("dike #:",i)
print(temp)
if 'EMPTY' in temp[0]:

if len(temp)<1:
drop.append(i)
continue
temp=re.split(r'[,\s]+', temp[2])


if "Z" in t1:
tempx=np.array(temp[::3]).astype(float)
tempy=np.array(temp[1::3]).astype(float)
else:
tempx=np.array(temp[::2]).astype(float)
tempy=np.array(temp[1::2]).astype(float)

print(tempx, tempy)
if np.unique(tempx).shape[0]==1 or np.unique(tempy).shape[0]==1:
drop.append(i)
continue


slope, intercept, r_value, p_value, std_err = stats.linregress(tempx, tempy)
print(p_value)

#for x,y in zip(tempx, tempy):
if any(np.isnan( [slope, intercept])):
drop.append(i)
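The regex-based coordinate extraction above can be tried in isolation. This is a reading of the diff, not repository code; note the pattern as written does not match scientific notation, and the stride-3 branch is what handles `Z` (3-D) geometries:

```python
import re

wkt = "LINESTRING (30 10, 10.5 30, 40 40)"
geom_type = wkt.split("(")[0]            # e.g. "LINESTRING "

# Same pattern as in WKTtoArray: decimals or plain integers, optional sign
nums = re.findall(r"[-+]?\d*\.\d+|\d+", wkt)

stride = 3 if "Z" in geom_type else 2    # Z geometries carry x, y, z triples
xs = [float(v) for v in nums[::stride]]
ys = [float(v) for v in nums[1::stride]]
# xs -> [30.0, 10.5, 40.0], ys -> [10.0, 30.0, 40.0]
```

De-interleaving by slice stride recovers the x and y columns without parsing the WKT grammar itself.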
@@ -558,10 +581,7 @@ def writeFile(df, name, myProj=None):
df: (pandas.DataFrame) the input dataframe
"""

# if file is not .csv, .txt, or .shp, return error
if path.endswith('.csv') or path.endswith('.txt') or path.endswith('.shp') or path.endswith('.geojson') or path.endswith('.json'):
raise ValueError("Invalid file type")


# if ends with .csv or .txt, write as csv
if name.endswith('.csv') or name.endswith('.txt'):
df = writeToWKT(df, name, myProj=myProj)
@@ -574,7 +594,7 @@ elif name.endswith('.gpkg'):
elif name.endswith('.gpkg'):
driver = 'GPKG'
else:
raise ValueError("Invalid file type")
driver = 'GeoJSON'

df = writetoGeoData(df, name, driver, myProj=myProj)
